diff --git a/CHANGELOG.md b/CHANGELOG.md
index c7a1d62..e112207 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 ## [Unreleased]
 ### Added
+ - Ported CsvReader for CF 3.5 compatibility from: https://github.com/tspence/csharp-csv-reader
  - Added enum extension method for cycling to the next enum value
  - Added GetLocalTimeZoneName method to IcdEnvironment
  - Added MatchAny method to RegexUtils
diff --git a/ICD.Common.Utils/Csv/Csv.cs b/ICD.Common.Utils/Csv/Csv.cs
new file mode 100644
index 0000000..083ae76
--- /dev/null
+++ b/ICD.Common.Utils/Csv/Csv.cs
@@ -0,0 +1,320 @@
+/*
+ * 2006 - 2018 Ted Spence, http://tedspence.com
+ * License: http://www.apache.org/licenses/LICENSE-2.0
+ * Home page: https://github.com/tspence/csharp-csv-reader
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Text;
+using ICD.Common.Utils.IO;
+#if SIMPLSHARP
+using ICD.Common.Utils.Extensions;
+#endif
+
+namespace ICD.Common.Utils.Csv
+{
+
+	/// <summary>
+	/// Root class that contains static functions for straightforward Csv parsing
+	/// </summary>
+	public static class Csv
+	{
+		/// <summary>
+		/// The default Csv field delimiter.
+		/// </summary>
+		public const char DEFAULT_CSV_DELIMITER = ',';
+
+		/// <summary>
+		/// The default Csv text qualifier. This is used to encode strings that contain the field delimiter.
+		/// </summary>
+		public const char DEFAULT_CSV_QUALIFIER = '"';
+
+		/// <summary>
+		/// The default TSV (tab delimited file) field delimiter.
+		/// </summary>
+		public const char DEFAULT_TSV_DELIMITER = '\t';
+
+		/// <summary>
+		/// The default TSV (tab delimited file) text qualifier. This is used to encode strings that contain the field delimiter.
+		/// </summary>
+		public const char DEFAULT_TSV_QUALIFIER = '"';
+
+
+#region Methods to read Csv data
+		/// <summary>
+		/// Parse a Csv stream into an enumerable of string arrays, while permitting embedded newlines
+		/// </summary>
+		/// <param name="inStream">The stream to read</param>
+		/// <param name="settings">The Csv settings to use for this parsing operation (Default: Csv)</param>
+		/// <returns>An enumerable object that can be examined to retrieve rows from the stream.</returns>
+		public static IEnumerable<string[]> ParseStream(IcdStreamReader inStream, CsvReaderSettings settings)
+		{
+			string line = "";
+			int i = -1;
+			List<string> list = new List<string>();
+			var work = new StringBuilder();
+
+			// Ensure settings are non-null
+			if (settings == null) {
+				settings = CsvReaderSettings.CSV;
+			}
+
+			// Begin reading from the stream
+			while (i < line.Length || !inStream.EndOfStream)
+			{
+				// Consume the next character of data
+				i++;
+				if (i >= line.Length) {
+					var newLine = inStream.ReadLine();
+					line += newLine + settings.LineSeparator;
+				}
+				char c = line[i];
+
+				// Are we at a line separator? If so, yield our work and begin again
+				if (String.Equals(line.Substring(i, settings.LineSeparator.Length), settings.LineSeparator)) {
+					list.Add(work.ToString());
+					yield return list.ToArray();
+					list.Clear();
+					work.Clear();
+					if (inStream.EndOfStream)
+					{
+						break;
+					}
+
+					// Read in next line
+					if (i + settings.LineSeparator.Length >= line.Length)
+					{
+						line = inStream.ReadLine() + settings.LineSeparator;
+					}
+					else
+					{
+						line = line.Substring(i + settings.LineSeparator.Length);
+					}
+					i = -1;
+
+					// While starting a field, do we detect a text qualifier?
+				}
+				else if ((c == settings.TextQualifier) && (work.Length == 0))
+				{
+					// Our next task is to find the end of this qualified-text field
+					int p2 = -1;
+					while (p2 < 0) {
+
+						// If we don't see an end in sight, read more from the stream
+						p2 = line.IndexOf(settings.TextQualifier, i + 1);
+						if (p2 < 0) {
+
+							// No text qualifiers yet? Let's read more from the stream and continue
+							work.Append(line.Substring(i + 1));
+							i = -1;
+							var newLine = inStream.ReadLine();
+							if (String.IsNullOrEmpty(newLine) && inStream.EndOfStream)
+							{
+								// The stream ended inside an unterminated qualified field. A plain break here left the stale
+								// line in place with i reset to -1, so the outer loop could re-parse the same row forever.
+								// Emit the partial row instead (as TryParseLine does for this case) and stop enumerating.
+								list.Add(work.ToString());
+								yield return list.ToArray();
+								yield break;
+							}
+							line = newLine + settings.LineSeparator;
+							continue;
+						}
+
+						// Append the text between the qualifiers
+						work.Append(line.Substring(i + 1, p2 - i - 1));
+						i = p2;
+
+						// If the user put in a doubled-up qualifier, e.g. `""`, insert a single one and continue
+						if (((p2 + 1) < line.Length) && (line[p2 + 1] == settings.TextQualifier))
+						{
+							work.Append(settings.TextQualifier);
+							i++;
+							p2 = -1;
+							continue;
+						}
+					}
+
+					// Does this start a new field?
+				}
+				else if (c == settings.FieldDelimiter)
+				{
+					// Is this a null token, and do we permit null tokens?
+					AddToken(list, work, settings);
+
+					// Test for special case: when the user has written a casual comma, space, and text qualifier, skip the space
+					// Checks if the second parameter of the if statement will pass through successfully
+					// e.g. `"bob", "mary", "bill"`
+					if (i + 2 <= line.Length - 1)
+					{
+						if (line[i + 1].Equals(' ') && line[i + 2].Equals(settings.TextQualifier))
+						{
+							i++;
+						}
+					}
+				}
+				else
+				{
+					work.Append(c);
+				}
+			}
+		}
+
+		/// <summary>
+		/// Parse a single row of data from a Csv line into an array of objects, while permitting embedded newlines
+		/// DEPRECATED - Please use ParseStream instead.
+		/// </summary>
+		/// <param name="inStream">The stream to read</param>
+		/// <param name="settings">The Csv settings to use for this parsing operation (Default: Csv)</param>
+		/// <returns>An array containing all fields in the next row of data, or null if the stream was already at its end.</returns>
+		public static string[] ParseMultiLine(IcdStreamReader inStream, CsvReaderSettings settings)
+		{
+			// Ensure settings are non-null; the line separator is read from them below
+			if (settings == null) settings = CsvReaderSettings.CSV;
+
+			StringBuilder sb = new StringBuilder();
+			string[] array = null;
+			while (!inStream.EndOfStream)
+			{
+				// Read in a line
+				sb.Append(inStream.ReadLine());
+
+				// Does it parse?
+				string s = sb.ToString();
+				if (TryParseLine(s, out array, settings))
+				{
+					return array;
+				}
+
+				// We didn't succeed on the first try - our text must have an embedded newline in it.
+				// Let's assume that we were in the middle of parsing a field when we encountered a newline,
+				// and continue parsing.
+				sb.Append(settings.LineSeparator);
+			}
+
+			// Fails to parse - return the best array we were able to get
+			return array;
+		}
+
+		/// <summary>
+		/// Parse a line from a Csv file and return an array of fields, ignoring whether the parse succeeded
+		/// </summary>
+		/// <param name="line">One line of text from a Csv file</param>
+		/// <param name="settings">The Csv settings to use for this parsing operation (Default: Csv)</param>
+		/// <returns>An array containing all fields in the line; only the fields read so far if a text qualifier was unterminated.</returns>
+		public static string[] ParseLine(string line, CsvReaderSettings settings)
+		{
+			string[] row;
+			TryParseLine(line, out row, settings);
+			return row;
+		}
+
+		/// <summary>
+		/// Try to parse a line of Csv data. Can only return false if an unterminated text qualifier is encountered.
+		/// </summary>
+		/// <returns>False if there was an unterminated text qualifier in the <paramref name="line"/></returns>
+		/// <param name="line">The line of text to parse</param>
+		/// <param name="settings">The Csv settings to use for this parsing operation (Default: Csv)</param>
+		/// <param name="row">The array of fields found in the line</param>
+		public static bool TryParseLine(string line, out string[] row, CsvReaderSettings settings)
+		{
+			// Ensure settings are non-null
+			if (settings == null) settings = CsvReaderSettings.CSV;
+
+			// Okay, let's begin parsing
+			List<string> list = new List<string>();
+			var work = new StringBuilder();
+			for (int i = 0; i < line.Length; i++)
+			{
+				char c = line[i];
+
+				// If we are starting a new field, is this field text qualified?
+				if ((c == settings.TextQualifier) && (work.Length == 0))
+				{
+					int p2;
+					while (true)
+					{
+						p2 = line.IndexOf(settings.TextQualifier, i + 1);
+
+						// If no closing qualifier is found, this string is broken; return failure.
+						if (p2 < 0)
+						{
+							work.Append(line.Substring(i + 1));
+							list.Add(work.ToString());
+							row = list.ToArray();
+							return false;
+						}
+
+						// Append this qualified string
+						work.Append(line.Substring(i + 1, p2 - i - 1));
+						i = p2;
+
+						// If this is a double quote, keep going!
+						if (((p2 + 1) < line.Length) && (line[p2 + 1] == settings.TextQualifier))
+						{
+							work.Append(settings.TextQualifier);
+							i++;
+
+							// otherwise, this is a single qualifier, we're done
+						}
+						else
+						{
+							break;
+						}
+					}
+
+					// Does this start a new field?
+				}
+				else if (c == settings.FieldDelimiter)
+				{
+					// Is this a null token, and do we permit null tokens?
+					AddToken(list, work, settings);
+
+					// Test for special case: when the user has written a casual comma, space, and text qualifier, skip the space
+					// Checks if the second parameter of the if statement will pass through successfully
+					// e.g. "bob", "mary", "bill"
+					if (i + 2 <= line.Length - 1)
+					{
+						if (line[i + 1].Equals(' ') && line[i + 2].Equals(settings.TextQualifier))
+						{
+							i++;
+						}
+					}
+				}
+				else
+				{
+					work.Append(c);
+				}
+			}
+
+			// We always add the last work as an element. That means `alice,bob,charlie,` will be four items long.
+			AddToken(list, work, settings);
+			row = list.ToArray();
+			return true;
+		}
+
+		/// <summary>
+		/// Add a single token to the list, then reset the work buffer
+		/// </summary>
+		/// <param name="list">The fields collected so far for the current row.</param>
+		/// <param name="work">The buffer holding the text of the field being added.</param>
+		/// <param name="settings">Supplies AllowNull and NullToken, which decide whether the field is stored as null.</param>
+		private static void AddToken(List<string> list, StringBuilder work, CsvReaderSettings settings)
+		{
+			var s = work.ToString();
+			if (settings.AllowNull && String.Equals(s, settings.NullToken, StringComparison.Ordinal))
+			{
+				list.Add(null);
+			}
+			else
+			{
+				list.Add(s);
+			}
+			work.Length = 0;
+		}
+
+#endregion
+
+	}
+}
diff --git a/ICD.Common.Utils/Csv/CsvReader.cs b/ICD.Common.Utils/Csv/CsvReader.cs
new file mode 100644
index 0000000..a70a180
--- /dev/null
+++ b/ICD.Common.Utils/Csv/CsvReader.cs
@@ -0,0 +1,100 @@
+/*
+ * 2006 - 2018 Ted Spence, http://tedspence.com
+ * License: http://www.apache.org/licenses/LICENSE-2.0
+ * Home page: https://github.com/tspence/csharp-csv-reader
+ */
+
+using System;
+using System.Collections.Generic;
+using ICD.Common.Properties;
+using ICD.Common.Utils.IO;
+
+namespace ICD.Common.Utils.Csv
+{
+	public sealed class CsvReader : IEnumerable<string[]>, IDisposable
+	{
+		private readonly CsvReaderSettings m_Settings;
+		private readonly IcdStreamReader m_Instream;
+
+		#region Public Variables
+
+		/// <summary>
+		/// If the first row in the file is a header row, this will be populated
+		/// </summary>
+		public string[] Headers = null;
+
+		#endregion
+
+		#region Constructors
+		/// <summary>
+		/// Construct a new Csv reader off a streamed source
+		/// </summary>
+		/// <param name="source">The stream source</param>
+		/// <param name="settings">The Csv settings to use for this reader (Default: Csv)</param>
+		public CsvReader(IcdStreamReader source, [CanBeNull] CsvReaderSettings settings)
+		{
+			m_Instream = source;
+			m_Settings = settings ?? CsvReaderSettings.CSV;
+
+			// Do we need to parse headers?
+			if (m_Settings.HeaderRowIncluded)
+			{
+				Headers = NextLine();
+			}
+			else
+			{
+				Headers = m_Settings.AssumedHeaders != null ? m_Settings.AssumedHeaders.ToArray() : null;
+			}
+		}
+		#endregion
+
+		#region Iterate through a Csv File
+		/// <summary>
+		/// Iterate through all lines in this Csv file
+		/// </summary>
+		/// <returns>An enumerator over the rows; each row is an array of all data columns in the line</returns>
+		System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
+		{
+			return Lines().GetEnumerator();
+		}
+
+		/// <summary>
+		/// Iterate through all lines in this Csv file
+		/// </summary>
+		/// <returns>An enumerator over the rows; each row is an array of all data columns in the line</returns>
+		IEnumerator<string[]> System.Collections.Generic.IEnumerable<string[]>.GetEnumerator()
+		{
+			return Lines().GetEnumerator();
+		}
+
+		/// <summary>
+		/// Iterate through all lines in this Csv file
+		/// </summary>
+		/// <returns>An enumerable of rows; each row is an array of all data columns in the line</returns>
+		public IEnumerable<string[]> Lines()
+		{
+			return Csv.ParseStream(m_Instream, m_Settings);
+		}
+
+		/// <summary>
+		/// Retrieve the next line from the file.
+		/// DEPRECATED - Please use Lines() instead.
+		/// </summary>
+		/// <returns>One line from the file.</returns>
+		public string[] NextLine()
+		{
+			return Csv.ParseMultiLine(m_Instream, m_Settings);
+		}
+		#endregion
+
+		#region Disposal
+		/// <summary>
+		/// Close our resources - specifically, the stream reader
+		/// </summary>
+		public void Dispose()
+		{
+			m_Instream.Dispose();
+		}
+		#endregion
+	}
+}
diff --git a/ICD.Common.Utils/Csv/CsvReaderSettings.cs b/ICD.Common.Utils/Csv/CsvReaderSettings.cs
new file mode 100644
index 0000000..65e9c4e
--- /dev/null
+++ b/ICD.Common.Utils/Csv/CsvReaderSettings.cs
@@ -0,0 +1,106 @@
+/*
+ * 2006 - 2018 Ted Spence, http://tedspence.com
+ * License: http://www.apache.org/licenses/LICENSE-2.0
+ * Home page: https://github.com/tspence/csharp-csv-reader
+ */
+
+using System;
+using System.Collections.Generic;
+
+namespace ICD.Common.Utils.Csv
+{
+	/// <summary>
+	/// Settings to configure how a Csv file is parsed
+	/// </summary>
+	public sealed class CsvReaderSettings
+	{
+		/// <summary>
+		/// Default constructor picks Csv as the default
+		/// </summary>
+		public CsvReaderSettings()
+		{
+			FieldDelimiter = ',';
+			TextQualifier = '"';
+			ForceQualifiers = false;
+			LineSeparator = IcdEnvironment.NewLine;
+			NullToken = null;
+			AllowNull = false;
+			IgnoreDimensionErrors = true;
+			AssumedHeaders = null;
+			HeaderRowIncluded = true;
+		}
+
+		/// <summary>
+		/// The character used to delimit individual fields in the Csv.
+		/// </summary>
+		public char FieldDelimiter { get; set; }
+
+		/// <summary>
+		/// The character used to enclose fields that contain the delimiter character.
+		/// </summary>
+		public char TextQualifier { get; set; }
+
+		/// <summary>
+		/// The separator used to indicate the end of a line in the Csv file.
+		/// </summary>
+		public string LineSeparator { get; set; }
+
+		/// <summary>
+		/// Set this value to true to enclose all fields in the text qualifier character.
+		/// </summary>
+		public bool ForceQualifiers { get; set; }
+
+		/// <summary>
+		/// Set this value to true to allow nulls to be rendered.
+		/// Csv files by default do not permit null fields. If this field is set to true, all non-null fields
+		/// will be enclosed by the text qualifier
+		/// </summary>
+		public bool AllowNull { get; set; }
+
+		/// <summary>
+		/// If AllowNull is set to true, this token will be used to represent NULL values.
+		/// </summary>
+		public string NullToken { get; set; }
+
+		/// <summary>
+		/// The first line of the Csv file will include the names of each field.
+		/// </summary>
+		public bool HeaderRowIncluded { get; set; }
+
+		/// <summary>
+		/// If HeaderRowIncluded is false, use these values for the headers
+		/// </summary>
+		public List<string> AssumedHeaders { get; set; }
+
+		/// <summary>
+		/// Set this value to true to allow parsing for files where each row has a different number of fields
+		/// </summary>
+		public bool IgnoreDimensionErrors { get; set; }
+
+		/// <summary>
+		/// Set this value to true to ignore header errors when deserializing
+		/// </summary>
+		public bool IgnoreHeaderErrors { get; set; }
+
+		/// <summary>
+		/// Standard comma-separated value (Csv) file settings
+		/// </summary>
+		public static readonly CsvReaderSettings CSV = new CsvReaderSettings();
+
+		/// <summary>
+		/// Standard comma-separated value (Csv) file settings that permit rendering of NULL values
+		/// </summary>
+		public static readonly CsvReaderSettings CSV_PERMIT_NULL = new CsvReaderSettings()
+		{
+			AllowNull = true,
+			NullToken = "NULL"
+		};
+
+		/// <summary>
+		/// Standard tab-separated value (TSV) file settings
+		/// </summary>
+		public static readonly CsvReaderSettings TSV = new CsvReaderSettings() {
+			FieldDelimiter = '\t'
+		};
+	}
+}
diff --git a/ICD.Common.Utils/ICD.Common.Utils_SimplSharp.csproj b/ICD.Common.Utils/ICD.Common.Utils_SimplSharp.csproj
index b13d1a1..9124253 100644
--- a/ICD.Common.Utils/ICD.Common.Utils_SimplSharp.csproj
+++ b/ICD.Common.Utils/ICD.Common.Utils_SimplSharp.csproj
@@ -91,6 +91,9 @@
+
+
+