feat: Port open source CsvReader for CF 3.5 compatibility

This commit is contained in:
Austin Noska
2021-04-28 17:04:44 -04:00
parent 5aca963da0
commit ae10abd71e
5 changed files with 522 additions and 0 deletions

View File

@@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
## [Unreleased]
### Added
- Ported CsvReader for CF 3.5 compatibility from: https://github.com/tspence/csharp-csv-reader
- Added enum extension method for cycling to the next enum value
- Added GetLocalTimeZoneName method to IcdEnvironment
- Added MatchAny method to RegexUtils

312
ICD.Common.Utils/Csv/Csv.cs Normal file
View File

@@ -0,0 +1,312 @@
/*
* 2006 - 2018 Ted Spence, http://tedspence.com
* License: http://www.apache.org/licenses/LICENSE-2.0
* Home page: https://github.com/tspence/csharp-csv-reader
*/
using System;
using System.Collections.Generic;
using System.Text;
using ICD.Common.Utils.IO;
#if SIMPLSHARP
using ICD.Common.Utils.Extensions;
#endif
namespace ICD.Common.Utils.Csv
{
/// <summary>
/// Root class that contains static functions for straightforward Csv parsing
/// </summary>
public static class Csv
{
/// <summary>
/// The default Csv field delimiter (a comma).
/// </summary>
public const char DEFAULT_CSV_DELIMITER = ',';
/// <summary>
/// The default Csv text qualifier (a double quote). This is used to encode strings that contain the field delimiter.
/// </summary>
public const char DEFAULT_CSV_QUALIFIER = '"';
/// <summary>
/// The default TSV (tab-delimited file) field delimiter (a tab character).
/// </summary>
public const char DEFAULT_TSV_DELIMITER = '\t';
/// <summary>
/// The default TSV (tab-delimited file) text qualifier (a double quote). This is used to encode strings that contain the field delimiter.
/// </summary>
public const char DEFAULT_TSV_QUALIFIER = '"';
#region Methods to read Csv data
/// <summary>
/// Lazily parse a Csv stream into rows of fields, while permitting newlines embedded in qualified fields.
/// </summary>
/// <remarks>
/// Text is pulled from <paramref name="inStream"/> one line at a time and the configured line separator is
/// appended to every line that is read, so a newline embedded in a qualified field is reproduced in the
/// field value as the configured separator. This is an iterator: nothing is read from the stream until the
/// returned sequence is enumerated. If the stream ends while a qualified field is still open, the partial
/// row collected so far is returned as the final row.
/// </remarks>
/// <param name="inStream">The stream to read</param>
/// <param name="settings">The Csv settings to use for this parsing operation (Default: Csv)</param>
/// <returns>An enumerable object that can be examined to retrieve rows from the stream.</returns>
public static IEnumerable<string[]> ParseStream(IcdStreamReader inStream, CsvReaderSettings settings)
{
    string line = "";
    int i = -1;
    List<string> list = new List<string>();
    var work = new StringBuilder();

    // Ensure settings are non-null
    if (settings == null)
    {
        settings = CsvReaderSettings.CSV;
    }

    string separator = settings.LineSeparator;

    // Begin reading from the stream
    while (i < line.Length || !inStream.EndOfStream)
    {
        // Consume the next character of data, pulling in another line once the buffer is exhausted
        i++;
        if (i >= line.Length)
        {
            line += inStream.ReadLine() + separator;
        }
        char c = line[i];

        // Are we at a line separator? Compare in place: no per-character substring allocation, and no
        // out-of-range failure when fewer than separator.Length characters remain in the buffer.
        bool atSeparator = (i + separator.Length <= line.Length)
            && (String.CompareOrdinal(line, i, separator, 0, separator.Length) == 0);

        if (atSeparator)
        {
            // Finish the last field through AddToken so it receives the same null-token handling
            // as every other field in the row; AddToken also resets the work buffer.
            AddToken(list, work, settings);
            yield return list.ToArray();
            list.Clear();

            if (inStream.EndOfStream)
            {
                break;
            }

            // Read in next line, or keep whatever follows the separator in the current buffer
            if (i + separator.Length >= line.Length)
            {
                line = inStream.ReadLine() + separator;
            }
            else
            {
                line = line.Substring(i + separator.Length);
            }
            i = -1;
        }
        // While starting a field, do we detect a text qualifier?
        else if ((c == settings.TextQualifier) && (work.Length == 0))
        {
            // Our next task is to find the end of this qualified-text field
            bool unterminated = false;
            int p2 = -1;
            while (p2 < 0)
            {
                p2 = line.IndexOf(settings.TextQualifier, i + 1);
                if (p2 < 0)
                {
                    // No closing qualifier yet: keep the rest of this line and read more from the stream
                    work.Append(line.Substring(i + 1));
                    i = -1;
                    var newLine = inStream.ReadLine();
                    if (String.IsNullOrEmpty(newLine) && inStream.EndOfStream)
                    {
                        unterminated = true;
                        break;
                    }
                    line = newLine + separator;
                    continue;
                }

                // Append the text between the qualifiers
                work.Append(line.Substring(i + 1, p2 - i - 1));
                i = p2;

                // If the user put in a doubled-up qualifier, e.g. `""`, insert a single one and continue
                if (((p2 + 1) < line.Length) && (line[p2 + 1] == settings.TextQualifier))
                {
                    work.Append(settings.TextQualifier);
                    i++;
                    p2 = -1;
                    continue;
                }
            }

            if (unterminated)
            {
                // The stream ended inside a qualified field. Emit the partial row and stop: rescanning
                // the current line from its start would re-append its text, and never terminates when a
                // delimiter precedes the open qualifier.
                // Drop the separator this parser appended to the final line; it was not field content.
                if (separator.Length > 0 && work.ToString().EndsWith(separator, StringComparison.Ordinal))
                {
                    work.Length -= separator.Length;
                }
                AddToken(list, work, settings);
                yield return list.ToArray();
                yield break;
            }
        }
        // Does this start a new field?
        else if (c == settings.FieldDelimiter)
        {
            // Is this a null token, and do we permit null tokens?
            AddToken(list, work, settings);

            // Special case: when the user has written a casual comma, space, and text qualifier,
            // skip the space, e.g. `"bob", "mary", "bill"`. The bounds check guards the two lookaheads.
            if (i + 2 <= line.Length - 1)
            {
                if (line[i + 1].Equals(' ') && line[i + 2].Equals(settings.TextQualifier))
                {
                    i++;
                }
            }
        }
        else
        {
            work.Append(c);
        }
    }
}
/// <summary>
/// Parse a single row of data from a Csv stream into an array of fields, while permitting embedded newlines.
/// DEPRECATED - Please use ParseStream instead.
/// </summary>
/// <remarks>
/// Lines are read and accumulated until the accumulated text parses without an unterminated text qualifier.
/// </remarks>
/// <param name="inStream">The stream to read</param>
/// <param name="settings">The Csv settings to use for this parsing operation (Default: Csv)</param>
/// <returns>
/// An array containing all fields in the next row of data. Returns null if the stream was already at its
/// end, or the best-effort fields if the stream ended inside an unterminated qualified field.
/// </returns>
public static string[] ParseMultiLine(IcdStreamReader inStream, CsvReaderSettings settings)
{
    // Resolve the default here as well as in TryParseLine: the line separator below is read from
    // settings directly, so a null argument would otherwise fail on the first multi-line field.
    if (settings == null)
    {
        settings = CsvReaderSettings.CSV;
    }

    StringBuilder sb = new StringBuilder();
    string[] array = null;
    while (!inStream.EndOfStream)
    {
        // Read in a line
        sb.Append(inStream.ReadLine());

        // Does it parse?
        string s = sb.ToString();
        if (TryParseLine(s, out array, settings))
        {
            return array;
        }

        // We didn't succeed on this try - our text must have an embedded newline in it.
        // Assume we were in the middle of a field when we encountered the newline,
        // restore the separator that ReadLine removed, and continue parsing.
        sb.Append(settings.LineSeparator);
    }

    // Fails to parse - return the best array we were able to get
    return array;
}
/// <summary>
/// Parse one line of Csv text and return its fields.
/// </summary>
/// <remarks>
/// This is a convenience wrapper over <see cref="TryParseLine"/> that discards the success flag, so the
/// fields collected before an unterminated text qualifier are still returned.
/// </remarks>
/// <param name="line">One line of text from a Csv file</param>
/// <param name="settings">The Csv settings to use for this parsing operation (Default: Csv)</param>
/// <returns>An array containing the fields found in the line.</returns>
public static string[] ParseLine(string line, CsvReaderSettings settings)
{
    string[] fields;

    // The boolean result is intentionally ignored; callers that care should use TryParseLine.
    TryParseLine(line, out fields, settings);

    return fields;
}
/// <summary>
/// Attempt to split a single line of Csv text into fields.
/// The only failure mode is a text qualifier that is opened but never closed.
/// </summary>
/// <returns>False if there was an unterminated text qualifier in the <paramref name="line"/></returns>
/// <param name="line">The line of text to parse</param>
/// <param name="settings">The Csv settings to use for this parsing operation (Default: Csv)</param>
/// <param name="row">
/// The fields found in the line. On failure this holds the fields completed so far plus the text that
/// followed the unterminated qualifier.
/// </param>
public static bool TryParseLine(string line, out string[] row, CsvReaderSettings settings)
{
    // Fall back to the standard Csv settings when none are supplied
    if (settings == null)
    {
        settings = CsvReaderSettings.CSV;
    }

    List<string> fields = new List<string>();
    StringBuilder current = new StringBuilder();

    int pos = 0;
    while (pos < line.Length)
    {
        char ch = line[pos];

        if (ch == settings.TextQualifier && current.Length == 0)
        {
            // A qualifier at the very start of a field opens a qualified section;
            // scan forward until its closing qualifier is found.
            bool closed = false;
            while (!closed)
            {
                int closing = line.IndexOf(settings.TextQualifier, pos + 1);
                if (closing < 0)
                {
                    // No closing qualifier on this line: hand back what we have and report failure
                    current.Append(line.Substring(pos + 1));
                    fields.Add(current.ToString());
                    row = fields.ToArray();
                    return false;
                }

                // Keep the text between the two qualifiers
                current.Append(line.Substring(pos + 1, closing - pos - 1));
                pos = closing;

                // A doubled qualifier is an escaped literal qualifier, so the section continues
                bool doubled = (closing + 1 < line.Length) && (line[closing + 1] == settings.TextQualifier);
                if (doubled)
                {
                    current.Append(settings.TextQualifier);
                    pos++;
                }
                else
                {
                    closed = true;
                }
            }
        }
        else if (ch == settings.FieldDelimiter)
        {
            // End of field; AddToken applies the null-token rule and resets the buffer
            AddToken(fields, current, settings);

            // Tolerate a casual "delimiter, space, qualifier" sequence such as "bob", "mary", "bill"
            // by stepping over the space so the qualifier is seen at the start of the next field.
            if (pos + 2 < line.Length && line[pos + 1] == ' ' && line[pos + 2] == settings.TextQualifier)
            {
                pos++;
            }
        }
        else
        {
            current.Append(ch);
        }

        pos++;
    }

    // The trailing buffer is always emitted, so `alice,bob,charlie,` produces four fields
    AddToken(fields, current, settings);
    row = fields.ToArray();
    return true;
}
/// <summary>
/// Move the text accumulated in <paramref name="work"/> into <paramref name="list"/> as one field,
/// then empty the buffer so it can collect the next field.
/// </summary>
/// <param name="list">The fields collected so far for the current row.</param>
/// <param name="work">The buffer holding the text of the field being completed.</param>
/// <param name="settings">Settings supplying the AllowNull flag and the NullToken text.</param>
private static void AddToken(List<string> list, StringBuilder work, CsvReaderSettings settings)
{
    string token = work.ToString();

    // When nulls are permitted, a field that exactly matches the null token (ordinal) becomes null
    bool isNullToken = settings.AllowNull
        && String.Equals(token, settings.NullToken, StringComparison.Ordinal);

    list.Add(isNullToken ? null : token);

    // Reset by length rather than allocating a new builder
    work.Length = 0;
}
#endregion
}
}

View File

@@ -0,0 +1,100 @@
/*
* 2006 - 2018 Ted Spence, http://tedspence.com
* License: http://www.apache.org/licenses/LICENSE-2.0
* Home page: https://github.com/tspence/csharp-csv-reader
*/
using System;
using System.Collections.Generic;
using ICD.Common.Properties;
using ICD.Common.Utils.IO;
namespace ICD.Common.Utils.Csv
{
/// <summary>
/// Reads rows of Csv fields from a stream reader, optionally treating the first row as headers.
/// </summary>
public sealed class CsvReader : IEnumerable<string[]>, IDisposable
{
    private readonly CsvReaderSettings m_Settings;
    private readonly IcdStreamReader m_Instream;

    #region Public Variables

    /// <summary>
    /// Set by the constructor: the first row of the stream when the settings say a header row is included,
    /// otherwise a copy of the assumed headers from the settings. May be null when neither is available.
    /// </summary>
    public string[] Headers = null;

    #endregion

    #region Constructors

    /// <summary>
    /// Construct a new Csv reader off a streamed source.
    /// When the settings indicate a header row, that row is read from the stream immediately.
    /// </summary>
    /// <param name="source">The stream source</param>
    /// <param name="settings">The Csv settings to use for this reader (Default: Csv)</param>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public CsvReader(IcdStreamReader source, [CanBeNull] CsvReaderSettings settings)
    {
        // Fail here rather than with a NullReferenceException later during header parsing,
        // enumeration or disposal.
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }

        m_Instream = source;
        m_Settings = settings ?? CsvReaderSettings.CSV;

        // Do we need to parse headers?
        if (m_Settings.HeaderRowIncluded)
        {
            Headers = NextLine();
        }
        else
        {
            // Copy the list so later changes to the settings do not alter this reader's headers
            Headers = m_Settings.AssumedHeaders != null ? m_Settings.AssumedHeaders.ToArray() : null;
        }
    }

    #endregion

    #region Iterate through a Csv File

    /// <summary>
    /// Iterate through all lines in this Csv file
    /// </summary>
    /// <returns>An enumerator over the rows produced by <see cref="Lines"/>.</returns>
    System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
    {
        return Lines().GetEnumerator();
    }

    /// <summary>
    /// Iterate through all lines in this Csv file
    /// </summary>
    /// <returns>An enumerator over the rows produced by <see cref="Lines"/>.</returns>
    IEnumerator<string[]> System.Collections.Generic.IEnumerable<string[]>.GetEnumerator()
    {
        return Lines().GetEnumerator();
    }

    /// <summary>
    /// Iterate through all lines in this Csv file.
    /// Every call parses from the same underlying stream reader, so rows that have already been
    /// consumed are not produced again by a later enumeration.
    /// </summary>
    /// <returns>A sequence in which each element is an array of all data columns in one line.</returns>
    public IEnumerable<string[]> Lines()
    {
        return Csv.ParseStream(m_Instream, m_Settings);
    }

    /// <summary>
    /// Retrieve the next line from the file.
    /// DEPRECATED - prefer enumerating <see cref="Lines"/>.
    /// </summary>
    /// <returns>One line from the file, as returned by Csv.ParseMultiLine.</returns>
    public string[] NextLine()
    {
        return Csv.ParseMultiLine(m_Instream, m_Settings);
    }

    #endregion

    #region Disposal

    /// <summary>
    /// Close our resources - specifically, the stream reader passed to the constructor.
    /// </summary>
    public void Dispose()
    {
        m_Instream.Dispose();
    }

    #endregion
}
}

View File

@@ -0,0 +1,106 @@
/*
* 2006 - 2018 Ted Spence, http://tedspence.com
* License: http://www.apache.org/licenses/LICENSE-2.0
* Home page: https://github.com/tspence/csharp-csv-reader
*/
using System;
using System.Collections.Generic;
namespace ICD.Common.Utils.Csv
{
/// <summary>
/// Options that control how Csv text is split into rows and fields.
/// </summary>
public sealed class CsvReaderSettings
{
    /// <summary>
    /// Creates settings for standard comma-separated files: comma delimiter, double-quote qualifier,
    /// the environment newline as line separator, a header row expected, and null handling disabled.
    /// </summary>
    public CsvReaderSettings()
    {
        // Tokenizing
        FieldDelimiter = ',';
        TextQualifier = '"';
        LineSeparator = IcdEnvironment.NewLine;
        ForceQualifiers = false;

        // Null handling
        AllowNull = false;
        NullToken = null;

        // Headers and row shape
        HeaderRowIncluded = true;
        AssumedHeaders = null;
        IgnoreDimensionErrors = true;
    }

    /// <summary>
    /// The character that separates one field from the next within a row.
    /// </summary>
    public char FieldDelimiter { get; set; }

    /// <summary>
    /// The character that wraps a field whose text contains the delimiter character.
    /// </summary>
    public char TextQualifier { get; set; }

    /// <summary>
    /// The text that marks the end of a row in the Csv file.
    /// </summary>
    public string LineSeparator { get; set; }

    /// <summary>
    /// When true, every field is to be wrapped in the text qualifier character.
    /// NOTE(review): this reads as an output option carried over from the upstream library - confirm
    /// whether anything in this project consults it.
    /// </summary>
    public bool ForceQualifiers { get; set; }

    /// <summary>
    /// When true, a field whose text matches <see cref="NullToken"/> may be reported as null.
    /// Csv files by default do not permit null fields.
    /// </summary>
    public bool AllowNull { get; set; }

    /// <summary>
    /// The text that stands for a null value when <see cref="AllowNull"/> is true.
    /// </summary>
    public string NullToken { get; set; }

    /// <summary>
    /// True when the first row of the Csv file holds the name of each field.
    /// </summary>
    public bool HeaderRowIncluded { get; set; }

    /// <summary>
    /// The header names to use when <see cref="HeaderRowIncluded"/> is false.
    /// </summary>
    public List<string> AssumedHeaders { get; set; }

    /// <summary>
    /// When true, files whose rows contain differing numbers of fields are accepted.
    /// </summary>
    public bool IgnoreDimensionErrors { get; set; }

    /// <summary>
    /// When true, header errors are ignored when deserializing.
    /// The constructor does not assign this, so it starts out false.
    /// </summary>
    public bool IgnoreHeaderErrors { get; set; }

    /// <summary>
    /// Standard comma-separated value (Csv) file settings.
    /// This is a single shared instance with settable properties; changing it affects every user of it.
    /// </summary>
    public static readonly CsvReaderSettings CSV = new CsvReaderSettings();

    /// <summary>
    /// Standard comma-separated value (Csv) file settings that also permit NULL values, written as the
    /// text NULL. This is a single shared instance with settable properties.
    /// </summary>
    public static readonly CsvReaderSettings CSV_PERMIT_NULL = new CsvReaderSettings
    {
        AllowNull = true,
        NullToken = "NULL"
    };

    /// <summary>
    /// Standard tab-separated value (TSV) file settings.
    /// This is a single shared instance with settable properties.
    /// </summary>
    public static readonly CsvReaderSettings TSV = new CsvReaderSettings
    {
        FieldDelimiter = '\t'
    };
}
}

View File

@@ -91,6 +91,9 @@
<Compile Include="Comparers\SequenceComparer.cs" />
<Compile Include="Comparers\UndefinedVersionComparer.cs" />
<Compile Include="Comparers\UndefinedVersionEqualityComparer.cs" />
<Compile Include="Csv\Csv.cs" />
<Compile Include="Csv\CsvReader.cs" />
<Compile Include="Csv\CsvReaderSettings.cs" />
<Compile Include="eConsoleColor.cs" />
<Compile Include="DateTimeUtils.cs" />
<Compile Include="eDaysOfWeek.cs" />