July 12, 2021

Csv File Reader and Writer

Here is a simple generic CSV file writer and reader. It is simple and lightly tested, and it has not been tested in other locales. There is no exception handling. Parameters are checked using Debug.Assert(), which will not check them in release mode, ... . Could refactor the StreamWriter/StreamReader as a parameter so that it could be used to read/write to a generic StreamWriter/Reader rather than a file specifically. Be careful with the separator: be sure that it will never occur within the data. I have defaulted to a TAB character, but the '|' character is pretty good too. Also be careful with DateTimes: I convert them to universal format, so if you pass in a 'local' DateTime kind, it will be returned as a "Utc" kind. Generally, you should not be saving DateTimes in a local format, but rather as a Universal DateTime (although there are always exceptions).

First is the writer:
/// <summary>
/// Writes a set of data objects to disk (or to any <see cref="StreamWriter"/>),
/// one object per line, preceded by a header line listing the property names.
/// The data objects must have only simple types as properties: 
///     string, bool, numeric types, enum types, DateTime
/// Only public properties that have both a getter and a setter are written.
/// Field values are NOT escaped or quoted, so the separator must never occur in the data.
/// </summary>
/// <typeparam name="T">Data object type to save
/// Must expose the data as properties and 
/// have a default (ie. parameterless) constructor</typeparam>
public class CsvStreamWriter<T> where T : class, new()
{
    /// <summary>
    /// Creates (or overwrites) <paramref name="file"/> and writes the header line
    /// followed by one line per non-null entry of <paramref name="targets"/>.
    /// </summary>
    /// <param name="file">File to create.</param>
    /// <param name="targets">Objects to write; null entries are skipped.</param>
    /// <param name="separator">Text placed between fields. Defaults to TAB.</param>
    /// <param name="formatProvider">Culture used to format values other than enums and DateTimes.
    /// When null the current culture is used. Pass CultureInfo.InvariantCulture
    /// (to both writer and reader) for files that must be portable between locales.</param>
    /// <returns>The number of data lines written (the header line is not counted).</returns>
    public int WriteCsvFile(
        FileInfo file, 
        IEnumerable<T> targets, 
        string separator = "\t",
        IFormatProvider formatProvider = null)
    {
        using (StreamWriter writer = file.CreateText())
        {
            return WriteCsvStream(writer, targets, separator, formatProvider);
        }
    }

    /// <summary>
    /// Writes the header line followed by one line per non-null entry of
    /// <paramref name="targets"/> to <paramref name="writer"/>. The writer is neither
    /// flushed nor closed; that remains the caller's responsibility.
    /// </summary>
    /// <param name="writer">Destination writer.</param>
    /// <param name="targets">Objects to write; null entries are skipped.</param>
    /// <param name="separator">Text placed between fields. Defaults to TAB.</param>
    /// <param name="formatProvider">Culture used to format values other than enums and DateTimes.
    /// When null the current culture is used.</param>
    /// <returns>The number of data lines written (the header line is not counted).</returns>
    public int WriteCsvStream(StreamWriter writer,
        IEnumerable<T> targets, 
        string separator = "\t",
        IFormatProvider formatProvider = null)
    {
        Debug.Assert(writer != null);
        Debug.Assert(separator.Length > 0);
        Debug.Assert(targets != null);

        // Assuming here that there are not ridiculous numbers of properties
        // on the class, so using ToList() has no big overhead.
        // Properties must have get and set
        var properties = typeof(T).GetProperties().
            Where(p => p.CanRead && p.CanWrite).
            ToList();
        Debug.Assert(properties.Count > 0, $"Type {typeof(T)} has no Properties!");

        // A null provider keeps the historical behaviour of formatting with the current culture.
        IFormatProvider provider = formatProvider ?? System.Globalization.CultureInfo.CurrentCulture;

        int count = 0;
        writer.WriteLine(GetCsvHeaderLine(properties, separator));
        foreach (var entry in targets.Where(x => x != null))
        {
            writer.WriteLine(GetObjectAsCsvLine(entry, properties, separator, provider));
            count++; // count data lines only, so the return value matches what was written
        }
        return count;
    }

    /// <summary>
    /// Builds the header line: the property names joined by the separator.
    /// Yields an empty string when there are no properties.
    /// </summary>
    private string GetCsvHeaderLine(List<PropertyInfo> properties, 
        string separator)
    {
        return string.Join(separator, properties.Select(p => p.Name));
    }

    /// <summary>
    /// Builds one data line: the formatted property values of
    /// <paramref name="instance"/>, in header order, joined by the separator.
    /// </summary>
    private string GetObjectAsCsvLine(T instance, 
        List<PropertyInfo> properties, 
        string separator,
        IFormatProvider provider)
    {
        Debug.Assert(instance != null);

        var fields = properties.Select(prop => FormatValue(
            prop.PropertyType,
            prop.GetValue(instance, System.Reflection.BindingFlags.GetProperty,
                null, null, System.Globalization.CultureInfo.InvariantCulture),
            provider));

        return string.Join(separator, fields);
    }

    /// <summary>
    /// Converts a single property value to its textual field representation.
    /// </summary>
    private static string FormatValue(Type propertyType, object value, IFormatProvider provider)
    {
        if (propertyType.IsEnum)
        {
            // Write the enum to the file as an integer rather than a string
            return Enum.Format(propertyType, value, "d");
        }

        if (propertyType == typeof(DateTime))
        {
            // Generally, DateTime should be persisted in Universal time format, 
            // convert to local time format when writing to a GUI or a report.
            // The round-trip "o" format is the same in every culture.
            return ((DateTime)value).ToUniversalTime()
                .ToString("o", System.Globalization.CultureInfo.InvariantCulture);
        }

        // A null value becomes an empty field.
        return Convert.ToString(value, provider);
    }
}
Next is the reader:
/// <summary>
/// Reads a set of data objects types written to a file.
/// Each line of the file has a data object in CSV format.
/// The very first line is a special header line, that describes
/// the order in which the data object properties are persisted 
/// on each line
/// The data objects must have only simple types as properties: 
///     string, bool, numeric types, enum types, DateTime
/// Header names that do not match a read/write property of
/// <typeparamref name="T"/> are ignored (their column is skipped).
/// </summary>
/// <typeparam name="T">Data object type to load
/// Must expose the data as properties and 
/// have a default (ie. parameterless) constructor</typeparam>
public class CsvStreamReader<T> where T : class, new()
{
    /// <summary>
    /// Reads the data objects stored in <paramref name="file"/>.
    /// </summary>
    /// <param name="file">Existing file whose first line is the header line.</param>
    /// <param name="separator">Text that separates the fields. Defaults to TAB.</param>
    /// <param name="formatProvider">Culture used to parse values other than DateTimes.
    /// When null the current culture is used. It must match the culture the file was written with.</param>
    /// <returns>A lazily evaluated sequence with one object per data line.</returns>
    public IEnumerable<T> ReadCsvFile(
        FileInfo file, 
        string separator = "\t",
        IFormatProvider formatProvider = null)
    {
        Debug.Assert(file.Exists);

        // File.ReadLines streams the file line by line instead of loading it whole.
        return ReadCsvStream(File.ReadLines(file.FullName), separator, formatProvider);
    }

    /// <summary>
    /// Converts a sequence of text lines into data objects. The first line must be
    /// the header line; every following line produces one object.
    /// </summary>
    /// <param name="dataLines">Header line followed by the data lines.</param>
    /// <param name="separator">Text that separates the fields. Defaults to TAB.</param>
    /// <param name="formatProvider">Culture used to parse values other than DateTimes.
    /// When null the current culture is used.</param>
    /// <returns>A lazily evaluated sequence; nothing is read until it is enumerated.</returns>
    public IEnumerable<T> ReadCsvStream(
        IEnumerable<string> dataLines, 
        string separator = "\t",
        IFormatProvider formatProvider = null)
    {
        Debug.Assert(separator.Length > 0);
        // Assuming here that there are not ridiculous numbers of properties
        // on the class, so using ToList() has no big overhead.
        // Properties must have get and set
        List<PropertyInfo> properties = typeof(T).GetProperties().
            Where(p => p.CanRead && p.CanWrite).
            ToList();

        // A null provider keeps the historical behaviour of parsing with the current culture.
        IFormatProvider provider = formatProvider ?? System.Globalization.CultureInfo.CurrentCulture;

        // Stays null until the header line has been consumed.
        List<PropertyInfo> propertiesInReadOrder = null;

        foreach (var line in dataLines)
        {
            if (propertiesInReadOrder == null)
            { // First line must be header, listing the props in order in which they are read
                propertiesInReadOrder = GetCsvHeaders(line, separator, properties);
                // The list always has one slot per header column, so test for an
                // actual match rather than for a non-empty list.
                Debug.Assert(propertiesInReadOrder.Any(p => p != null),
"First line must be the header, listing the properties in the order in which they are read. This was not found.");
                continue;
            }

            yield return ParseCsvLine(line, separator, propertiesInReadOrder, provider);
        }
    }

    /// <summary>
    /// Maps each header column to the property with the same (case sensitive) name.
    /// Columns without a matching property get a null slot.
    /// </summary>
    private List<PropertyInfo> GetCsvHeaders(string line,
    string separator,
    List<PropertyInfo> properties)
    {
        // The CSV headers give the order in which the properties are read in
        var parts = line.Split(new[] { separator }, StringSplitOptions.None);

        // Dictionary of property name to PropertyInfo (property name is case sensitive)
        Dictionary<string, PropertyInfo> nameToPropertyMap =
            properties.ToDictionary(p => p.Name, p => p);

        var propertiesInOrder = new List<PropertyInfo>(parts.Length);
        foreach (var entryRaw in parts)
        {
            PropertyInfo property;
            propertiesInOrder.Add(
                nameToPropertyMap.TryGetValue(entryRaw.Trim(), out property) ? property : null);
        }
        return propertiesInOrder;
    }

    /// <summary>
    /// Creates a new <typeparamref name="T"/> and fills it from one data line.
    /// The line must contain a field for every matched header column.
    /// </summary>
    private T ParseCsvLine(string line,
        string separator,
        List<PropertyInfo> propertiesInOrder,
        IFormatProvider provider)
    {
        string[] parts = line.Split(new[] { separator }, StringSplitOptions.None);
        T instance = new T();
        for (int ix = 0; ix < propertiesInOrder.Count; ix++)
        {
            PropertyInfo property = propertiesInOrder[ix];
            if (property == null)
            {
                continue; // column has no matching property on T
            }

            property.SetValue(instance, ConvertField(parts[ix], property.PropertyType, provider));
        }
        return instance;
    }

    /// <summary>
    /// Converts the text of one field to a value of <paramref name="targetType"/>.
    /// </summary>
    private static object ConvertField(string text, Type targetType, IFormatProvider provider)
    {
        if (targetType.IsEnum)
        {
            // Enums are saved as numeric values rather than strings:
            // parse as the underlying numeric type, then convert to the enum.
            var underlyingType = Enum.GetUnderlyingType(targetType);
            var numericValue = Convert.ChangeType(text, underlyingType, provider);
            return Enum.ToObject(targetType, numericValue);
        }

        if (targetType == typeof(DateTime))
        {
            // DateTimes are written in the culture-independent round-trip ("o") format,
            // so they must be parsed with the invariant culture; parsing with the
            // current culture can misread the year where its calendar is not Gregorian.
            // RoundtripKind keeps a "Z" value as UTC instead of detouring through local time.
            return DateTime.Parse(text,
                    System.Globalization.CultureInfo.InvariantCulture,
                    System.Globalization.DateTimeStyles.RoundtripKind)
                .ToUniversalTime();
        }

        return Convert.ChangeType(text, targetType, provider);
    }
}
Some FileInfo extensions to make them easy to use:
/// <summary>
/// Extension methods that let a <see cref="FileInfo"/> be written to, or read from,
/// as a CSV file of data objects.
/// </summary>
public static class FileInfoCsvExtender
{
    /// <summary>
    /// Writes <paramref name="targets"/> to this file using a <c>CsvStreamWriter</c>.
    /// </summary>
    /// <param name="file">File to write.</param>
    /// <param name="targets">Objects to write.</param>
    /// <param name="separator">Text placed between fields. Defaults to TAB.</param>
    /// <returns>The value returned by the writer's <c>WriteCsvFile</c>.</returns>
    public static int WriteCsvFile<T>(this FileInfo file,
        IEnumerable<T> targets,
        string separator = "\t")
            where T : class, new()
        => new CsvStreamWriter<T>().WriteCsvFile(file, targets, separator);

    /// <summary>
    /// Reads the data objects stored in this file using a <c>CsvStreamReader</c>.
    /// </summary>
    /// <param name="file">File to read.</param>
    /// <param name="separator">Text that separates the fields. Defaults to TAB.</param>
    /// <returns>The sequence returned by the reader's <c>ReadCsvFile</c>.</returns>
    public static IEnumerable<T> ReadCsvFile<T>(this FileInfo file,
        string separator = "\t")
            where T : class, new()
        => new CsvStreamReader<T>().ReadCsvFile(file, separator);
}
then to write a CSV file:
  // The "..." is a placeholder: supply the objects to persist from your own data source.
  List<TestStation> testStations = ... ;
  // Illustrative path only; point it at a real, writable location.
  FileInfo fi = new FileInfo(@"~\some\path\TestStations.tsv");
  // Writes a header line followed by one line per station, using the default TAB separator.
  fi.WriteCsvFile<TestStation>(testStations);
and then to read it:
  // Illustrative path only; use the location the file was written to.
  FileInfo fi = new FileInfo(@"~\some\path\TestStations.tsv");
  // ReadCsvFile returns a lazily evaluated sequence; ToList() reads the whole file now.
  List<TestStation> stations = fi.ReadCsvFile<TestStation>().ToList();
Here is a unit test to verify that it works:
    /// <summary>
    /// Lazily produces the three fixed stations used by the save/load test.
    /// Every enumeration creates fresh <c>TestStation</c> instances.
    /// </summary>
    private IEnumerable<TestStation> TestData()
    {
        var gehryDock = new TestStation
        {
            Id = 2,
            HasMarket = false,
            Coordinate = 123.345f,
            Name = "Gehry Dock",
            LandingPadSizes = LandingPadEnums.Small | LandingPadEnums.Medium,
            DateTime = new DateTime(2021, 3, 28, 15, 10, 56, DateTimeKind.Utc)
        };
        yield return gehryDock;

        var trevithickTerminal = new TestStation
        {
            Id = 3,
            HasMarket = true,
            Coordinate = -435.1f,
            Name = "Trevithick Terminal",
            LandingPadSizes = LandingPadEnums.Small | LandingPadEnums.Medium | LandingPadEnums.Large,
            DateTime = new DateTime(2021, 4, 12, 3, 1, 6, DateTimeKind.Utc)
        };
        yield return trevithickTerminal;

        var bainEnd = new TestStation
        {
            Id = 1,
            HasMarket = true,
            Coordinate = 915.0f,
            Name = "Bain End",
            LandingPadSizes = LandingPadEnums.Small,
            DateTime = new DateTime(2019, 9, 7, 0, 45, 0, DateTimeKind.Utc)
        };
        yield return bainEnd;
    }

    /// <summary>
    /// Round-trip test: writes the test stations to a file, then reads the file
    /// back and verifies the loaded stations.
    /// </summary>
    [Test]
    public void SaveAndLoadTest()
    {
        // Materialise once so the write and the read step are handed the same list.
        var expectedStations = new List<TestStation>(TestData());

        WriteStationsToCsvFile(expectedStations);
        ReadStationsFromCsvFile(expectedStations);
    }

    /// <summary>
    /// Reads the stations back from the temporary TSV file and verifies them
    /// against the known test data.
    /// </summary>
    /// <param name="original">The stations that were written; used for the count and Id comparison.</param>
    private void ReadStationsFromCsvFile(List<TestStation> original)
    {
        // Same path expression as WriteStationsToCsvFile, so both steps address the same file.
        FileInfo fi = new FileInfo(Environment.ExpandEnvironmentVariables(@"%Temp%") + @"\TestStations.tsv");
        List<TestStation> stations = fi.ReadCsvFile<TestStation>().ToList();

        Trace.WriteLine($"*** {stations.Count} stations read from file \"{fi.FullName}\" ");

        // Assert.AreEqual takes (expected, actual); the correct order and the use of
        // AreEqual instead of IsTrue(a == b) make a failure report both values.
        Assert.AreEqual(original.Count, stations.Count);

        Assert.AreEqual(LandingPadEnums.Small | LandingPadEnums.Medium, stations[0].LandingPadSizes);
        Assert.AreEqual("Gehry Dock", stations[0].Name);
        Assert.AreEqual(123.345f, stations[0].Coordinate);
        Assert.AreEqual(2, stations[0].Id);
        Assert.IsFalse(stations[0].HasMarket);
        Assert.AreEqual(new DateTime(2021, 3, 28, 15, 10, 56, DateTimeKind.Utc), stations[0].DateTime);

        Assert.AreEqual("Trevithick Terminal", stations[1].Name);
        Assert.AreEqual(original[1].Id, stations[1].Id);
        Assert.AreEqual(-435.1f, stations[1].Coordinate);

        Assert.AreEqual("Bain End", stations[2].Name);
        Assert.AreEqual(1, stations[2].Id);
        Assert.IsTrue(stations[2].HasMarket);
        Assert.AreEqual(new DateTime(2019, 9, 7, 0, 45, 0, DateTimeKind.Utc), stations[2].DateTime);
        Assert.AreEqual(LandingPadEnums.Small, stations[2].LandingPadSizes);
    }

    /// <summary>
    /// Writes the given stations to the temporary TSV file, replacing any file left
    /// over from an earlier run, and verifies that the file now exists.
    /// </summary>
    /// <param name="original">The stations to write.</param>
    private void WriteStationsToCsvFile(List<TestStation> original)
    {
        // NOTE(review): this message mentions a SQL server name although nothing in this
        // method touches a server; presumably left over from another test. Confirm.
        Trace.WriteLine($"*** {original.Count} Stations found on server \"{TestServerNames.LocalSqlServerName}\": ");

        FileInfo fi = new FileInfo(Environment.ExpandEnvironmentVariables(@"%Temp%") + @"\TestStations.tsv");
        if (fi.Exists)
        {
            fi.Delete();
            fi.Refresh(); // FileInfo caches its state; refresh so Exists reflects the delete
        }

        Assert.That(!fi.Exists);
        // Write the list that was handed in (previously TestData() was regenerated here),
        // so the traced count and the caller's expectations describe what is in the file.
        fi.WriteCsvFile(original);
        fi.Refresh();
        Assert.That(fi.Exists);

        Trace.WriteLine($"*** {original.Count} stations written to file \"{fi.FullName}\" ");
    }
and the CSV file looks like this:
Id	Coordinate	HasMarket	LandingPadSizes	Name	DateTime
2	123.345	False	3	Gehry Dock	2021-03-28T15:10:56.0000000Z
3	-435.1	True	7	Trevithick Terminal	2021-04-12T03:01:06.0000000Z
1	915	True	1	Bain End	2019-09-07T00:45:00.0000000Z

No comments: