September 13, 2017

CSV Files with Linq

An example of using LINQ to parse a CSV file and write the data to a TSV file. The beauty of the approach shown here is that the file is read one line at a time when parsing, so you do not have to load the whole file into memory to process the data, and the output is likewise written one line at a time. Here is a sample of the CSV file (note that the first line is a header line, not a data line):
id,edsm_id,name,x,y,z,population,is_populated,government_id,government,allegiance_id,allegiance,state_id,state,security_id,security,primary_economy_id,primary_economy,power,power_state,power_state_id,needs_permit,updated_at,simbad_ref,controlling_minor_faction_id,controlling_minor_faction,reserve_type_id,reserve_type
17,60,"10 Ursae Majoris",0.03125,34.90625,-39.09375,0,0,176,None,5,None,80,None,16,Low,10,None,,,,0,1497906646,"10 Ursae Majoris",,,,
24,12009,"11 Bootis",-49.40625,285.25,65.21875,0,0,176,None,5,None,80,None,16,Low,10,None,,,,0,1474116394,"11 Bootis",,,,
26,13308,"11 Mu Aurigae",-30,0.75,-150.03125,0,0,176,None,5,None,80,None,16,Low,10,None,,,,0,1497806946,,,,,


We want to parse some of the fields from each of these lines into this class:
/// <summary>
/// Plain data holder for the handful of star-system fields this example keeps:
/// an integer id, a name and a 3D position.
/// </summary>
public class BaseSystemRaw
{
    /// <summary>Numeric identifier of the system.</summary>
    public int id { get; set; }

    /// <summary>Display name of the system.</summary>
    public string name { get; set; }

    /// <summary>X coordinate of the system.</summary>
    public float x { get; set; }

    /// <summary>Y coordinate of the system.</summary>
    public float y { get; set; }

    /// <summary>Z coordinate of the system.</summary>
    public float z { get; set; }

    /// <summary>
    /// Renders the instance as a single line of the form
    /// <c>id=…, name=…, location=( x, y, z )</c>.
    /// </summary>
    /// <returns>The textual representation of this system.</returns>
    public override string ToString()
    {
        // Each piece is converted with its default ToString(), exactly as the
        // '+' operator would do; a null name contributes an empty string.
        return string.Concat(
            "id=", id,
            ", name=", name,
            ", location=( ", x, ", ", y, ", ", z, " )" );
    }
}
Here is a test harness:
/// <summary>
/// Demonstrates lazily parsing a CSV file with LINQ: each line is read,
/// converted to a <see cref="BaseSystemRaw"/> and consumed one at a time.
/// </summary>
[ TestFixture ]
class ParseCsvFilesWithLinqTests
{
    /// <summary>
    /// Streams the systems CSV file, skipping the header line, and traces
    /// every parsed entry.
    /// </summary>
    [ Test ]
    public void ParseCsvFileTest()
    {
        // Skip(1) will skip the header line
        // This commented out version will only use the first 10 lines,
        // great for debugging
        // var baseSystemRaws = File.ReadLines(TestFilePaths.SystemsFilePath).
        //     Take(10).Skip(1).Select(line => ExtractBaseSystemRaw(line));
        var baseSystemRaws = File.ReadLines( TestFilePaths.SystemsFilePath ).
                Skip( 1 ).Select( line => ExtractBaseSystemRaw( line ) );
        DoSomethingWith( baseSystemRaws );
    }

    /// <summary>
    /// Converts one CSV data line into a <see cref="BaseSystemRaw"/>.
    /// Parsing is best-effort: a field that is missing or cannot be parsed
    /// simply leaves the corresponding property at its default value.
    /// </summary>
    /// <param name="line">A single data line of the CSV file (not the header).</param>
    /// <returns>A new instance populated from columns 0 (id), 2 (name) and 3-5 (x, y, z).</returns>
    BaseSystemRaw ExtractBaseSystemRaw(
        string line )
    {
        var sysRaw = new BaseSystemRaw();
        if ( string.IsNullOrEmpty( line ) )
        {
            // Blank line: nothing to extract, return an empty record instead of throwing.
            return sysRaw;
        }

        var parts = SplitCsvLine( line );

        // The file is machine-generated with '.' as decimal separator, so parse with the
        // invariant culture; the current culture could treat '.' as a group separator.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        const System.Globalization.NumberStyles intStyle =
            System.Globalization.NumberStyles.Integer;
        const System.Globalization.NumberStyles floatStyle =
            System.Globalization.NumberStyles.Float |
            System.Globalization.NumberStyles.AllowThousands;

        int tmp;
        if ( int.TryParse( FieldAt( parts, 0 ), intStyle, invariant, out tmp ) )
        {
            sysRaw.id = tmp;
        }
        // Ignore parts[1] edsm_id
        var rawName = FieldAt( parts, 2 );
        if ( rawName != null )
        {
            sysRaw.name = rawName.Trim().Replace( "\"", "" );
        }
        float ftmp;
        if ( float.TryParse( FieldAt( parts, 3 ), floatStyle, invariant, out ftmp ) )
        {
            sysRaw.x = ftmp;
        }
        if ( float.TryParse( FieldAt( parts, 4 ), floatStyle, invariant, out ftmp ) )
        {
            sysRaw.y = ftmp;
        }
        if ( float.TryParse( FieldAt( parts, 5 ), floatStyle, invariant, out ftmp ) )
        {
            sysRaw.z = ftmp;
        }
        // Ignore other parts
        return sysRaw;
    }

    /// <summary>
    /// Splits a CSV line on commas that are outside double quotes, so a quoted
    /// field such as <c>"Foo, Bar"</c> stays in one piece. Quote characters are
    /// kept in the returned fields; the caller strips them.
    /// </summary>
    /// <param name="line">The non-null line to split.</param>
    /// <returns>The fields of the line, in order; always at least one entry.</returns>
    private static List<string> SplitCsvLine(
        string line )
    {
        var fields = new List<string>();
        bool insideQuotes = false;
        int fieldStart = 0;
        for ( int i = 0; i < line.Length; i++ )
        {
            char current = line[ i ];
            if ( current == '"' )
            {
                // An escaped quote ("") toggles twice and so leaves the state unchanged.
                insideQuotes = !insideQuotes;
            }
            else if ( current == ',' && !insideQuotes )
            {
                fields.Add( line.Substring( fieldStart, i - fieldStart ) );
                fieldStart = i + 1;
            }
        }
        // Whatever follows the last separator is the final field.
        fields.Add( line.Substring( fieldStart ) );
        return fields;
    }

    /// <summary>
    /// Returns the field at <paramref name="index"/>, or null when the line
    /// has fewer fields than that (TryParse treats null as "not parseable").
    /// </summary>
    private static string FieldAt(
        List<string> parts, int index )
    {
        return index < parts.Count ? parts[ index ] : null;
    }

    /// <summary>
    /// Consumes the lazily produced entries; here each one is written to the trace output.
    /// </summary>
    /// <param name="baseSystemRaws">The sequence of parsed systems to enumerate.</param>
    private void DoSomethingWith(
        IEnumerable<BaseSystemRaw> baseSystemRaws )
    {
        foreach ( var entry in baseSystemRaws )
        {
            Trace.WriteLine( entry );
        }
    }
}
Now write the data to a TSV file.
/// <summary>
/// Demonstrates writing parsed systems to a tab-separated file with LINQ,
/// producing the output one line at a time.
/// </summary>
[ TestFixture ]
class WriteTsvFilesWithLinqTests
{
    /// <summary>
    /// Reads the first few lines of the systems CSV file and writes them,
    /// preceded by a header line, to the TSV output file.
    /// </summary>
    [ Test ]
    public void WriteTsvFileTest()
    {
        const string separator = "\t";
        var headers = new[] { "Id", "System", "x", "y", "z" };
        // Skip(1) will skip the header line
        // This version will only use the first 4 lines (header plus three data lines),
        // great for debugging; remove the Take(4) to process the whole file
        // NOTE(review): ExtractBaseSystemRaw is not declared in this class; it is the
        // line parser shown in ParseCsvFilesWithLinqTests and must be made accessible here.
        var baseSystemRaws = File.ReadLines( TestFilePaths.SystemsFilePath ).
            Take( 4 ).Skip( 1 ).Select( line => ExtractBaseSystemRaw( line ) );

        // Use Enumerable.Concat to put the header line in front of the data lines
        File.WriteAllLines( TestFilePaths.BaseSystemsTsvFilePath,
            Enumerable.Concat( new[] { string.Join( separator, headers ) },
                baseSystemRaws.Select( sys => sys != null ?
                    CoreSystemRawToTsv( separator, sys ) : "" ) ) );
    }

    /// <summary>
    /// Formats one system as a single separated line: id, name, then x, y and z
    /// with four decimal places.
    /// </summary>
    /// <param name="separator">The text placed between fields (a tab for TSV).</param>
    /// <param name="sys">The system to format; must not be null.</param>
    /// <returns>The formatted line, without a trailing line break.</returns>
    private string CoreSystemRawToTsv(
        string separator, BaseSystemRaw sys )
    {
        // The output is machine-readable, so format numbers with the invariant culture
        // to get '.' as decimal separator regardless of the machine's regional settings.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        return string.Join( separator,
            sys.id.ToString( invariant ),
            sys.name,
            sys.x.ToString( "0.0000", invariant ),
            sys.y.ToString( "0.0000", invariant ),
            sys.z.ToString( "0.0000", invariant ) );
    }
}

September 5, 2017

C# HashSet

It is best to demonstrate what some of the methods do with some unit tests. In particular, what does 'SymmetricExceptWith' do?
/// <summary>
/// Unit tests that show what the in-place set operations of
/// <see cref="HashSet{T}"/> do to the set they are called on.
/// </summary>
[ TestFixture ]
public class HashSetTests
{
    private HashSet<int> oneToThree;
    private HashSet<int> oneToThreeReordered;
    private HashSet<int> twoToFour;

    /// <summary>Recreates the sets before every test, because the tests mutate them.</summary>
    [ SetUp ]
    public void Setup()
    {
        oneToThree = new HashSet<int> { 1, 2, 3 };
        oneToThreeReordered = new HashSet<int> { 2, 3, 1 };
        twoToFour = new HashSet<int> { 2, 3, 4 };
    }

    /// <summary>
    /// SetEquals ignores duplicate entries and the order of elements
    /// in the collection it is given.
    /// </summary>
    [ Test ]
    public void SetEqualsTest()
    {
        var listWithDuplicates = new List<int> { 2, 3, 1, 3, 2 };

        Assert.That( oneToThree.SetEquals( twoToFour ), Is.False );
        Assert.That( oneToThree.SetEquals( oneToThreeReordered ), Is.True );
        Assert.That( oneToThree.SetEquals( listWithDuplicates ), Is.True );
    }

    /// <summary>IntersectWith keeps only the elements common to both sets.</summary>
    [ Test ]
    public void IntersectWithTest()
    {
        oneToThree.IntersectWith( twoToFour );

        Assert.That( oneToThree.Contains( 1 ), Is.False );
        Assert.That( oneToThree.Contains( 2 ), Is.True );
        Assert.That( oneToThree.Contains( 3 ), Is.True );
        Assert.That( oneToThree.Contains( 4 ), Is.False );
    }

    /// <summary>UnionWith ends up with all elements of both sets.</summary>
    [ Test ]
    public void UnionWithTest()
    {
        oneToThree.UnionWith( twoToFour );

        Assert.That( oneToThree.Contains( 1 ), Is.True );
        Assert.That( oneToThree.Contains( 2 ), Is.True );
        Assert.That( oneToThree.Contains( 3 ), Is.True );
        Assert.That( oneToThree.Contains( 4 ), Is.True );
    }

    /// <summary>
    /// SymmetricExceptWith keeps the elements that are in exactly one of the
    /// two sets, i.e. everything that is not common to both.
    /// </summary>
    [ Test ]
    public void SymmetricExceptWithTest()
    {
        oneToThree.SymmetricExceptWith( twoToFour );

        Assert.That( oneToThree.Contains( 1 ), Is.True );
        Assert.That( oneToThree.Contains( 2 ), Is.False );
        Assert.That( oneToThree.Contains( 3 ), Is.False );
        Assert.That( oneToThree.Contains( 4 ), Is.True );
    }
}

Here are some extension methods that leave the original sets unchanged and return the result as a new set (the built-in methods modify the set they are called on):
/// <summary>
/// Non-mutating counterparts of the in-place <see cref="HashSet{T}"/> set
/// operations: each method copies the first set (keeping its comparer),
/// applies the operation to the copy and returns it, leaving both inputs untouched.
/// </summary>
public static class HashSetExtensions
{
	/// <summary>Returns a new set with every element that is in either input.</summary>
	/// <param name="hashSet1">The set to start from; its comparer is used for the result.</param>
	/// <param name="hashSet2">The elements to add.</param>
	/// <returns>A new set; neither argument is modified.</returns>
	/// <exception cref="System.ArgumentNullException">Either argument is null.</exception>
	public static HashSet<T> Union<T>(
		this HashSet<T> hashSet1, IEnumerable<T> hashSet2)
	{
		var res = CopyOf(hashSet1, hashSet2);
		res.UnionWith(hashSet2);
		return res;
	}

	/// <summary>Returns a new set with only the elements present in both inputs.</summary>
	/// <param name="hashSet1">The set to start from; its comparer is used for the result.</param>
	/// <param name="hashSet2">The elements to keep.</param>
	/// <returns>A new set; neither argument is modified.</returns>
	/// <exception cref="System.ArgumentNullException">Either argument is null.</exception>
	public static HashSet<T> Intersecting<T>(
		this HashSet<T> hashSet1, IEnumerable<T> hashSet2)
	{
		var res = CopyOf(hashSet1, hashSet2);
		res.IntersectWith(hashSet2);
		return res;
	}

	/// <summary>Returns a new set with the elements of the first input that are not in the second.</summary>
	/// <param name="hashSet1">The set to start from; its comparer is used for the result.</param>
	/// <param name="hashSet2">The elements to remove.</param>
	/// <returns>A new set; neither argument is modified.</returns>
	/// <exception cref="System.ArgumentNullException">Either argument is null.</exception>
	public static HashSet<T> RemoveAnyFrom<T>(
		this HashSet<T> hashSet1, IEnumerable<T> hashSet2)
	{
		var res = CopyOf(hashSet1, hashSet2);
		res.ExceptWith(hashSet2);
		return res;
	}

	/// <summary>Returns a new set with the elements that are in exactly one of the two inputs.</summary>
	/// <param name="hashSet1">The set to start from; its comparer is used for the result.</param>
	/// <param name="hashSet2">The elements to compare against.</param>
	/// <returns>A new set; neither argument is modified.</returns>
	/// <exception cref="System.ArgumentNullException">Either argument is null.</exception>
	public static HashSet<T> NotIntersecting<T>(
		this HashSet<T> hashSet1, IEnumerable<T> hashSet2)
	{
		var res = CopyOf(hashSet1, hashSet2);
		res.SymmetricExceptWith(hashSet2);
		return res;
	}

	// Validates both arguments up front (so a null receiver fails with a clear
	// ArgumentNullException rather than a NullReferenceException) and returns a
	// copy of the first set that uses the same equality comparer.
	private static HashSet<T> CopyOf<T>(
		HashSet<T> hashSet1, IEnumerable<T> hashSet2)
	{
		if (hashSet1 == null)
		{
			throw new System.ArgumentNullException("hashSet1");
		}
		if (hashSet2 == null)
		{
			throw new System.ArgumentNullException("hashSet2");
		}
		return new HashSet<T>(hashSet1, hashSet1.Comparer);
	}
}
and some example calls that exercise them:
// Two lists of fruit names that overlap on "banana" and, when case is ignored, on "pear"/"Pear".
string[] names1 = { "banana", "apple", "pear", "naranja", "grapes", "mango" };
string[] names2 = { "advocado", "lemon", "Pear", "lime", "banana", "passion fruit" };

// Both sets compare their elements case-insensitively.
var hSetN1 = new HashSet<string>(names1, StringComparer.OrdinalIgnoreCase);
var hSetN2 = new HashSet<string>(names2, StringComparer.OrdinalIgnoreCase);

// Every name from either list, once each (ten entries).
var res = hSetN1.Union(hSetN2);

// Names found in both lists: banana, pear.
var res2 = hSetN1.Intersecting(hSetN2);

// Names of the first list that are not in the second: apple, naranja, grapes, mango.
var res3 = hSetN1.RemoveAnyFrom(hSetN2);

// Names found in exactly one list: everything except banana and pear.
var res4 = hSetN1.NotIntersecting(hSetN2);