An example of using LINQ to parse a CSV file and write the data to a TSV file. The beauty of the approach shown here is that the file is read one line at a time when parsing, so you do not have to load the whole file to process the data, and the output is likewise written one line at a time.
Here is a sample piece of the CSV file (note that the first line is a header line, not a data line):
id,edsm_id,name,x,y,z,population,is_populated,government_id,government,allegiance_id,allegiance,state_id,state,security_id,security,primary_economy_id,primary_economy,power,power_state,power_state_id,needs_permit,updated_at,simbad_ref,controlling_minor_faction_id,controlling_minor_faction,reserve_type_id,reserve_type
17,60,"10 Ursae Majoris",0.03125,34.90625,-39.09375,0,0,176,None,5,None,80,None,16,Low,10,None,,,,0,1497906646,"10 Ursae Majoris",,,,
24,12009,"11 Bootis",-49.40625,285.25,65.21875,0,0,176,None,5,None,80,None,16,Low,10,None,,,,0,1474116394,"11 Bootis",,,,
26,13308,"11 Mu Aurigae",-30,0.75,-150.03125,0,0,176,None,5,None,80,None,16,Low,10,None,,,,0,1497806946,,,,,
Want to parse some of these lines into this structure:
id,edsm_id,name,x,y,z,population,is_populated,government_id,government,allegiance_id,allegiance,state_id,state,security_id,security,primary_economy_id,primary_economy,power,power_state,power_state_id,needs_permit,updated_at,simbad_ref,controlling_minor_faction_id,controlling_minor_faction,reserve_type_id,reserve_type
17,60,"10 Ursae Majoris",0.03125,34.90625,-39.09375,0,0,176,None,5,None,80,None,16,Low,10,None,,,,0,1497906646,"10 Ursae Majoris",,,,
24,12009,"11 Bootis",-49.40625,285.25,65.21875,0,0,176,None,5,None,80,None,16,Low,10,None,,,,0,1474116394,"11 Bootis",,,,
26,13308,"11 Mu Aurigae",-30,0.75,-150.03125,0,0,176,None,5,None,80,None,16,Low,10,None,,,,0,1497806946,,,,,
We want to parse some of these lines into this structure:
/// <summary>
/// Holds the subset of one CSV row that the examples care about:
/// the numeric id, the system name and the x/y/z coordinates.
/// Property names mirror the CSV column headers.
/// </summary>
public class BaseSystemRaw
{
    public int id { get; set; }

    public string name { get; set; }

    public float x { get; set; }

    public float y { get; set; }

    public float z { get; set; }

    /// <summary>
    /// Builds a human-readable one-line description of this entry.
    /// </summary>
    /// <returns>Text of the form "id=..., name=..., location=( x, y, z )".</returns>
    public override string ToString()
    {
        const string template = "id={0}, name={1}, location=( {2}, {3}, {4} )";
        return string.Format( template, id, name, x, y, z );
    }
}
// Here is a test harness:
/// <summary>
/// Shows how to parse a CSV file with LINQ, turning each data line into a
/// <c>BaseSystemRaw</c> as the file is enumerated line by line.
/// </summary>
[ TestFixture ]
class ParseCsvFilesWithLinqTests
{
    /// <summary>
    /// Reads the systems CSV file, skips its header line, converts every
    /// remaining line and hands the resulting sequence to <c>DoSomethingWith</c>.
    /// </summary>
    [ Test ]
    public void ParseCsvFileTest()
    {
        // Skip(1) will skip the header line
        // This commented out version will only use the first 10 lines,
        // great for debugging
        // var baseSystemRaws = File.ReadLines( TestFilePaths.SystemsFilePath ).
        //     Take( 10 ).Skip( 1 ).Select( line => ExtractBaseSystemRaw( line ) );
        var baseSystemRaws = File.ReadLines( TestFilePaths.SystemsFilePath ).
            Skip( 1 ).Select( line => ExtractBaseSystemRaw( line ) );

        DoSomethingWith( baseSystemRaws );
    }

    /// <summary>
    /// Converts one CSV data line into a <c>BaseSystemRaw</c>.
    /// </summary>
    /// <param name="line">A single data line: id, edsm_id, name, x, y, z, ...</param>
    /// <returns>
    /// A new instance. Any field that is missing from the line or cannot be
    /// parsed keeps its default value.
    /// </returns>
    BaseSystemRaw ExtractBaseSystemRaw( string line )
    {
        // NOTE: this is a plain split on ','. A quoted field that itself
        // contains a comma would be split into two parts.
        var parts = line.Split( ',' );
        var sysRaw = new BaseSystemRaw();

        int tmp;
        // The file is machine-generated, so parse with the invariant culture
        // rather than whatever culture the current thread happens to use.
        if ( int.TryParse( parts[ 0 ],
                           System.Globalization.NumberStyles.Integer,
                           System.Globalization.CultureInfo.InvariantCulture,
                           out tmp ) )
        {
            sysRaw.id = tmp;
        }

        // Ignore parts[1] edsm_id

        // Guard every index past 0 so a blank or truncated line does not throw.
        if ( parts.Length > 2 )
        {
            sysRaw.name = parts[ 2 ].Trim().Replace( "\"", "" );
        }

        float ftmp;
        if ( parts.Length > 3 && TryParseInvariantFloat( parts[ 3 ], out ftmp ) )
        {
            sysRaw.x = ftmp;
        }

        if ( parts.Length > 4 && TryParseInvariantFloat( parts[ 4 ], out ftmp ) )
        {
            sysRaw.y = ftmp;
        }

        if ( parts.Length > 5 && TryParseInvariantFloat( parts[ 5 ], out ftmp ) )
        {
            sysRaw.z = ftmp;
        }

        // Ignore other parts
        return sysRaw;
    }

    /// <summary>
    /// Parses a float using '.' as the decimal separator regardless of the
    /// current culture. Uses the same number styles as the default
    /// <c>float.TryParse(string, out float)</c> overload.
    /// </summary>
    private static bool TryParseInvariantFloat( string text, out float value )
    {
        return float.TryParse(
            text,
            System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
            System.Globalization.CultureInfo.InvariantCulture,
            out value );
    }

    /// <summary>
    /// Enumerates the parsed entries and writes each one to the trace output.
    /// </summary>
    /// <param name="baseSystemRaws">The sequence of parsed entries.</param>
    private void DoSomethingWith( IEnumerable<BaseSystemRaw> baseSystemRaws )
    {
        foreach ( var entry in baseSystemRaws )
        {
            Trace.WriteLine( entry );
        }
    }
}
// Now write the data to a TSV file.
/// <summary>
/// Shows how to write parsed CSV data to a tab-separated file with LINQ,
/// producing the output one line at a time.
/// </summary>
[ TestFixture ]
class WriteTsvFilesWithLinqTests
{
    /// <summary>
    /// Reads data lines from the systems CSV file, converts each to a TSV
    /// line and writes a header line followed by those lines to the TSV file.
    /// </summary>
    [ Test ]
    public void WriteTsvFileTest()
    {
        const string separator = "\t";
        var headers = new[] { "Id", "System", "x", "y", "z" };

        // Skip(1) will skip the header line
        // This version will only use the first 4 lines, great for debugging,
        // remove the Take(4) to process the whole file
        var baseSystemRaws = File.ReadLines( TestFilePaths.SystemsFilePath ).
            Take( 4 ).Skip( 1 ).Select( line => ExtractBaseSystemRaw( line ) );

        // Use Enumerable.Concat to add the header string
        File.WriteAllLines(
            TestFilePaths.BaseSystemsTsvFilePath,
            Enumerable.Concat(
                new[] { string.Join( separator, headers ) },
                baseSystemRaws.Select( sys => CoreSystemRawToTsv( separator, sys ) ) ) );
    }

    /// <summary>
    /// Converts one CSV data line into a <c>BaseSystemRaw</c>.
    /// </summary>
    /// <param name="line">A single data line: id, edsm_id, name, x, y, z, ...</param>
    /// <returns>
    /// A new instance. Any field that is missing from the line or cannot be
    /// parsed keeps its default value.
    /// </returns>
    private static BaseSystemRaw ExtractBaseSystemRaw( string line )
    {
        // NOTE: this is a plain split on ','. A quoted field that itself
        // contains a comma would be split into two parts.
        var parts = line.Split( ',' );
        var sysRaw = new BaseSystemRaw();
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        const System.Globalization.NumberStyles floatStyle =
            System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;

        int tmp;
        if ( int.TryParse( parts[ 0 ], System.Globalization.NumberStyles.Integer, invariant, out tmp ) )
        {
            sysRaw.id = tmp;
        }

        // Ignore parts[1] edsm_id
        if ( parts.Length > 2 )
        {
            sysRaw.name = parts[ 2 ].Trim().Replace( "\"", "" );
        }

        float ftmp;
        if ( parts.Length > 3 && float.TryParse( parts[ 3 ], floatStyle, invariant, out ftmp ) )
        {
            sysRaw.x = ftmp;
        }

        if ( parts.Length > 4 && float.TryParse( parts[ 4 ], floatStyle, invariant, out ftmp ) )
        {
            sysRaw.y = ftmp;
        }

        if ( parts.Length > 5 && float.TryParse( parts[ 5 ], floatStyle, invariant, out ftmp ) )
        {
            sysRaw.z = ftmp;
        }

        // Ignore other parts
        return sysRaw;
    }

    /// <summary>
    /// Formats one entry as a single delimited line: id, name, then x, y and z
    /// with four decimal places.
    /// </summary>
    /// <param name="separator">The text placed between fields.</param>
    /// <param name="sys">The entry to format.</param>
    /// <returns>The delimited line, without a trailing newline.</returns>
    private static string CoreSystemRawToTsv( string separator, BaseSystemRaw sys )
    {
        // Format with the invariant culture so the decimal separator is always
        // '.', whatever the culture of the current thread.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        var fields = new[]
        {
            sys.id.ToString( invariant ),
            sys.name,
            sys.x.ToString( "0.0000", invariant ),
            sys.y.ToString( "0.0000", invariant ),
            sys.z.ToString( "0.0000", invariant )
        };

        return string.Join( separator, fields );
    }
}
No comments:
Post a Comment