An example of using LINQ to parse a CSV file and write the data to a TSV file. The beauty of the approach shown here is that the file is read one line at a time when parsing, so you do not have to load the whole file into memory to process the data, and the output is likewise written one line at a time.
Here is a sample of the CSV file (note that the first line is a header line, not a data line):
id,edsm_id,name,x,y,z,population,is_populated,government_id,government,allegiance_id,allegiance,state_id,state,security_id,security,primary_economy_id,primary_economy,power,power_state,power_state_id,needs_permit,updated_at,simbad_ref,controlling_minor_faction_id,controlling_minor_faction,reserve_type_id,reserve_type
17,60,"10 Ursae Majoris",0.03125,34.90625,-39.09375,0,0,176,None,5,None,80,None,16,Low,10,None,,,,0,1497906646,"10 Ursae Majoris",,,,
24,12009,"11 Bootis",-49.40625,285.25,65.21875,0,0,176,None,5,None,80,None,16,Low,10,None,,,,0,1474116394,"11 Bootis",,,,
26,13308,"11 Mu Aurigae",-30,0.75,-150.03125,0,0,176,None,5,None,80,None,16,Low,10,None,,,,0,1497806946,,,,,
We want to parse these lines into the following structure, keeping only some of the fields:
/// <summary>
/// Plain data holder for a system record: an id, a name and a
/// three-component location.
/// </summary>
public class BaseSystemRaw
{
    /// <summary>Numeric identifier of the system.</summary>
    public int id { get; set; }

    /// <summary>Name of the system.</summary>
    public string name { get; set; }

    /// <summary>First component of the location.</summary>
    public float x { get; set; }

    /// <summary>Second component of the location.</summary>
    public float y { get; set; }

    /// <summary>Third component of the location.</summary>
    public float z { get; set; }

    /// <summary>
    /// Builds a one-line, human-readable description of the record.
    /// Numbers are formatted with the current culture.
    /// </summary>
    /// <returns>Text of the form <c>id=…, name=…, location=( x, y, z )</c>.</returns>
    public override string ToString()
    {
        const string template = "id={0}, name={1}, location=( {2}, {3}, {4} )";
        return string.Format( template, id, name, x, y, z );
    }
}
Here is a test harness:
/// <summary>
/// Test fixture showing how to stream a CSV file with LINQ: each line is
/// read, converted to a <see cref="BaseSystemRaw"/> and consumed in turn,
/// without loading the whole file first.
/// </summary>
[ TestFixture ]
class ParseCsvFilesWithLinqTests
{
    /// <summary>
    /// Reads the systems CSV file, skips its header line, converts every
    /// remaining line to a <see cref="BaseSystemRaw"/> and traces it.
    /// </summary>
    [ Test ]
    public void ParseCsvFileTest()
    {
        // File.ReadLines is lazy, so the query below pulls one line at a time.
        // Skip(1) will skip the header line.
        // For debugging, insert Take(10) before Skip(1) to only process the
        // first 10 lines of the file.
        var baseSystemRaws = File.ReadLines( TestFilePaths.SystemsFilePath ).
            Skip( 1 ).Select( line => ExtractBaseSystemRaw( line ) );
        DoSomethingWith( baseSystemRaws );
    }

    /// <summary>
    /// Converts one CSV data line into a <see cref="BaseSystemRaw"/>.
    /// Columns used: 0 = id, 2 = name, 3..5 = x, y, z; all others are ignored.
    /// </summary>
    /// <param name="line">A single data line of the CSV file (not the header).</param>
    /// <returns>
    /// A new record. A numeric column that is missing or cannot be parsed
    /// leaves the corresponding property at 0; a missing name column yields
    /// an empty name.
    /// </returns>
    BaseSystemRaw ExtractBaseSystemRaw(
        string line )
    {
        // Quote-aware split: a comma inside a quoted name must not start a new column.
        var parts = SplitCsvLine( line ?? string.Empty );
        var sysRaw = new BaseSystemRaw();

        // The file is machine-written with '.' decimals, so parse with the
        // invariant culture instead of whatever culture the test runs under.
        int tmp;
        if ( int.TryParse( GetPart( parts, 0 ),
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture, out tmp ) )
        {
            sysRaw.id = tmp;
        }

        // Ignore parts[1] edsm_id
        sysRaw.name = GetPart( parts, 2 ).Trim().Replace( "\"", "" );

        sysRaw.x = ParseFloatOrZero( GetPart( parts, 3 ) );
        sysRaw.y = ParseFloatOrZero( GetPart( parts, 4 ) );
        sysRaw.z = ParseFloatOrZero( GetPart( parts, 5 ) );

        // Ignore other parts
        return sysRaw;
    }

    /// <summary>
    /// Splits a CSV line on commas that are outside double quotes. The raw
    /// text of each field (including any quotes) is returned unchanged, so for
    /// lines without quoted commas the result matches <c>line.Split( ',' )</c>.
    /// </summary>
    private static System.Collections.Generic.List<string> SplitCsvLine(
        string line )
    {
        var fields = new System.Collections.Generic.List<string>();
        bool inQuotes = false;
        int fieldStart = 0;
        for ( int i = 0; i < line.Length; i++ )
        {
            char c = line[ i ];
            if ( c == '"' )
            {
                // An escaped quote ("") toggles twice and leaves the state unchanged.
                inQuotes = !inQuotes;
            }
            else if ( c == ',' && !inQuotes )
            {
                fields.Add( line.Substring( fieldStart, i - fieldStart ) );
                fieldStart = i + 1;
            }
        }
        fields.Add( line.Substring( fieldStart ) );
        return fields;
    }

    /// <summary>
    /// Returns the field at <paramref name="index"/>, or an empty string when
    /// the line has fewer columns (blank or truncated line).
    /// </summary>
    private static string GetPart(
        System.Collections.Generic.List<string> parts,
        int index )
    {
        return index < parts.Count ? parts[ index ] : string.Empty;
    }

    /// <summary>
    /// Parses a float using the invariant culture; returns 0 when the text is
    /// not a valid number.
    /// </summary>
    private static float ParseFloatOrZero(
        string text )
    {
        float value;
        // Same number styles float.TryParse( string, out float ) uses by default.
        var styles = System.Globalization.NumberStyles.Float |
            System.Globalization.NumberStyles.AllowThousands;
        return float.TryParse( text, styles,
            System.Globalization.CultureInfo.InvariantCulture, out value ) ? value : 0f;
    }

    /// <summary>
    /// Consumes the sequence by writing each entry to the trace output.
    /// Enumerating here is what actually drives the lazy file read.
    /// </summary>
    /// <param name="baseSystemRaws">The records to trace.</param>
    private void DoSomethingWith(
        IEnumerable<BaseSystemRaw> baseSystemRaws )
    {
        foreach ( var entry in baseSystemRaws )
        {
            Trace.WriteLine( entry );
        }
    }
}
Now write the data to a TSV file.
/// <summary>
/// Test fixture showing how to stream records parsed from a CSV file into a
/// TSV file with LINQ, one line at a time.
/// </summary>
[ TestFixture ]
class WriteTsvFilesWithLinqTests
{
    /// <summary>
    /// Reads the first few data lines of the systems CSV file, converts them to
    /// <see cref="BaseSystemRaw"/> records and writes a header line followed by
    /// one tab-separated line per record to the TSV output file.
    /// </summary>
    [ Test ]
    public void WriteTsvFileTest()
    {
        const string separator = "\t";
        var headers = new[] { "Id", "System", "x", "y", "z" };
        // Skip(1) will skip the header line
        // This version will only use the first 4 lines, great for debugging,
        // remove the Take(4) to process the whole file
        var baseSystemRaws = File.ReadLines( TestFilePaths.SystemsFilePath ).
            Take( 4 ).Skip( 1 ).Select( line => ExtractBaseSystemRaw( line ) );
        // Use Enumerable.Concat to add the header string
        File.WriteAllLines( TestFilePaths.BaseSystemsTsvFilePath,
            Enumerable.Concat( new[] { string.Join( separator, headers ) },
                baseSystemRaws.Select( sys => sys != null ?
                    CoreSystemRawToTsv( separator, sys ) : "" ) ) );
    }

    /// <summary>
    /// Formats one record as a single separated line: id, name, then x, y and z
    /// with four decimal places.
    /// </summary>
    /// <param name="separator">Text placed between the columns.</param>
    /// <param name="sys">The record to format; must not be null.</param>
    /// <returns>The formatted line, without a line terminator.</returns>
    private string CoreSystemRawToTsv(
        string separator,
        BaseSystemRaw sys )
    {
        // Invariant culture keeps '.' as the decimal separator no matter which
        // culture the test runs under, so the output file is stable.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        return string.Join( separator, new[]
        {
            sys.id.ToString( invariant ),
            sys.name,
            sys.x.ToString( "0.0000", invariant ),
            sys.y.ToString( "0.0000", invariant ),
            sys.z.ToString( "0.0000", invariant )
        } );
    }

    /// <summary>
    /// Converts one CSV data line into a <see cref="BaseSystemRaw"/>.
    /// Columns used: 0 = id, 2 = name, 3..5 = x, y, z; all others are ignored.
    /// Declared in this fixture so that it is self-contained.
    /// </summary>
    /// <param name="line">A single data line of the CSV file (not the header).</param>
    /// <returns>
    /// A new record. A numeric column that is missing or cannot be parsed
    /// leaves the corresponding property at 0; a missing name column yields
    /// an empty name.
    /// </returns>
    private BaseSystemRaw ExtractBaseSystemRaw(
        string line )
    {
        var parts = SplitCsvLine( line ?? string.Empty );
        var sysRaw = new BaseSystemRaw();

        int tmp;
        if ( int.TryParse( GetPart( parts, 0 ),
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture, out tmp ) )
        {
            sysRaw.id = tmp;
        }

        // Column 1 (edsm_id) is not used.
        sysRaw.name = GetPart( parts, 2 ).Trim().Replace( "\"", "" );

        sysRaw.x = ParseFloatOrZero( GetPart( parts, 3 ) );
        sysRaw.y = ParseFloatOrZero( GetPart( parts, 4 ) );
        sysRaw.z = ParseFloatOrZero( GetPart( parts, 5 ) );
        return sysRaw;
    }

    /// <summary>
    /// Splits a CSV line on commas that are outside double quotes, returning
    /// the raw text of each field (quotes included).
    /// </summary>
    private static System.Collections.Generic.List<string> SplitCsvLine(
        string line )
    {
        var fields = new System.Collections.Generic.List<string>();
        bool inQuotes = false;
        int fieldStart = 0;
        for ( int i = 0; i < line.Length; i++ )
        {
            char c = line[ i ];
            if ( c == '"' )
            {
                // An escaped quote ("") toggles twice and leaves the state unchanged.
                inQuotes = !inQuotes;
            }
            else if ( c == ',' && !inQuotes )
            {
                fields.Add( line.Substring( fieldStart, i - fieldStart ) );
                fieldStart = i + 1;
            }
        }
        fields.Add( line.Substring( fieldStart ) );
        return fields;
    }

    /// <summary>
    /// Returns the field at <paramref name="index"/>, or an empty string when
    /// the line has fewer columns.
    /// </summary>
    private static string GetPart(
        System.Collections.Generic.List<string> parts,
        int index )
    {
        return index < parts.Count ? parts[ index ] : string.Empty;
    }

    /// <summary>
    /// Parses a float using the invariant culture; returns 0 when the text is
    /// not a valid number.
    /// </summary>
    private static float ParseFloatOrZero(
        string text )
    {
        float value;
        var styles = System.Globalization.NumberStyles.Float |
            System.Globalization.NumberStyles.AllowThousands;
        return float.TryParse( text, styles,
            System.Globalization.CultureInfo.InvariantCulture, out value ) ? value : 0f;
    }
}