Hi,
I have an issue with reading CSV files.
I am to reading CSV file and putting it in a Datatable in C#. I am using a regular expression to read the values. Below is the code. Now, it reads CSV file without any issues only if all the fields are not null. If any field is blank, it moves the values to the left and displays the value under invalid column.
Example is shown below:
A part of CSV file.
I am reading the first row as column name of the table and subsequent rows as values for those columns.
Input:
"BEGDOC","DOCID","CUSTODIAN","EvidenceFile"
"TCP-REV-00000015",29,"Linda Difilippo","NSF File"
Output:
BEGDOC DOCID CUSTODIAN EvidenceFile
TCP-REV-00000014 29 Linda Difilippo NSF File
But, If I have an input like this(BegDoc column is empty):
"BEGDOC","DOCID","CUSTODIAN","EvidenceFile"
“”, 29,"Linda Difilippo","NSF File"
Output I get as:
BEGDOC DOCID CUSTODIAN EvidenceFile
29 Linda Difilippo NSF File
29 comes under BEGDOC column, which was suppose to come under DOCID.
Please help me to fix this?
C# Code:
public DataTable ParseCSVFile(string path)
{
string inputString = "";
// check that the file exists before opening it
if (File.Exists(path))
{
StreamReader sr = new StreamReader(path);
inputString = sr.ReadToEnd();
sr.Close();
}
return ParseCSV(inputString);
}
public DataTable ParseCSV(string inputString)
{
DataTable dt = new DataTable();
// declare the Regular Expression that will match versus the input string
Regex re = new Regex("((?<field>[^\",\\r\\n]+)|\"(?<field>([^\"]|\"\")+)\")(,|(?<rowbreak>\\r\\n|\\n|$))");
ArrayList colArray = new ArrayList();
ArrayList rowArray = new ArrayList();
int colCount = 0;
int maxColCount = 0;
string rowbreak = "";
string field = "";
MatchCollection mc = re.Matches(inputString);
foreach (Match m in mc)
{
// retrieve the field and replace two double-quotes with a single double-quote
field = m.Result("${field}").Replace("\"\"", "\"");
rowbreak = m.Result("${rowbreak}");
if (field.Length > 0)
{
colArray.Add(field);
colCount++;
}
if (rowbreak.Length > 0)
{
// add the column array to the row Array List
rowArray.Add(colArray.ToArray());
// create a new Array List to hold the field values
colArray = new ArrayList();
if (colCount > maxColCount)
maxColCount = colCount;
colCount = 0;
}
}
if (rowbreak.Length == 0)
{
// this is executed when the last line doesn't
// end with a line break
rowArray.Add(colArray.ToArray());
if (colCount > maxColCount)
maxColCount = colCount;
}
// create the columns for the table
// for (int i = 0; i < maxColCount; i++)
// dt.Columns.Add(String.Format("col{0:000}", i));
//Ravi Coding started....
Array ra1 = rowArray.ToArray();
Array ca1 = (Array)(ra1.GetValue(0));
for (int j = 0; j < ca1.Length; j++)
{
string str = ca1.GetValue(j).ToString();
dt.Columns.Add(str);
}
//Ravi Coding ends here...
// convert the row Array List into an Array object for easier access
Array ra = rowArray.ToArray();
for (int i = 1; i < ra.Length; i++)
{
// create a new DataRow
DataRow dr = dt.NewRow();
// convert the column Array List into an Array object for easier access
Array ca = (Array)(ra.GetValue(i));
// add each field into the new DataRow
for (int j = 0; j < ca.Length; j++)
dr[j] = ca.GetValue(j);
// add the new DataRow to the DataTable
dt.Rows.Add(dr);
}
// in case no data was parsed, create a single column
if (dt.Columns.Count == 0)
dt.Columns.Add("NoData");
return dt;
}