//-----------------------------------------------------------------------------
// wx.NET - CSVParser.cs
//
// Parser for the MemLogDisplay
//
// (C) 2007 by Dr. Harald Meyer auf'm Hofe
//
// Licensed under the wxWidgets license, see LICENSE.txt for details.
//
// $Id: CSVParser.cs,v 1.1 2007/10/21 15:22:52 harald_meyer Exp $
//-----------------------------------------------------------------------------
using System;
using System.Text;
using System.Collections;
using System.IO;
namespace wx.MemLogDisplay{
/** Instances of this class parse CSV files (comma separated lists).
 * Currently, this parser recognizes 2 types of data: integers and strings.
 * The comma is used to separate different integers and strings.
 * Everything in between two double quotes will be interpreted as one string
 * (without the embracing quotes). This string may also contain commas.
 * Use two consecutive double quotes to have one in the resulting string.
 * Everything else will in a first try be interpreted as an integer (including
 * the sign). If this fails, it will be interpreted as a string (without
 * trimming or other options of normalization).
 *
 * Use method ReadLine() to read a System.IO.TextReader line by line.
 * The parser then produces an array of objects containing either integers
 * or strings and returns it. If the file has been read completely, this
 * method will return \c null.
 *
 * Details of the produced items:
 * - Quoted items are always returned as strings, even if they look like numbers.
 * - Unquoted integers are boxed as \c long, except for an unquoted item that
 *   ends the line, which is boxed as \c int (and stays a string if it does not
 *   fit into an \c int). NOTE(review): this asymmetry is kept on purpose because
 *   callers may unbox with the exact type; unify only together with the callers.
 * - An empty item between two commas is returned as the empty string. An empty
 *   item after a trailing comma is not reported (kept as it always was).
 * - Text preceding an opening quote within the same item is discarded.
 * - Integers are parsed with the invariant culture.
 */
public class CSVParser
{
    #region State
    /** The reader providing the lines to parse. */
    readonly TextReader _src;
    #endregion

    #region CTor
    /** Generate a new parser reading from the provided text reader.
     */
    public CSVParser(TextReader src)
    {
        this._src = src;
    }
    #endregion

    #region Public Methods
    /** Return the items in the current line or \c null if all lines have been parsed.
     * This method does not throw: if reading or parsing raises an exception, the
     * exception is written to System.Diagnostics.Trace and \c null is returned, so
     * a failing reader looks like the end of the input to the caller.
     */
    public object[] ReadLine()
    {
        try
        {
            string line = this._src.ReadLine();
            if (line == null)
            {
                return null;
            }
            return ParseLine(line);
        }
        catch (Exception exc)
        {
            System.Diagnostics.Trace.WriteLine(string.Format("CSVParser.ReadLine {0}", exc));
        }
        return null;
    }
    #endregion

    #region Private Helpers
    /** Split one line into its items. See the class description for the rules.
     */
    static object[] ParseLine(string line)
    {
        ArrayList items = new ArrayList();
        int itemStart = 0; // index of the first character of the item currently scanned
        int pos = 0;
        while (pos < line.Length)
        {
            char current = line[pos];
            if (current == '"')
            {
                int closing = FindClosingQuote(line, pos);
                if (closing < 0)
                {
                    // No matching closing quote: do not drop the data, keep the
                    // remainder of the line verbatim as a single string.
                    items.Add(line.Substring(pos));
                    break;
                }

                // Strip the embracing quotes and collapse escaped ("") quotes.
                string quoted = line.Substring(pos + 1, closing - pos - 1);
                items.Add(quoted.Replace("\"\"", "\""));

                int separator = SkipBlanks(closing + 1, line);
                if (separator < 0)
                {
                    break; // only blanks (or nothing) behind the closing quote
                }
                // FindClosingQuote guarantees that line[separator] is the comma.
                pos = separator + 1;
                itemStart = pos;
            }
            else if (current == ',')
            {
                if (itemStart < pos)
                {
                    items.Add(ConvertItem(line.Substring(itemStart, pos - itemStart), false));
                }
                else
                {
                    items.Add("");
                }
                ++pos;
                itemStart = pos;
            }
            else if (pos == line.Length - 1)
            {
                // The line ends within an unquoted item.
                items.Add(ConvertItem(line.Substring(itemStart), true));
                break;
            }
            else
            {
                ++pos;
            }
        }
        return items.ToArray();
    }

    /** Return the position of the quote closing the quoted string that opens at
     * \c openPos or a negative value if there is none. A quote closes the string
     * if it is not part of a pair of consecutive quotes and if it is followed only
     * by blanks up to the next comma or the end of the line. Any other single quote
     * is regarded as part of the string.
     */
    static int FindClosingQuote(string line, int openPos)
    {
        int pos = openPos + 1;
        while (pos < line.Length)
        {
            if (line[pos] != '"')
            {
                ++pos;
            }
            else if (pos + 1 < line.Length && line[pos + 1] == '"')
            {
                pos += 2; // escaped quote, the string continues
            }
            else
            {
                int next = SkipBlanks(pos + 1, line);
                if (next < 0 || line[next] == ',')
                {
                    return pos;
                }
                ++pos; // stray quote inside of the string
            }
        }
        return -1;
    }

    /** Convert the text of an unquoted item into an integer if possible and return
     * the text itself otherwise. The item ending the line is converted to \c int,
     * all other items to \c long (see the class description).
     */
    static object ConvertItem(string text, bool endsLine)
    {
        System.Globalization.NumberStyles style = System.Globalization.NumberStyles.Integer;
        System.Globalization.CultureInfo culture = System.Globalization.CultureInfo.InvariantCulture;
        if (endsLine)
        {
            int intItem;
            if (int.TryParse(text, style, culture, out intItem))
            {
                return intItem;
            }
        }
        else
        {
            long longItem;
            if (long.TryParse(text, style, culture, out longItem))
            {
                return longItem;
            }
        }
        return text;
    }

    /** This will return the next position after or on \c pos that is not a blank and a negative value if this position does not exist.
     * Every character up to and including the space character counts as a blank.
     */
    static int SkipBlanks(int pos, string line)
    {
        for (; pos < line.Length; ++pos)
        {
            if (line[pos] > ' ')
            {
                return pos;
            }
        }
        return -1;
    }
    #endregion
}
}
|