001: /*
002: * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
003: * PROPRIETARY/CONFIDENTIAL. Use of this product is subject to license terms.
004: */
005: package com.sun.portal.ubt.report.data.file.parser;
006:
007: import java.io.IOException;
008: import java.io.InputStream;
009: import java.io.InputStreamReader;
010: import java.io.Reader;
011:
012: public class CSVLexer {
013: private void ensureCharacterMapIsInstance() {
014: if (yycmap == yycmap_instance) {
015: yycmap_instance = new char[yycmap.length];
016: System.arraycopy(yycmap, 0, yycmap_instance, 0,
017: yycmap.length);
018: }
019: }
020:
021: private boolean charIsSafe(char c) {
022: return yycmap_instance[c] == yycmap[97]
023: || yycmap_instance[c] == yycmap[9];
024: }
025:
026: private void updateCharacterClasses(char c, char c1) {
027: ensureCharacterMapIsInstance();
028: yycmap_instance[c1] = yycmap_instance[c];
029: switch (c) {
030: case 34: // '"'
031: case 44: // ','
032: yycmap_instance[c] = yycmap[97];
033: break;
034:
035: default:
036: yycmap_instance[c] = yycmap[c];
037: break;
038: }
039: }
040:
041: public void changeDelimiter(char c) throws BadDelimiterException {
042: if (c == delimiter)
043: return;
044: if (!charIsSafe(c)) {
045: throw new BadDelimiterException(c
046: + " is not a safe delimiter.");
047: } else {
048: updateCharacterClasses(delimiter, c);
049: delimiter = c;
050: return;
051: }
052: }
053:
054: public void changeQuote(char c) throws BadQuoteException {
055: if (c == quote)
056: return;
057: if (!charIsSafe(c)) {
058: throw new BadQuoteException(c + " is not a safe quote.");
059: } else {
060: updateCharacterClasses(quote, c);
061: quote = c;
062: return;
063: }
064: }
065:
066: public void setEscapes(String s, String s1) {
067: int i = s.length();
068: if (s1.length() < i)
069: i = s1.length();
070: escapes = s.substring(0, i);
071: replacements = s1.substring(0, i);
072: }
073:
074: private String unescape(String s) {
075: if (s.indexOf('\\') == -1)
076: return s.substring(1, s.length() - 1);
077: StringBuffer stringbuffer = new StringBuffer(s.length());
078: for (int i = 1; i < s.length() - 1; i++) {
079: char c = s.charAt(i);
080: if (c == '\\') {
081: char c1 = s.charAt(++i);
082: if (c1 == '\\' || c1 == '"') {
083: stringbuffer.append(c1);
084: continue;
085: }
086: int j;
087: if ((j = escapes.indexOf(c1)) != -1)
088: stringbuffer.append(replacements.charAt(j));
089: else
090: stringbuffer.append(c1);
091: } else {
092: stringbuffer.append(c);
093: }
094: }
095:
096: return stringbuffer.toString();
097: }
098:
099: public void setCommentStart(String s) {
100: commentDelims = s;
101: }
102:
103: public int getLineNumber() {
104: return lines;
105: }
106:
107: public CSVLexer(Reader reader) {
108: yycmap_instance = yycmap;
109: yy_lexical_state = 0;
110: yy_buffer = new char[16384];
111: delimiter = ',';
112: quote = '"';
113: escapes = "";
114: replacements = "";
115: commentDelims = "";
116: addLine = 1;
117: lines = 0;
118: yy_reader = reader;
119: }
120:
121: public CSVLexer(InputStream inputstream) {
122: this (((Reader) (new InputStreamReader(inputstream))));
123: }
124:
125: private static int[] yy_unpack() {
126: int ai[] = new int[126];
127: int i = 0;
128: i = yy_unpack(
129: "\001\005\001\006\001\007\001\b\001\t\001\n\001\005\001\013\001\f\001\r\001\016\001\017\001\020\001\013\001\021\001\022\001\007\001\b\001\023\002\021\001\024\001\004\001\007\001\b\003\024\001\005\001\025\003\000\002\005\001\000\001\026\001\007\001\b\006\000\001\b\n\000\005\n\001\027\001\030\001\013\001\031\003\000\002\013\001\000\001\f\b\000\001\016\003\000\005\020\001\032\001\033\002\021\003\000\003\021\001\022\001\007\001\b\001\000\002\021\002\024\002\000\003\024\007\n\007\020",
130: i, ai);
131: return ai;
132: }
133:
134: private static int yy_unpack(String s, int i, int ai[]) {
135: int j = 0;
136: int k = i;
137: for (int l = s.length(); j < l;) {
138: int i1 = s.charAt(j++);
139: int j1 = s.charAt(j++);
140: j1--;
141: do
142: ai[k++] = j1;
143: while (--i1 > 0);
144: }
145:
146: return k;
147: }
148:
149: private static char[] yy_unpack_cmap(String s) {
150: char ac[] = new char[0x10000];
151: int i = 0;
152: int j = 0;
153: while (i < 30) {
154: int k = s.charAt(i++);
155: char c = s.charAt(i++);
156: do
157: ac[j++] = c;
158: while (--k > 0);
159: }
160: return ac;
161: }
162:
163: private boolean yy_refill() throws IOException {
164: if (yy_startRead > 0) {
165: System.arraycopy(yy_buffer, yy_startRead, yy_buffer, 0,
166: yy_endRead - yy_startRead);
167: yy_endRead -= yy_startRead;
168: yy_currentPos -= yy_startRead;
169: yy_markedPos -= yy_startRead;
170: yy_pushbackPos -= yy_startRead;
171: yy_startRead = 0;
172: }
173: if (yy_currentPos >= yy_buffer.length) {
174: char ac[] = new char[yy_currentPos * 2];
175: System.arraycopy(yy_buffer, 0, ac, 0, yy_buffer.length);
176: yy_buffer = ac;
177: }
178: int i = yy_reader.read(yy_buffer, yy_endRead, yy_buffer.length
179: - yy_endRead);
180: if (i < 0) {
181: return true;
182: } else {
183: yy_endRead += i;
184: return false;
185: }
186: }
187:
188: private final void yybegin(int i) {
189: yy_lexical_state = i;
190: }
191:
192: private final String yytext() {
193: return new String(yy_buffer, yy_startRead, yy_markedPos
194: - yy_startRead);
195: }
196:
197: private final int yylength() {
198: return yy_markedPos - yy_startRead;
199: }
200:
201: private void yy_ScanError(int i) {
202: String s;
203: try {
204: s = YY_ERROR_MSG[i];
205: } catch (ArrayIndexOutOfBoundsException arrayindexoutofboundsexception) {
206: s = YY_ERROR_MSG[0];
207: }
208: throw new Error(s);
209: }
210:
211: private void yypushback(int i) {
212: if (i > yylength())
213: yy_ScanError(3);
214: yy_markedPos -= i;
215: }
216:
217: public String getNextToken() throws IOException {
218: int i1 = yy_endRead;
219: char ac[] = yy_buffer;
220: char ac1[] = yycmap_instance;
221: int ai[] = yytrans;
222: int ai1[] = yy_rowMap;
223: byte abyte0[] = YY_ATTRIBUTE;
224: do {
225: int l = yy_markedPos;
226: int i = -1;
227: int j;
228: int k = j = yy_currentPos = yy_startRead = l;
229: yy_state = yy_lexical_state;
230: char c;
231: byte byte0;
232: label0: do {
233: do {
234: if (j < i1) {
235: c = ac[j++];
236: } else {
237: if (yy_atEOF) {
238: c = '\uFFFF';
239: break label0;
240: }
241: yy_currentPos = j;
242: yy_markedPos = l;
243: boolean flag = yy_refill();
244: j = yy_currentPos;
245: l = yy_markedPos;
246: ac = yy_buffer;
247: i1 = yy_endRead;
248: if (flag) {
249: c = '\uFFFF';
250: break label0;
251: }
252: c = ac[j++];
253: }
254: int j1 = ai[ai1[yy_state] + ac1[c]];
255: if (j1 == -1)
256: break label0;
257: yy_state = j1;
258: byte0 = abyte0[yy_state];
259: } while ((byte0 & 1) != 1);
260: i = yy_state;
261: l = j;
262: } while ((byte0 & 8) != 8);
263: yy_markedPos = l;
264: switch (i) {
265: case 15: // '\017'
266: yybegin(0);
267: return yytext();
268:
269: case 8: // '\b'
270: lines += addLine;
271: addLine = 0;
272: yybegin(1);
273: return "";
274:
275: case 10: // '\n'
276: yybegin(2);
277: return yytext();
278:
279: case 5: // '\005'
280: lines += addLine;
281: addLine = 0;
282: yybegin(1);
283: break;
284:
285: case 6: // '\006'
286: case 7: // '\007'
287: addLine++;
288: yybegin(0);
289: break;
290:
291: case 9: // '\t'
292: lines += addLine;
293: addLine = 0;
294: yybegin(0);
295: return yytext();
296:
297: case 4: // '\004'
298: lines += addLine;
299: addLine = 0;
300: String s = yytext();
301: if (commentDelims.indexOf(s.charAt(0)) == -1) {
302: yybegin(2);
303: return s;
304: }
305: yybegin(3);
306: break;
307:
308: case 12: // '\f'
309: case 13: // '\r'
310: addLine++;
311: yybegin(0);
312: return "";
313:
314: case 25: // '\031'
315: yybegin(2);
316: return unescape(yytext());
317:
318: case 22: // '\026'
319: lines += addLine;
320: addLine = 0;
321: yybegin(2);
322: return unescape(yytext());
323:
324: case 14: // '\016'
325: yybegin(1);
326: return "";
327:
328: case 18: // '\022'
329: yybegin(1);
330: break;
331:
332: case 20: // '\024'
333: case 21: // '\025'
334: case 23: // '\027'
335: case 24: // '\030'
336: case 26: // '\032'
337: case 27: // '\033'
338: default:
339: if (c == '\uFFFF' && yy_startRead == yy_currentPos) {
340: yy_atEOF = true;
341: switch (yy_lexical_state) {
342: case 1: // '\001'
343: yybegin(0);
344: addLine++;
345: return "";
346:
347: default:
348: return null;
349:
350: case 28: // '\034'
351: break;
352: }
353: } else {
354: yy_ScanError(2);
355: }
356: break;
357:
358: case 1: // '\001'
359: case 2: // '\002'
360: case 3: // '\003'
361: case 11: // '\013'
362: case 16: // '\020'
363: case 17: // '\021'
364: case 19: // '\023'
365: case 28: // '\034'
366: case 29: // '\035'
367: case 30: // '\036'
368: case 31: // '\037'
369: case 32: // ' '
370: case 33: // '!'
371: case 34: // '"'
372: case 35: // '#'
373: case 36: // '$'
374: case 37: // '%'
375: case 38: // '&'
376: case 39: // '\''
377: case 40: // '('
378: case 41: // ')'
379: case 42: // '*'
380: break;
381: }
382: } while (true);
383: }
384:
385: private static final int YYEOF = -1;
386: private static final int YY_BUFFERSIZE = 16384;
387: public static final int BEFORE = 1;
388: public static final int YYINITIAL = 0;
389: public static final int COMMENT = 3;
390: public static final int AFTER = 2;
391: private static final String yycmap_packed = "\t\000\001\001\001\003\001\000\001\001\001\002\022\000\001\001\001\000\001\005\t\000\001\004/\000\001\006\uFFA3\0";
392: private static final char yycmap[] = yy_unpack_cmap("\t\000\001\001\001\003\001\000\001\001\001\002\022\000\001\001\001\000\001\005\t\000\001\004/\000\001\006\uFFA3\0");
393: private static final int yy_rowMap[] = { 0, 7, 14, 21, 28, 35, 42,
394: 49, 49, 56, 63, 70, 77, 49, 49, 84, 91, 98, 49, 105, 28,
395: 35, 49, 112, 63, 49, 119 };
396: private static final String yy_packed0 = "\001\005\001\006\001\007\001\b\001\t\001\n\001\005\001\013\001\f\001\r\001\016\001\017\001\020\001\013\001\021\001\022\001\007\001\b\001\023\002\021\001\024\001\004\001\007\001\b\003\024\001\005\001\025\003\000\002\005\001\000\001\026\001\007\001\b\006\000\001\b\n\000\005\n\001\027\001\030\001\013\001\031\003\000\002\013\001\000\001\f\b\000\001\016\003\000\005\020\001\032\001\033\002\021\003\000\003\021\001\022\001\007\001\b\001\000\002\021\002\024\002\000\003\024\007\n\007\020";
397: private static final int yytrans[] = yy_unpack();
398: private char yycmap_instance[];
399: private static final int YY_UNKNOWN_ERROR = 0;
400: private static final int YY_ILLEGAL_STATE = 1;
401: private static final int YY_NO_MATCH = 2;
402: private static final int YY_PUSHBACK_2BIG = 3;
403: private static final String YY_ERROR_MSG[] = {
404: "Unkown internal scanner error",
405: "Internal error: unknown state",
406: "Error: could not match input",
407: "Error: pushback value was too large" };
408: private static final byte YY_ATTRIBUTE[] = { 0, 1, 1, 1, 1, 1, 1,
409: 9, 9, 1, 1, 1, 1, 9, 9, 1, 1, 1, 9, 1, 0, 0, 9, 0, 0, 9, 0 };
410: private Reader yy_reader;
411: private int yy_state;
412: private int yy_lexical_state;
413: private char yy_buffer[];
414: private int yy_markedPos;
415: private int yy_pushbackPos;
416: private int yy_currentPos;
417: private int yy_startRead;
418: private int yy_endRead;
419: private boolean yy_atEOF;
420: private char delimiter;
421: private char quote;
422: private String escapes;
423: private String replacements;
424: private String commentDelims;
425: private int addLine;
426: private int lines;
427:
428: }
|