001: package net.sourceforge.squirrel_sql.client.session.parser.kernel;
002:
003: import net.sourceforge.squirrel_sql.client.session.parser.kernel.ErrorStream;
004:
005: import java.io.*;
006: import java.util.*;
007:
/**
 * A single token handed out by {@code Scanner.Scan()}.
 */
class Token {
    /** Kind code of the token. */
    int kind;

    /** Position of the token in the source text (starting at 0). */
    int pos;

    /** Column of the token; {@code Scanner.Scan()} assigns {@code pos - lineStart + 1}. */
    int col;

    /** Line of the token (starting at 1). */
    int line;

    /** Text of the token exactly as it was read. */
    String str;

    /** Upper-cased form of the token text. */
    String val;
}
016:
017: public class Scanner {
018:
019: public abstract static class Buffer {
020: public static final char eof = 65535;
021:
022: int _bufLen;
023: int _pos;
024:
025: protected void setIndex(int position) {
026: if (position < 0)
027: position = 0;
028: else if (position >= _bufLen)
029: position = _bufLen;
030: _pos = position;
031: }
032:
033: protected abstract char read();
034: }
035:
036: static class FBuffer extends Buffer {
037: static char[] buf;
038:
039: FBuffer(File file) throws IOException {
040: _bufLen = (int) file.length();
041:
042: FileReader fr = new FileReader(file);
043: buf = new char[_bufLen];
044:
045: fr.read(buf);
046: _pos = 0;
047: }
048:
049: protected char read() {
050: if (_pos < _bufLen)
051: return buf[_pos++];
052: else
053: return eof;
054: }
055: }
056:
057: static class SBuffer extends Buffer {
058: String chars;
059:
060: SBuffer(String string) {
061: _bufLen = string.length();
062: chars = string;
063: _pos = 0;
064: }
065:
066: protected char read() {
067: if (_pos < _bufLen)
068: return chars.charAt(_pos++);
069: else
070: return eof;
071: }
072: }
073:
074: private static final char EOF = '\0';
075: private static final char CR = '\r';
076: private static final char LF = '\n';
077: private static final int noSym = 103;
078: private static final int[] start = { ParsingConstants.KW_AS, 0, 0,
079: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
080: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 1, 1, 0, 0,
081: ParsingConstants.KIND_OPENING_BRAKET,
082: ParsingConstants.KW_UNION, 22, ParsingConstants.KW_ALL,
083: ParsingConstants.KW_INSERT, ParsingConstants.KW_DISTINCT,
084: ParsingConstants.KW_UPDATE, 2,
085: ParsingConstants.KIND_EQUALS, ParsingConstants.KW_EXCEPT,
086: ParsingConstants.KW_EXCEPT, ParsingConstants.KW_EXCEPT,
087: ParsingConstants.KW_EXCEPT, ParsingConstants.KW_EXCEPT,
088: ParsingConstants.KW_EXCEPT, ParsingConstants.KW_EXCEPT,
089: ParsingConstants.KW_EXCEPT, ParsingConstants.KW_EXCEPT,
090: ParsingConstants.KW_EXCEPT, ParsingConstants.KW_SET,
091: ParsingConstants.KW_INTERSECT, ParsingConstants.KW_INTO,
092: ParsingConstants.KW_MINUS, ParsingConstants.KW_FROM, 0, 1,
093: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
094: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
095: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
096: 1, 1, 1, 0 };
097:
098: // set of characters to be ignored by the scanner
099: private static BitSet ignore = new BitSet(128);
100: static {
101: ignore.set(1);
102: ignore.set(2);
103: ignore.set(3);
104: ignore.set(4);
105: ignore.set(ParsingConstants.KIND_OPENING_BRAKET);
106: ignore.set(6);
107: ignore.set(ParsingConstants.KW_UNION);
108: ignore.set(ParsingConstants.KW_EXCEPT);
109: ignore.set(ParsingConstants.KW_INTERSECT);
110: ignore.set(ParsingConstants.KW_MINUS);
111: ignore.set(ParsingConstants.KW_ALL);
112: ignore.set(ParsingConstants.KW_UPDATE);
113: ignore.set(ParsingConstants.KW_SET);
114: ignore.set(ParsingConstants.KIND_EQUALS);
115: ignore.set(ParsingConstants.KW_INSERT);
116: ignore.set(ParsingConstants.KW_INTO);
117: ignore.set(ParsingConstants.KW_VALUES);
118: ignore.set(ParsingConstants.KW_DELETE);
119: ignore.set(ParsingConstants.KW_FROM);
120: ignore.set(ParsingConstants.KW_SELECT);
121: ignore.set(ParsingConstants.KW_DISTINCT);
122: ignore.set(22);
123: ignore.set(ParsingConstants.KW_AS);
124: ignore.set(ParsingConstants.KW_JOIN);
125: ignore.set(ParsingConstants.KW_CROSS);
126: ignore.set(ParsingConstants.KW_NATURAL);
127: ignore.set(ParsingConstants.KW_INNER);
128: ignore.set(ParsingConstants.KW_FULL);
129: ignore.set(ParsingConstants.KW_LEFT);
130: ignore.set(ParsingConstants.KW_RIGHT);
131: ignore.set(ParsingConstants.KW_OUTER);
132: ignore.set(ParsingConstants.KW_ON);
133:
134: }
135:
136: ErrorStream err; // error messages
137:
138: private Buffer buf; // data, random accessible
139: protected Token t; // current token
140: protected char strCh; // current input character (original)
141: protected char ch; // current input character (for token)
142: protected char lastCh; // last input character
143: protected int pos; // position of current character
144: protected int line; // line number of current character
145: protected int lineStart; // start position of current line
146:
/**
 * Creates a scanner over the contents of a file; the file's name is passed
 * on to the error stream.
 *
 * @param file the file to scan
 * @param e    receiver of error messages
 * @throws IOException if the file cannot be read
 */
public Scanner(File file, ErrorStream e) throws IOException {
    this.buf = new FBuffer(file);
    init(e, file.getName());
}
151:
/**
 * Creates a scanner over an in-memory string; the error stream gets an
 * empty file name.
 *
 * @param parseString the text to scan
 * @param e           receiver of error messages
 */
public Scanner(String parseString, ErrorStream e) {
    this.buf = new SBuffer(parseString);
    init(e, "");
}
156:
/**
 * Creates a scanner over a caller-supplied buffer; the error stream gets an
 * empty file name.
 *
 * @param buff the character source to scan
 * @param e    receiver of error messages
 */
public Scanner(Buffer buff, ErrorStream e) {
    buf = buff;
    init(e, "");
}
161:
162: private void init(ErrorStream e, String eName) {
163: err = e;
164: err.fileName = eName;
165:
166: pos = -1;
167: line = 1;
168: lineStart = 0;
169: lastCh = 0;
170: NextCh();
171: }
172:
173: void setPos(int position) {
174: buf.setIndex(position);
175: }
176:
177: private void NextCh() {
178: lastCh = ch;
179: strCh = buf.read();
180: pos++;
181: ch = Character.toUpperCase(strCh);
182: if (ch == '\uffff')
183: ch = EOF;
184: else if (ch == CR) {
185: line++;
186: lineStart = pos + 1;
187: } else if (ch == LF) {
188: if (lastCh != CR)
189: line++;
190: lineStart = pos + 1;
191: } else if (ch > '\u007f') {
192: err.StoreError(0, line, pos - lineStart + 1,
193: "invalid character in source file");
194: err.count++;
195: ch = ' ';
196: }
197: }
198:
199: private final boolean Comment0() {
200: int level = 1, line0 = line, lineStart0 = lineStart;
201: char startCh;
202: NextCh();
203: if (ch == '-') {
204: NextCh();
205: for (;;) {
206: if (ch == ParsingConstants.KW_MINUS) {
207: level--;
208: if (level == 0) {
209: NextCh();
210: return true;
211: }
212: NextCh();
213: } else if (ch == EOF)
214: return false;
215: else
216: NextCh();
217: }
218: } else {
219: if (ch == CR || ch == LF) {
220: line--;
221: lineStart = lineStart0;
222: }
223: pos = pos - 2;
224: setPos(pos + 1);
225: NextCh();
226: }
227: return false;
228: }
229:
230: private final boolean Comment1() {
231: int level = 1, line0 = line, lineStart0 = lineStart;
232: char startCh;
233: NextCh();
234: if (ch == '*') {
235: NextCh();
236: for (;;) {
237: if (ch == '*') {
238: NextCh();
239: if (ch == '/') {
240: level--;
241: if (level == 0) {
242: NextCh();
243: return true;
244: }
245: NextCh();
246: }
247: } else if (ch == '/') {
248: NextCh();
249: if (ch == '*') {
250: level++;
251: NextCh();
252: }
253: } else if (ch == EOF)
254: return false;
255: else
256: NextCh();
257: }
258: } else {
259: if (ch == CR || ch == LF) {
260: line--;
261: lineStart = lineStart0;
262: }
263: pos = pos - 2;
264: setPos(pos + 1);
265: NextCh();
266: }
267: return false;
268: }
269:
270: private void CheckLiteral(StringBuffer buf) {
271: t.val = buf.toString().toUpperCase();
272: switch (t.val.charAt(0)) {
273: case 'A': {
274: if (t.val.equals("ACTION"))
275: t.kind = ParsingConstants.KW_ACTION;
276: else if (t.val.equals("ADD"))
277: t.kind = ParsingConstants.KW_ADD;
278: else if (t.val.equals("ALL"))
279: t.kind = ParsingConstants.KW_ALL;
280: else if (t.val.equals("ALTER"))
281: t.kind = ParsingConstants.KW_ALTER;
282: else if (t.val.equals("AND"))
283: t.kind = ParsingConstants.KW_AND;
284: else if (t.val.equals("AS"))
285: t.kind = ParsingConstants.KW_AS;
286: else if (t.val.equals("ASC"))
287: t.kind = ParsingConstants.KW_ASC;
288: else if (t.val.equals("AVG"))
289: t.kind = ParsingConstants.KW_AVG;
290: break;
291: }
292: case 'B': {
293: if (t.val.equals("BETWEEN"))
294: t.kind = ParsingConstants.KW_BETWEEN;
295: else if (t.val.equals("BY"))
296: t.kind = ParsingConstants.KW_BY;
297: break;
298: }
299: case 'C': {
300: if (t.val.equals("CASCADE"))
301: t.kind = ParsingConstants.KW_CASCADE;
302: else if (t.val.equals("CHAR"))
303: t.kind = ParsingConstants.KW_CHAR;
304: else if (t.val.equals("CHARACTER"))
305: t.kind = ParsingConstants.KW_CHARACTER;
306: else if (t.val.equals("CASE"))
307: t.kind = ParsingConstants.KW_CASE;
308: else if (t.val.equals("CHECK"))
309: t.kind = ParsingConstants.KW_CHECK;
310: else if (t.val.equals("COMMIT"))
311: t.kind = ParsingConstants.KW_COMMIT;
312: else if (t.val.equals("CONSTRAINT"))
313: t.kind = ParsingConstants.KW_CONSTRAINT;
314: else if (t.val.equals("COUNT"))
315: t.kind = ParsingConstants.KW_COUNT;
316: else if (t.val.equals("CREATE"))
317: t.kind = ParsingConstants.KW_CREATE;
318: else if (t.val.equals("CROSS"))
319: t.kind = ParsingConstants.KW_CROSS;
320: break;
321: }
322: case 'D': {
323: if (t.val.equals("DATE"))
324: t.kind = ParsingConstants.KW_DATE;
325: else if (t.val.equals("DEFAULT"))
326: t.kind = ParsingConstants.KW_DEFAULT;
327: else if (t.val.equals("DELETE"))
328: t.kind = ParsingConstants.KW_DELETE;
329: else if (t.val.equals("DESC"))
330: t.kind = ParsingConstants.KW_DESC;
331: else if (t.val.equals("DISTINCT"))
332: t.kind = ParsingConstants.KW_DISTINCT;
333: else if (t.val.equals("DROP"))
334: t.kind = ParsingConstants.KW_DROP;
335: break;
336: }
337: case 'E': {
338: if (t.val.equals("ELSE"))
339: t.kind = ParsingConstants.KW_ELSE;
340: else if (t.val.equals("END"))
341: t.kind = ParsingConstants.KW_END;
342: else if (t.val.equals("ESCAPE"))
343: t.kind = ParsingConstants.KW_ESCAPE;
344: else if (t.val.equals("EXCEPT"))
345: t.kind = ParsingConstants.KW_EXCEPT;
346: break;
347: }
348: case 'F': {
349: if (t.val.equals("FOREIGN"))
350: t.kind = ParsingConstants.KW_FOREIGN;
351: else if (t.val.equals("FROM"))
352: t.kind = ParsingConstants.KW_FROM;
353: else if (t.val.equals("FULL"))
354: t.kind = ParsingConstants.KW_FULL;
355: break;
356: }
357: case 'G': {
358: if (t.val.equals("GROUP"))
359: t.kind = ParsingConstants.KW_GROUP;
360: break;
361: }
362: case 'H': {
363: if (t.val.equals("HAVING"))
364: t.kind = ParsingConstants.KW_HAVING;
365: break;
366: }
367: case 'I': {
368: if (t.val.equals("IN"))
369: t.kind = ParsingConstants.KW_IN;
370: else if (t.val.equals("INDEX"))
371: t.kind = ParsingConstants.KW_INDEX;
372: else if (t.val.equals("INNER"))
373: t.kind = ParsingConstants.KW_INNER;
374: else if (t.val.equals("INSERT"))
375: t.kind = ParsingConstants.KW_INSERT;
376: else if (t.val.equals("INT"))
377: t.kind = ParsingConstants.KW_INT;
378: else if (t.val.equals("INTEGER"))
379: t.kind = ParsingConstants.KW_INTEGER;
380: else if (t.val.equals("INTERSECT"))
381: t.kind = ParsingConstants.KW_INTERSECT;
382: else if (t.val.equals("INTO"))
383: t.kind = ParsingConstants.KW_INTO;
384: else if (t.val.equals("IS"))
385: t.kind = ParsingConstants.KW_IS;
386: break;
387: }
388: case 'J': {
389: if (t.val.equals("JOIN"))
390: t.kind = ParsingConstants.KW_JOIN;
391: break;
392: }
393: case 'K': {
394: if (t.val.equals("KEY"))
395: t.kind = ParsingConstants.KW_KEY;
396: break;
397: }
398: case 'L': {
399: if (t.val.equals("LEFT"))
400: t.kind = ParsingConstants.KW_LEFT;
401: else if (t.val.equals("LIKE"))
402: t.kind = ParsingConstants.KW_LIKE;
403: break;
404: }
405: case 'M': {
406: if (t.val.equals("MATCH"))
407: t.kind = ParsingConstants.KW_MATCH;
408: else if (t.val.equals("MAX"))
409: t.kind = ParsingConstants.KW_MAX;
410: else if (t.val.equals("MIN"))
411: t.kind = ParsingConstants.KW_MIN;
412: else if (t.val.equals("MINUS"))
413: t.kind = ParsingConstants.KW_MINUS;
414: else if (t.val.equals("MONTH"))
415: t.kind = ParsingConstants.KW_MONTH;
416: break;
417: }
418: case 'N': {
419: if (t.val.equals("NATURAL"))
420: t.kind = ParsingConstants.KW_NATURAL;
421: else if (t.val.equals("NO"))
422: t.kind = ParsingConstants.KW_NO;
423: else if (t.val.equals("NOT"))
424: t.kind = ParsingConstants.KW_NOT;
425: else if (t.val.equals("NULL"))
426: t.kind = ParsingConstants.KW_NULL;
427: else if (t.val.equals("NUMERIC"))
428: t.kind = ParsingConstants.KW_NUMERIC;
429: break;
430: }
431: case 'O': {
432: if (t.val.equals("ON"))
433: t.kind = ParsingConstants.KW_ON;
434: else if (t.val.equals("OR"))
435: t.kind = ParsingConstants.KW_OR;
436: else if (t.val.equals("ORDER"))
437: t.kind = ParsingConstants.KW_ORDER;
438: else if (t.val.equals("OUTER"))
439: t.kind = ParsingConstants.KW_OUTER;
440: break;
441: }
442: case 'P': {
443: if (t.val.equals("PARTIAL"))
444: t.kind = ParsingConstants.KW_PARTIAL;
445: else if (t.val.equals("PRIMARY"))
446: t.kind = ParsingConstants.KW_PRIMARY;
447: break;
448: }
449: case 'R': {
450: if (t.val.equals("REFERENCES"))
451: t.kind = ParsingConstants.KW_REFERENCES;
452: else if (t.val.equals("RESTRICT"))
453: t.kind = ParsingConstants.KW_RESTRICT;
454: else if (t.val.equals("RIGHT"))
455: t.kind = ParsingConstants.KW_RIGHT;
456: else if (t.val.equals("ROLLBACK"))
457: t.kind = ParsingConstants.KW_ROLLBACK;
458: break;
459: }
460: case 'S': {
461: if (t.val.equals("SELECT"))
462: t.kind = ParsingConstants.KW_SELECT;
463: else if (t.val.equals("SET"))
464: t.kind = ParsingConstants.KW_SET;
465: else if (t.val.equals("SMALLINT"))
466: t.kind = ParsingConstants.KW_SMALLINT;
467: else if (t.val.equals("SUM"))
468: t.kind = ParsingConstants.KW_SUM;
469: break;
470: }
471: case 'T': {
472: if (t.val.equals("TABLE"))
473: t.kind = ParsingConstants.KW_TABLE;
474: else if (t.val.equals("THEN"))
475: t.kind = ParsingConstants.KW_THEN;
476: else if (t.val.equals("TIME"))
477: t.kind = ParsingConstants.KW_TIME;
478: else if (t.val.equals("TIMESTAMP"))
479: t.kind = ParsingConstants.KW_TIMESTAMP;
480: break;
481: }
482: case 'U': {
483: if (t.val.equals("UNION"))
484: t.kind = ParsingConstants.KW_UNION;
485: else if (t.val.equals("UNIQUE"))
486: t.kind = ParsingConstants.KW_UNIQUE;
487: else if (t.val.equals("UPDATE"))
488: t.kind = ParsingConstants.KW_UPDATE;
489: else if (t.val.equals("UPPER"))
490: t.kind = ParsingConstants.KW_UPPER;
491: else if (t.val.equals("USING"))
492: t.kind = ParsingConstants.KW_USING;
493: break;
494: }
495: case 'V': {
496: if (t.val.equals("VALUES"))
497: t.kind = ParsingConstants.KW_VALUES;
498: else if (t.val.equals("VARCHAR"))
499: t.kind = ParsingConstants.KW_VARCHAR;
500: else if (t.val.equals("VIEW"))
501: t.kind = ParsingConstants.KW_VIEW;
502: break;
503: }
504: case 'W': {
505: if (t.val.equals("WHERE"))
506: t.kind = ParsingConstants.KW_WHERE;
507: else if (t.val.equals("WHEN"))
508: t.kind = ParsingConstants.KW_WHEN;
509: else if (t.val.equals("WORK"))
510: t.kind = ParsingConstants.KW_WORK;
511: break;
512: }
513: case 'Y': {
514: if (t.val.equals("YEAR"))
515: t.kind = ParsingConstants.KW_YEAR;
516: break;
517: }
518: }
519: }
520:
521: Token Scan() {
522: while (ignore.get((int) ch))
523: NextCh();
524: if (ch == '-' && Comment0() || ch == '/' && Comment1())
525: return Scan();
526: t = new Token();
527: t.pos = pos;
528: t.col = pos - lineStart + 1;
529: t.line = line;
530: StringBuffer buf = new StringBuffer();
531: int state = start[ch];
532: int apx = 0;
533: loop: for (;;) {
534: buf.append(strCh);
535: NextCh();
536: switch (state) {
537: case 0: {
538: t.kind = noSym;
539: break loop;
540: } // NextCh already done
541: case 1:
542: if ((ch == '!' || ch >= '#' && ch <= '$' || ch >= '0'
543: && ch <= '9' || ch >= '@' && ch <= '{' || ch >= '}')) {
544: break;
545: } else {
546: t.kind = 1;
547: CheckLiteral(buf);
548: break loop;
549: }
550: case 2:
551: if ((ch >= '0' && ch <= '9')) {
552: state = 3;
553: break;
554: } else {
555: t.kind = 22;
556: break loop;
557: }
558: case 3:
559: if ((ch >= '0' && ch <= '9')) {
560: break;
561: } else {
562: t.kind = 3;
563: break loop;
564: }
565: case 4:
566: if ((ch >= ' ' && ch <= '!' || ch >= '#')) {
567: break;
568: } else if (ch == '"') {
569: state = 6;
570: break;
571: } else {
572: t.kind = noSym;
573: break loop;
574: }
575: case ParsingConstants.KIND_OPENING_BRAKET:
576: if ((ch >= ' ' && ch <= '&' || ch >= '(')) {
577: break;
578: } else if (ch == 39) {
579: state = 6;
580: break;
581: } else {
582: t.kind = noSym;
583: break loop;
584: }
585: case 6: {
586: t.kind = 4;
587: break loop;
588: }
589: case ParsingConstants.KW_UNION: {
590: t.kind = ParsingConstants.KIND_OPENING_BRAKET;
591: break loop;
592: }
593: case ParsingConstants.KW_EXCEPT:
594: if ((ch >= '0' && ch <= '9')) {
595: break;
596: } else if (ch == '.') {
597: state = 2;
598: break;
599: } else {
600: t.kind = 2;
601: break loop;
602: }
603: case ParsingConstants.KW_INTERSECT: {
604: t.kind = 6;
605: break loop;
606: }
607: case ParsingConstants.KW_MINUS: {
608: t.kind = ParsingConstants.KIND_EQUALS;
609: break loop;
610: }
611: case ParsingConstants.KW_ALL: {
612: t.kind = ParsingConstants.KIND_ASTERISK;
613: break loop;
614: }
615: case ParsingConstants.KW_UPDATE: {
616: t.kind = 52;
617: break loop;
618: }
619: case ParsingConstants.KW_SET: {
620: t.kind = 53;
621: break loop;
622: }
623: case ParsingConstants.KIND_EQUALS: {
624: t.kind = 55;
625: break loop;
626: }
627: case ParsingConstants.KW_INSERT: {
628: t.kind = 56;
629: break loop;
630: }
631: case ParsingConstants.KW_INTO:
632: if (ch == '>') {
633: state = ParsingConstants.KW_VALUES;
634: break;
635: } else if (ch == '=') {
636: state = ParsingConstants.KW_DELETE;
637: break;
638: } else {
639: t.kind = 63;
640: break loop;
641: }
642: case ParsingConstants.KW_VALUES: {
643: t.kind = 62;
644: break loop;
645: }
646: case ParsingConstants.KW_DELETE: {
647: t.kind = 64;
648: break loop;
649: }
650: case ParsingConstants.KW_FROM:
651: if (ch == '=') {
652: state = ParsingConstants.KW_SELECT;
653: break;
654: } else {
655: t.kind = 65;
656: break loop;
657: }
658: case ParsingConstants.KW_SELECT: {
659: t.kind = 66;
660: break loop;
661: }
662: case ParsingConstants.KW_DISTINCT: {
663: t.kind = ParsingConstants.KIND_COMMA;
664: break loop;
665: }
666: case 22: {
667: t.kind = ParsingConstants.KIND_CLOSING_BRAKET;
668: break loop;
669: }
670: case ParsingConstants.KW_AS: {
671: t.kind = 0;
672: break loop;
673: }
674: }
675: }
676: t.str = buf.toString();
677: t.val = t.str.toUpperCase();
678: return t;
679: }
680: }
|