001: /*
002: * Portions Copyright 2000-2007 Sun Microsystems, Inc. All Rights
003: * Reserved. Use is subject to license terms.
004: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
005: *
006: * This program is free software; you can redistribute it and/or
007: * modify it under the terms of the GNU General Public License version
008: * 2 only, as published by the Free Software Foundation.
009: *
010: * This program is distributed in the hope that it will be useful, but
011: * WITHOUT ANY WARRANTY; without even the implied warranty of
012: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
013: * General Public License version 2 for more details (a copy is
014: * included at /legal/license.txt).
015: *
016: * You should have received a copy of the GNU General Public License
017: * version 2 along with this work; if not, write to the Free Software
018: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
019: * 02110-1301 USA
020: *
021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
022: * Clara, CA 95054 or visit www.sun.com if you need additional
023: * information or have any questions.
024: */
025: package gov.nist.core;
026:
027: import java.util.Hashtable;
028: import java.util.Vector;
029:
030: import com.sun.midp.log.Logging;
031: import com.sun.midp.log.LogChannels;
032:
033: /**
034: * A lexical analyzer that is used by all parsers in this implementation.
035: */
036: public abstract class LexerCore extends StringTokenizer {
037:
038: // IMPORTANT - All keyword matches should be between START and END
039: /** START token. */
040: public static final int START = 2048;
041: /** END token. */
042: public static final int END = START + 2048;
043: // IMPORTANT -- This should be < END
044: /** ID token. */
045: public static final int ID = END - 1;
046: // Individial token classes.
047: /** WHITESPACE token. */
048: public static final int WHITESPACE = END + 1;
049: /** DIGIT (numeric) token. */
050: public static final int DIGIT = END + 2;
051: /** ALPHA (alphabetic) token. */
052: public static final int ALPHA = END + 3;
053: /** BACKSLASH (escaping) token. */
054: public static final int BACKSLASH = (int) '\\';
055: /** Single QUOTE token. */
056: public static final int QUOTE = (int) '\'';
057: /** AT sign token. */
058: public static final int AT = (int) '@';
059: /** SPACE token. */
060: public static final int SP = (int) ' ';
061: /** HT (Horizontal tab) token. */
062: public static final int HT = (int) '\t';
063: /** COLON token. */
064: public static final int COLON = (int) ':';
065: /** STAR (asterisk) token. */
066: public static final int STAR = (int) '*';
067: /** DOLLAR token. */
068: public static final int DOLLAR = (int) '$';
069: /** PLUS token. */
070: public static final int PLUS = (int) '+';
071: /** POUND token. */
072: public static final int POUND = (int) '#';
073: /** MINUS token. */
074: public static final int MINUS = (int) '-';
075: /** DOUBLEQUOTE token. */
076: public static final int DOUBLEQUOTE = (int) '\"';
077: /** TILDE token. */
078: public static final int TILDE = (int) '~';
079: /** BACK_QUOTE token. */
080: public static final int BACK_QUOTE = (int) '`';
081: /** NULL token. */
082: public static final int NULL = (int) '\0';
083: /** EQUALS (equals sign) token. */
084: public static final int EQUALS = (int) '=';
085: /** SEMICOLON token. */
086: public static final int SEMICOLON = (int) ';';
087: /** Forward SLASH token. */
088: public static final int SLASH = (int) '/';
089: /** L_SQUARE_BRACKET (left square bracket) token. */
090: public static final int L_SQUARE_BRACKET = (int) '[';
091: /** R_SQUARE_BRACKET (right square bracket) token. */
092: public static final int R_SQUARE_BRACKET = (int) ']';
093: /** R_CURLY (right curly bracket) token. */
094: public static final int R_CURLY = (int) '}';
095: /** L_CURLY (left curly bracket) token. */
096: public static final int L_CURLY = (int) '{';
097: /** HAT (carot) token. */
098: public static final int HAT = (int) '^';
099: /** Veritcal BAR token. */
100: public static final int BAR = (int) '|';
101: /** DOT (period) token. */
102: public static final int DOT = (int) '.';
103: /** EXCLAMATION token. */
104: public static final int EXCLAMATION = (int) '!';
105: /** LPAREN (left paren) token. */
106: public static final int LPAREN = (int) '(';
107: /** RPAREN (right paren) token. */
108: public static final int RPAREN = (int) ')';
109: /** GREATER_THAN token. */
110: public static final int GREATER_THAN = (int) '>';
111: /** LESS_THAN token. */
112: public static final int LESS_THAN = (int) '<';
113: /** PERCENT token. */
114: public static final int PERCENT = (int) '%';
115: /** QUESTION mark token. */
116: public static final int QUESTION = (int) '?';
117: /** AND (ampersand) token. */
118: public static final int AND = (int) '&';
119: /** UNDERSCPRE token. */
120: public static final int UNDERSCORE = (int) '_';
121:
122: /** Global symbol table for intermediate elements. */
123: protected static Hashtable globalSymbolTable;
124: /** Lexical rules tables. */
125: protected static Hashtable lexerTables;
126: /** Current elements of current lexing operation. */
127: protected Hashtable currentLexer;
128: /** Name of the current Lexer. */
129: protected String currentLexerName;
130: /** Current matched token. */
131: protected Token currentMatch;
132:
133: /**
134: * Initializes the hash tables on first
135: * loading of the class.
136: */
137: static {
138: globalSymbolTable = new Hashtable();
139: lexerTables = new Hashtable();
140: }
141:
142: /**
143: * Adds a new keyword and value pair.
144: * @param name the name of the keyword
145: * @param value the content of the keyword
146: */
147: protected void addKeyword(String name, int value) {
148: // System.out.println("addKeyword " + name + " value = " + value);
149: // new Exception().printStackTrace();
150: Integer val = new Integer(value);
151: currentLexer.put(name, val);
152: if (!globalSymbolTable.containsKey(val))
153: globalSymbolTable.put(val, name);
154: }
155:
156: /**
157: * Looks up a requested token.
158: * @param value the token to find
159: * @return the value of the token
160: */
161: public String lookupToken(int value) {
162: if (value > START) {
163: return (String) globalSymbolTable.get(new Integer(value));
164: } else {
165: Character ch = new Character((char) value);
166: return ch.toString();
167: }
168: }
169:
170: /**
171: * Adds a new Lexer. If the named lexer
172: * does not exist anew hashtable is allocated.
173: * @param lexerName the lexer name
174: * @return the current lexer Hashtable
175: */
176: protected Hashtable addLexer(String lexerName) {
177: currentLexer = (Hashtable) lexerTables.get(lexerName);
178: if (currentLexer == null) {
179: currentLexer = new Hashtable();
180: lexerTables.put(lexerName, currentLexer);
181: }
182: return currentLexer;
183: }
184:
185: /**
186: * Selects a specific lexer by name.
187: * @param lexerName the requested lexer
188: */
189: public abstract void selectLexer(String lexerName);
190:
191: /**
192: * Default constructor.
193: * Allocates a new hashtable and labels the Lexer
194: * as "charLexer".
195: */
196: protected LexerCore() {
197: this .currentLexer = new Hashtable();
198: this .currentLexerName = "charLexer";
199: }
200:
201: /**
202: * Constructs a new lexer by name.
203: * @param lexerName the name for the lexer
204: */
205: public LexerCore(String lexerName) {
206: selectLexer(lexerName);
207: }
208:
209: /**
210: * Initialize the lexer with a buffer.
211: * @param lexerName the requested lexer
212: * @param buffer initial buffer to process
213: */
214: public LexerCore(String lexerName, String buffer) {
215: this (lexerName);
216: this .buffer = buffer;
217: }
218:
219: /**
220: * Peeks at the next id, but doesn't move the buffer pointer forward.
221: * @return the textual ID of the next token
222: */
223:
224: public String peekNextId() {
225: int oldPtr = ptr;
226: String retval = ttoken();
227: savedPtr = ptr;
228: ptr = oldPtr;
229: return retval;
230: }
231:
232: /**
233: * Gets the next id.
234: * @return textual ID of the next token
235: */
236: public String getNextId() {
237: return ttoken();
238: }
239:
240: // call this after you call match
241: /**
242: * Gets the next token.
243: * @return the next token
244: */
245: public Token getNextToken() {
246: return this .currentMatch;
247:
248: }
249:
250: /**
251: * Looks ahead for one token.
252: * @return the next token
253: * @exception ParseException if an error occurs during parsing
254: */
255: public Token peekNextToken() throws ParseException {
256: return (Token) peekNextToken(1).elementAt(0);
257: }
258:
259: /**
260: * Peeks at the next token.
261: * @param ntokens the number of tokens to look ahead
262: * @return a list of next tokens
263: * @exception ParseException if an error occurs during parsing
264: */
265: public Vector peekNextToken(int ntokens) throws ParseException {
266: int old = ptr;
267: Vector retval = new Vector();
268: for (int i = 0; i < ntokens; i++) {
269: Token tok = new Token();
270: if (startsId()) {
271: String id = ttoken();
272: tok.tokenValue = id;
273: if (currentLexer.containsKey(id.toUpperCase())) {
274: Integer type = (Integer) currentLexer.get(id
275: .toUpperCase());
276: tok.tokenType = type.intValue();
277: } else
278: tok.tokenType = ID;
279: } else {
280: char nextChar = getNextChar();
281: tok.tokenValue = new StringBuffer().append(nextChar)
282: .toString();
283: if (isAlpha(nextChar)) {
284: tok.tokenType = ALPHA;
285: } else if (isDigit(nextChar)) {
286: tok.tokenType = DIGIT;
287: } else
288: tok.tokenType = (int) nextChar;
289: }
290: retval.addElement(tok);
291: }
292: savedPtr = ptr;
293: ptr = old;
294: return retval;
295: }
296:
297: /**
298: * Match the given token or throw an exception, if no such token
299: * can be matched.
300: * @param tok the token to be checked
301: * @return the matched token
302: * @exception ParseException if an error occurs during parsing
303: */
304: public Token match(int tok) throws ParseException {
305: if (Logging.REPORT_LEVEL <= Logging.INFORMATION) {
306: Logging.report(Logging.INFORMATION, LogChannels.LC_JSR180,
307: "match " + tok);
308: }
309:
310: if (tok > START && tok < END) {
311: if (tok == ID) {
312: // Generic ID sought.
313: if (!startsId())
314: throw new ParseException(buffer + "\nID expected",
315: ptr);
316: String id = getNextId();
317: this .currentMatch = new Token();
318: this .currentMatch.tokenValue = id;
319: this .currentMatch.tokenType = ID;
320: } else {
321: String nexttok = getNextId();
322: Integer cur = (Integer) currentLexer.get(nexttok
323: .toUpperCase());
324:
325: if (cur == null || cur.intValue() != tok)
326: throw new ParseException(buffer
327: + "\nUnexpected Token : " + nexttok, ptr);
328: this .currentMatch = new Token();
329: this .currentMatch.tokenValue = nexttok;
330: this .currentMatch.tokenType = tok;
331: }
332: } else if (tok > END) {
333: // Character classes.
334: char next = lookAhead(0);
335: if (tok == DIGIT) {
336: if (!isDigit(next))
337: throw new ParseException(buffer
338: + "\nExpecting DIGIT", ptr);
339: this .currentMatch = new Token();
340: this .currentMatch.tokenValue = new StringBuffer()
341: .append(next).toString();
342: this .currentMatch.tokenType = tok;
343: consume(1);
344:
345: } else if (tok == ALPHA) {
346: if (!isAlpha(next))
347: throw new ParseException(buffer
348: + "\nExpecting ALPHA", ptr);
349: this .currentMatch = new Token();
350: this .currentMatch.tokenValue = new StringBuffer()
351: .append(next).toString();
352: this .currentMatch.tokenType = tok;
353: consume(1);
354:
355: }
356:
357: } else {
358: // This is a direct character spec.
359: Character ch = new Character((char) tok);
360: char next = lookAhead(0);
361: if (next == ch.charValue()) {
362: this .currentMatch = new Token();
363: this .currentMatch.tokenValue = new StringBuffer()
364: .append(ch.charValue()).toString();
365: this .currentMatch.tokenType = tok;
366: consume(1);
367: } else
368: throw new ParseException(buffer + "\nExpecting "
369: + ch.charValue(), ptr);
370: }
371: return this .currentMatch;
372: }
373:
374: /**
375: * Checks for space or horiizontal tab.
376: * The tokens are consumed if present.
377: * All parsing errors are ignored.(if any)
378: */
379: public void SPorHT() {
380: try {
381: while (lookAhead(0) == ' ' || lookAhead(0) == '\t')
382: consume(1);
383: } catch (ParseException ex) {
384: // Ignore
385: }
386: }
387:
388: /**
389: * Checks for staring IDs.
390: * @return true if next char is alphanumeric or
391: * begins with appropriate punctuation characters.
392: */
393: public boolean startsId() {
394: try {
395: char nextChar = lookAhead(0);
396: return isValidChar(nextChar);
397: } catch (ParseException ex) {
398: return false;
399: }
400: }
401:
402: /**
403: * Gets the next textual token.
404: * @return the next token as a string
405: */
406: public String ttoken() {
407: StringBuffer nextId = new StringBuffer();
408: try {
409: while (hasMoreChars()) {
410: char nextChar = lookAhead(0);
411: // println("nextChar = " + nextChar);
412: if (isValidChar(nextChar)) {
413: consume(1);
414: nextId.append(nextChar);
415: } else
416: break;
417:
418: }
419: return nextId.toString();
420: } catch (ParseException ex) {
421: return nextId.toString();
422: }
423: }
424:
425: /**
426: * Gets the next textual token including embedded
427: * white space
428: * @return the next text token as a string with embedded space and
429: * tab characters
430: */
431: public String ttokenAllowSpace() {
432: StringBuffer nextId = new StringBuffer();
433: try {
434: while (hasMoreChars()) {
435: char nextChar = lookAhead(0);
436: // println("nextChar = " + nextChar);
437: if (isAlpha(nextChar) || isDigit(nextChar)
438: || nextChar == '_' || nextChar == '+'
439: || nextChar == '-' || nextChar == '!'
440: || nextChar == '`' || nextChar == '\''
441: || nextChar == '~' || nextChar == '.'
442: || nextChar == ' ' || nextChar == '\t'
443: || nextChar == '*') {
444: nextId.append(nextChar);
445: consume(1);
446: } else
447: break;
448:
449: }
450: return nextId.toString();
451: } catch (ParseException ex) {
452: return nextId.toString();
453: }
454: }
455:
456: // Assume the cursor is at a quote.
457: /**
458: * Gets a quoted string.
459: * Read all the characters between double
460: * quotes into the next textual token.
461: * Preserve all back slash escaped characters.
462: * @return the contents of the quoted string, both
463: * starting and ending double quote characters
464: * are consumed.
465: * @exception ParseException if any parsing errors occur
466: */
467: public String quotedString() throws ParseException {
468: StringBuffer retval = new StringBuffer();
469: if (lookAhead(0) != '\"')
470: return null;
471: consume(1);
472: while (true) {
473: char next = getNextChar();
474: if (next == '\"')
475: break;
476: else if (next == '\\') {
477: retval.append(next);
478: next = getNextChar();
479: retval.append(next);
480: } else {
481: retval.append(next);
482: }
483: }
484: return retval.toString();
485: }
486:
487: // Assume the cursor is at a "("
488: /**
489: * Gets a comment string.
490: * Consumes all characters between left and right
491: * parens. Back slashed escaped characters are preserved.
492: * @return the comment string, both starting and ending parens are
493: * consumed.
494: * @exception ParseException if any parsing errors occur, or if the
495: * comment is not properly closed
496: */
497: public String comment() throws ParseException {
498: StringBuffer retval = new StringBuffer();
499: if (lookAhead(0) != '(')
500: return null;
501: consume(1);
502: while (true) {
503: char next = getNextChar();
504: if (next == ')') {
505: break;
506: } else if (next == '\0') {
507: throw new ParseException(this .buffer
508: + " :unexpected EOL", this .ptr);
509: } else if (next == '\\') {
510: retval.append(next);
511: next = getNextChar();
512: if (next == '\0')
513: throw new ParseException(this .buffer
514: + " : unexpected EOL", this .ptr);
515: retval.append(next);
516: } else {
517: retval.append(next);
518: }
519: }
520: return retval.toString();
521: }
522:
523: /**
524: * Gets a token up to the next semicolon or end of line.
525: * The end of line or terminating semicolon are not
526: * consumed. If a parsing exception occurs, the consumed
527: * characters are returned.
528: * @return the next token without embedded semicolons
529: */
530: public String byteStringNoSemicolon() {
531: StringBuffer retval = new StringBuffer();
532: try {
533: char next;
534: while ((next = lookAhead(0)) != '\0') {
535: if (next == '\n' || next == ';') {
536: break;
537: } else {
538: consume(1);
539: retval.append(next);
540: }
541: }
542: } catch (ParseException ex) {
543: return retval.toString();
544: }
545: return retval.toString();
546: }
547:
548: /**
549: * Gets a token up to the next comma or end of line.
550: * The end of line or terminating comma are not
551: * consumed. If a parsing exception occurs, the consumed
552: * characters are returned.
553: * @return the next token without embedded commas
554: */
555: public String byteStringNoComma() {
556: StringBuffer retval = new StringBuffer();
557: try {
558: char next;
559: while ((next = lookAhead(0)) != '\0') {
560: if (next == '\n' || next == ',') {
561: break;
562: } else {
563: consume(1);
564: retval.append(next);
565: }
566: }
567: } catch (ParseException ex) {
568: }
569: return retval.toString();
570: }
571:
572: /**
573: * Converts a character to a string.
574: * @param ch the character to enclose
575: * @return a string containing the single character
576: */
577: public static String charAsString(char ch) {
578: return new Character(ch).toString();
579: }
580:
581: /**
582: * Lookahead in the inputBuffer for n chars and return as a string.
583: * Do not consume the input. In the event of a parsing
584: * error return the characters that could be consumed.
585: * @param nchars the number of characters to look ahead
586: * @return a string containing the designated characters
587: */
588: public String charAsString(int nchars) {
589:
590: StringBuffer retval = new StringBuffer();
591: try {
592: for (int i = 0; i < nchars; i++) {
593: retval.append(lookAhead(i));
594: }
595: return retval.toString();
596: } catch (ParseException ex) {
597: return retval.toString();
598:
599: }
600: }
601:
602: /**
603: * Gets and consumes the next number.
604: * Only digits are included in the returned string.
605: * @return the parsed number as a string
606: * @exception ParseException if any parsing errors occur
607: */
608: public String number() throws ParseException {
609:
610: StringBuffer retval = new StringBuffer();
611: if (!isDigit(lookAhead(0))) {
612: throw new ParseException(buffer + ": Unexpected token at "
613: + lookAhead(0), ptr);
614: }
615: retval.append(lookAhead(0));
616: consume(1);
617: while (true) {
618: char next = lookAhead(0);
619: if (isDigit(next)) {
620: retval.append(next);
621: consume(1);
622: } else
623: break;
624: }
625: return retval.toString();
626: }
627:
628: /**
629: * Mark the position for backtracking.
630: * @return the current pointer in the parsed content
631: */
632: public int markInputPosition() {
633: return ptr;
634: }
635:
636: /**
637: * Rewinds the input pointer to the marked position.
638: * @param position the desired parsing location
639: */
640: public void rewindInputPosition(int position) {
641: this .ptr = position;
642: }
643:
644: /**
645: * Gets the rest of the string buffer.
646: * @return the remaining text in the buffer, or null if the
647: * buffer has been consumed.
648: */
649: public String getRest() {
650: if (ptr >= buffer.length())
651: return null;
652: else
653: return buffer.substring(ptr);
654: }
655:
656: /**
657: * Gets the sub-String until the requested character is
658: * encountered.
659: * @param c the character to match
660: * @return the string up til the separator caharacter
661: * @exception ParseException if a parsing error occurs
662: */
663: public String getString(char c) throws ParseException {
664: int savedPtr = ptr;
665: StringBuffer retval = new StringBuffer();
666: while (true) {
667: char next = lookAhead(0);
668:
669: if (next == '\0') {
670: ParseException exception = new ParseException(
671: this .buffer + "unexpected EOL", this .ptr);
672: ptr = savedPtr;
673: throw exception;
674: } else if (next == c) {
675: consume(1);
676: break;
677: } else if (next == '\\') {
678: consume(1);
679: char nextchar = lookAhead(0);
680: if (nextchar == '\0') {
681: ParseException exception = new ParseException(
682: this .buffer + "unexpected EOL", this .ptr);
683: ptr = savedPtr;
684: throw exception;
685: } else {
686: consume(1);
687: retval.append(nextchar);
688: }
689: } else {
690: consume(1);
691: retval.append(next);
692: }
693: }
694: return retval.toString();
695: }
696:
697: /**
698: * Gets the read pointer.
699: * @return offset in the buffer
700: */
701: public int getPtr() {
702: return this .ptr;
703: }
704:
705: /**
706: * Gets the buffer.
707: * @return the parsing buffer
708: */
709: public String getBuffer() {
710: return this .buffer;
711: }
712:
713: /**
714: * Creates a parse exception.
715: * @return an exception with the current buffer and offset
716: * in the exception contents
717: */
718: public ParseException createParseException() {
719: return new ParseException(this.buffer, this.ptr);
720: }
721: }
|