/* ====================================================================
 * Tea - Copyright (c) 1997-2000 Walt Disney Internet Group
 * ====================================================================
 * The Tea Software License, Version 1.1
 *
 * Copyright (c) 2000 Walt Disney Internet Group. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Walt Disney Internet Group (http://opensource.go.com/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Tea", "TeaServlet", "Kettle", "Trove" and "BeanDoc" must
 *    not be used to endorse or promote products derived from this
 *    software without prior written permission. For written
 *    permission, please contact opensource@dig.com.
 *
 * 5. Products derived from this software may not be called "Tea",
 *    "TeaServlet", "Kettle" or "Trove", nor may "Tea", "TeaServlet",
 *    "Kettle", "Trove" or "BeanDoc" appear in their name, without prior
 *    written permission of the Walt Disney Internet Group.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE WALT DISNEY INTERNET GROUP OR ITS
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * ====================================================================
 *
 * For more information about Tea, please see http://opensource.go.com/.
 */

package com.go.tea.compiler;

import java.io.PrintStream;

057: /******************************************************************************
058: * A Token represents the smallest whole element of a source file. Tokens are
059: * produced by a {@link Scanner}.
060: *
061: * @author Brian S O'Neill
062: * @version
063: * <!--$$Revision:--> 46 <!-- $-->, <!--$$JustDate:--> 5/31/01 <!-- $-->
064: */
065: public class Token implements java.io.Serializable {
066: /** Token ID for an unknown token. */
067: public final static int UNKNOWN = 0;
068: /** Token ID for the end of file. */
069: public final static int EOF = 1;
070:
071: // These tokens are not emitted by the Scanner unless explicitly enabled.
072: /** Token ID for a single-line or multi-line comment. */
073: public final static int COMMENT = 2;
074: /** Token ID for the start of a code region. */
075: public final static int ENTER_CODE = 3;
076: /** Token ID for the start of a text region. */
077: public final static int ENTER_TEXT = 4;
078:
079: /** Token ID for a string literal. */
080: public final static int STRING = 5;
081: /** Token ID for a number literal. */
082: public final static int NUMBER = 6;
083: /** Token ID for an identifier. */
084: public final static int IDENT = 7;
085:
086: // brackets
087: private final static int FIRST_BRACKET_ID = 8;
088:
089: /** Token ID for the left parenthesis: '(' */
090: public final static int LPAREN = 8;
091: /** Token ID for the right parenthesis: ')' */
092: public final static int RPAREN = 9;
093: /** Token ID for the left brace: '{' */
094: public final static int LBRACE = 10;
095: /** Token ID for the right brace: '}' */
096: public final static int RBRACE = 11;
097: /** Token ID for the left bracket: '[' */
098: public final static int LBRACK = 12;
099: /** Token ID for the right bracket: ']' */
100: public final static int RBRACK = 13;
101:
102: private final static int LAST_BRACKET_ID = 13;
103:
104: // operators
105: private final static int FIRST_OPERATOR_ID = 19;
106:
107: /** Token ID for the semi-colon: ';' */
108: public final static int SEMI = 14;
109: /** Token ID for the comma: ',' */
110: public final static int COMMA = 15;
111: /** Token ID for the dot: '.' */
112: public final static int DOT = 16;
113: /** Token ID for the double dot: '..' */
114: public final static int DOTDOT = 17;
115: /** Token ID for the ellipsis: '...' */
116: public final static int ELLIPSIS = 18;
117: /** Token ID for the forward slash: '/' */
118: public final static int SLASH = 19;
119: /** Token ID for the hash: '#' */
120: public final static int HASH = 20;
121: /** Token ID for the double hash: '##' */
122: public final static int DOUBLE_HASH = 21;
123: /** Token ID for the less than operator: '<' */
124: public final static int LT = 22;
125: /** Token ID for the less than or equal operator: '<=' */
126: public final static int LE = 23;
127: /** Token ID for the equal operator: '==' */
128: public final static int EQ = 24;
129: /** Token ID for the greater than or equal operator: '>=' */
130: public final static int GE = 25;
131: /** Token ID for the greater than operator: '>' */
132: public final static int GT = 26;
133: /** Token ID for the not equal operator: '!=' */
134: public final static int NE = 27;
135: /** Token ID for the concatenation operator: '&' */
136: public final static int CONCAT = 28;
137: /** Token ID for the plus operator: '+' */
138: public final static int PLUS = 29;
139: /** Token ID for the minus operator: '-' */
140: public final static int MINUS = 30;
141: /** Token ID for the multiplication operator: '*' */
142: public final static int MULT = 31;
143: /** Token ID for the division operator: '/' */
144: public final static int DIV = 32;
145: /** Token ID for the modulus operator: '%' */
146: public final static int MOD = 33;
147: /** Token ID for the assignment operator: '=' */
148: public final static int ASSIGN = 34;
149:
150: private final static int LAST_OPERATOR_ID = 34;
151: // reserved words
152: private final static int FIRST_RESERVED_ID = 35;
153:
154: // literals
155:
156: /** Token ID for the null literal: 'null' */
157: public final static int NULL = 35;
158: /** Token ID for the true literal: 'true' */
159: public final static int TRUE = 36;
160: /** Token ID for the false literal: 'false' */
161: public final static int FALSE = 37;
162:
163: // keywords
164:
165: /** Token ID for the not keyword: 'not' */
166: public final static int NOT = 38;
167: /** Token ID for the or keyword: 'or' */
168: public final static int OR = 39;
169: /** Token ID for the and keyword: 'and' */
170: public final static int AND = 40;
171: /** Token ID for the if keyword: 'if' */
172: public final static int IF = 41;
173: /** Token ID for the else keyword: 'else' */
174: public final static int ELSE = 42;
175: /** Token ID for the is-a keyword: 'isa' */
176: public final static int ISA = 43;
177: /** Token ID for the for-each keyword: 'foreach' */
178: public final static int FOREACH = 44;
179: /** Token ID for the in keyword: 'in' */
180: public final static int IN = 45;
181: /** Token ID for the reverse keyword: 'reverse' */
182: public final static int REVERSE = 46;
183: /** Token ID for the template keyword: 'template' */
184: public final static int TEMPLATE = 47;
185: /** Token ID for the call keyword: 'call' */
186: public final static int CALL = 48;
187: /** Token ID for the break keyword: 'break' */
188: public final static int BREAK = 49;
189:
190: private final static int LAST_RESERVED_ID = 49;
191:
192: private final static int LAST_ID = 49;
193:
194: private int mTokenID;
195: private SourceInfo mInfo;
196:
197: Token(int sourceLine, int sourceStartPos, int sourceEndPos,
198: int tokenID) {
199:
200: this (sourceLine, sourceStartPos, sourceEndPos, sourceStartPos,
201: tokenID);
202: }
203:
204: Token(int sourceLine, int sourceStartPos, int sourceEndPos,
205: int sourceDetailPos, int tokenID) {
206:
207: mTokenID = tokenID;
208:
209: if (tokenID > LAST_ID) {
210: throw new IllegalArgumentException(
211: "Token ID out of range: " + tokenID);
212: }
213:
214: if (sourceStartPos == sourceDetailPos) {
215: mInfo = new SourceInfo(sourceLine, sourceStartPos,
216: sourceEndPos);
217: } else {
218: mInfo = new SourceDetailedInfo(sourceLine, sourceStartPos,
219: sourceEndPos, sourceDetailPos);
220: }
221:
222: if (sourceStartPos > sourceEndPos) {
223: // This is an internal error.
224: throw new IllegalArgumentException(
225: "Token start position greater than end position at line: "
226: + sourceLine);
227: }
228: }
229:
230: public Token(SourceInfo info, int tokenID) {
231: mTokenID = tokenID;
232:
233: if (tokenID > LAST_ID) {
234: throw new IllegalArgumentException(
235: "Token ID out of range: " + tokenID);
236: }
237:
238: mInfo = info;
239: }
240:
241: /**
242: * Returns true if id is a reserved word
243: * @param id The Token id to test
244: */
245: public final static boolean isReservedWord(int id) {
246: return FIRST_RESERVED_ID <= id && id <= LAST_RESERVED_ID;
247: }
248:
249: /**
250: * Returns true if id is an operator
251: * @param id The Token id to test
252: */
253: public final static boolean isOperator(int id) {
254: return FIRST_OPERATOR_ID <= id && id <= LAST_OPERATOR_ID;
255: }
256:
257: /**
258: * Returns true if id is a bracket
259: * @param id The Token id to test
260: */
261: public final static boolean isBracket(int id) {
262: return FIRST_BRACKET_ID <= id && id <= LAST_BRACKET_ID;
263: }
264:
265: /**
266: * Returns true if id is an open bracket: (,[,{
267: * @param id The Token id to test
268: */
269: public final static boolean isOpenBracket(int id) {
270: return (id == LPAREN || id == LBRACE || id == LBRACK);
271: }
272:
273: /**
274: * Returns true if id is a close bracket: ), ], }
275: * @param id The Token id to test
276: */
277: public final static boolean isCloseBracket(int id) {
278: return (id == RPAREN || id == RBRACE || id == RBRACK);
279: }
280:
281: /**
282: * If the given id is a bracket: (,[,{,},],) then the matching bracket's
283: * id is returned. If id is not a bracket, then -1 is returned.
284: */
285: public final static int getMatchingBracket(int id) {
286: if (isOpenBracket(id)) {
287: return ++id;
288: } else if (isCloseBracket(id)) {
289: return --id;
290: }
291:
292: return -1;
293: }
294:
295: /**
296: * If the given StringBuffer starts with a valid token type, its ID is
297: * returned. Otherwise, the token ID UNKNOWN is returned.
298: */
299: public static int findReservedWordID(StringBuffer word) {
300: char c = word.charAt(0);
301:
302: switch (c) {
303: case 'a':
304: if (matches(word, "and"))
305: return AND;
306: break;
307: case 'b':
308: if (matches(word, "break"))
309: return BREAK;
310: break;
311: case 'c':
312: if (matches(word, "call"))
313: return CALL;
314: break;
315: case 'e':
316: if (matches(word, "else"))
317: return ELSE;
318: break;
319: case 'f':
320: if (matches(word, "foreach"))
321: return FOREACH;
322: if (matches(word, "false"))
323: return FALSE;
324: break;
325: case 'i':
326: if (matches(word, "if"))
327: return IF;
328: if (matches(word, "in"))
329: return IN;
330: if (matches(word, "isa"))
331: return ISA;
332: break;
333: case 'n':
334: if (matches(word, "null"))
335: return NULL;
336: if (matches(word, "not"))
337: return NOT;
338: break;
339: case 'o':
340: if (matches(word, "or"))
341: return OR;
342: break;
343: case 'r':
344: if (matches(word, "reverse"))
345: return REVERSE;
346: break;
347: case 't':
348: if (matches(word, "true"))
349: return TRUE;
350: if (matches(word, "template"))
351: return TEMPLATE;
352: break;
353: }
354:
355: return UNKNOWN;
356: }
357:
358: /**
359: * Case sensitive match test.
360: * @param val must be lowercase
361: */
362: private static boolean matches(StringBuffer word, String val) {
363: int len = word.length();
364: if (len != val.length())
365: return false;
366:
367: // Start at index 1, assuming that the first characters have already
368: // been checked to match.
369: for (int index = 1; index < len; index++) {
370: char cw = word.charAt(index);
371: char cv = val.charAt(index);
372:
373: if (cw != cv) {
374: return false;
375: }
376: }
377:
378: return true;
379: }
380:
381: /**
382: * Dumps the contents of this Token to System.out.
383: */
384: public final void dump() {
385: dump(System.out);
386: }
387:
388: /**
389: * Dumps the contents of this Token.
390: * @param out The PrintStream to write to.
391: */
392: public final void dump(PrintStream out) {
393: out.println("Token [Code: " + getCode() + "] [Image: "
394: + getImage() + "] [Value: " + getStringValue()
395: + "] [Id: " + getID() + "] [start: "
396: + mInfo.getStartPosition() + "] [end "
397: + mInfo.getEndPosition() + "]");
398: }
399:
400: /**
401: * Returns the ID of this Token, which identifies what type of token it is.
402: */
403: public final int getID() {
404: return mTokenID;
405: }
406:
407: /**
408: * Returns true if this Token is a reserved word.
409: */
410: public final boolean isReservedWord() {
411: return isReservedWord(mTokenID);
412: }
413:
414: /**
415: * Returns true if this Token is a bracket: (,[,{,},],)
416: */
417: public final boolean isBracket() {
418: return isBracket(mTokenID);
419: }
420:
421: /**
422: * Returns true if this Token is an open bracket: (,[,{
423: */
424: public final boolean isOpenBracket() {
425: return isOpenBracket(mTokenID);
426: }
427:
428: /**
429: * Returns true if this Token is a close bracket: ),],}
430: */
431: public final boolean isCloseBracket() {
432: return isCloseBracket(mTokenID);
433: }
434:
435: /**
436: * Returns true if this Token is an operator
437: */
438: public final boolean isOperator() {
439: return isOperator(mTokenID);
440: }
441:
442: /**
443: * Token image represents what a static token looks like in a source file.
444: * Token image is null if token is a string, number or identifier because
445: * these tokens don't have static images.
446: */
447: public String getImage() {
448: return Code.TOKEN_IMAGES[mTokenID];
449: }
450:
451: /**
452: * Token code is non-null, and is exactly the same as the name for its ID.
453: */
454: public String getCode() {
455: return Code.TOKEN_CODES[mTokenID];
456: }
457:
458: /**
459: * Returns information regarding where in the source file this token
460: * came from.
461: */
462: public final SourceInfo getSourceInfo() {
463: return mInfo;
464: }
465:
466: public String getStringValue() {
467: return null;
468: }
469:
470: /**
471: * Only valid if token is a number. Returns 0 if token is not a number
472: * or is an invalid number. Returns 1 for int, 2 for long, 3 for
473: * float and 4 for double. The token ID for all numbers (even invalid ones)
474: * is NUMBER.
475: *
476: * @return 0, 1, 2, 3 or 4.
477: */
478: public int getNumericType() {
479: return 0;
480: }
481:
482: /** Only valid if token is a number. */
483: public int getIntValue() {
484: return 0;
485: }
486:
487: /** Only valid if token is a number. */
488: public long getLongValue() {
489: return 0L;
490: }
491:
492: /** Only valid if token is a number. */
493: public float getFloatValue() {
494: return 0.0f;
495: }
496:
497: /** Only valid if token is a number. */
498: public double getDoubleValue() {
499: return 0.0d;
500: }
501:
502: public String toString() {
503: StringBuffer buf = new StringBuffer(10);
504:
505: String image = getImage();
506:
507: if (image != null) {
508: buf.append(image);
509: }
510:
511: String str = getStringValue();
512:
513: if (str != null) {
514: if (image != null) {
515: buf.append(' ');
516: }
517: buf.append(str);
518: }
519:
520: return buf.toString();
521: }
522:
523: private static class Code {
524: public final static String[] TOKEN_IMAGES = { null, null,
525:
526: null, null, null,
527:
528: null, null, null,
529:
530: "(", ")", "{", "}", "[", "]",
531:
532: ";", ",", ".", "..", "...", "/", "#", "##", "<", "<=", "==",
533: ">=", ">", "!=", "&", "+", "-", "*", "/", "%", "=",
534:
535: "null", "true", "false",
536:
537: "not", "or", "and", "if", "else", "isa", "foreach",
538: "in", "reverse", "template", "call", "break", };
539:
540: public static final String[] TOKEN_CODES = { "UNKNOWN", "EOF",
541:
542: "COMMENT", "ENTER_CODE", "ENTER_TEXT",
543:
544: "STRING", "NUMBER", "IDENT",
545:
546: "LPAREN", "RPAREN", "LBRACE", "RBRACE", "LBRACK", "RBRACK",
547:
548: "SEMI", "COMMA", "DOT", "DOTDOT", "ELLIPSIS", "SLASH", "HASH",
549: "DOUBLE_HASH", "LT", "LE", "EQ", "GE", "GT", "NE",
550: "CONCAT", "PLUS", "MINUS", "MULT", "DIV", "MOD",
551: "ASSIGN",
552:
553: "NULL", "TRUE", "FALSE",
554:
555: "NOT", "OR", "AND", "IF", "ELSE", "ISA", "FOREACH",
556: "IN", "REVERSE", "TEMPLATE", "CALL", "BREAK", };
557:
558: static {
559: if (TOKEN_IMAGES.length != TOKEN_CODES.length) {
560: // Internal error.
561: throw new RuntimeException(
562: "TOKEN_IMAGES and TOKEN_CODES have different lengths");
563: }
564: }
565: }
566: }
|