001: package net.sf.saxon.expr;
002:
003: import java.util.HashMap;
004:
005: /**
006: * This class holds static constants and methods defining the lexical tokens used in
007: * XPath and XQuery, and associated keywords.
008: */
009:
public abstract class Token {

    /*
     * Token numbers. Those in the range 0 to 100 are tokens that can be followed
     * by a name or expression; those in the range 101 to 200 are tokens that can be
     * followed by a binary operator.
     */

    /**
     * Pseudo-token representing the end of the expression
     */
    public static final int EOF = 0;
    /**
     * "union" or "|" token
     */
    public static final int UNION = 1;
    /**
     * Forwards "/"
     */
    public static final int SLASH = 2;
    /**
     * At token, "@"
     */
    public static final int AT = 3;
    /**
     * Left square bracket
     */
    public static final int LSQB = 4;
    /**
     * Left parenthesis
     */
    public static final int LPAR = 5;
    /**
     * Equals token ("=")
     */
    public static final int EQUALS = 6;
    /**
     * Comma token
     */
    public static final int COMMA = 7;
    /**
     * Double forwards slash, "//"
     */
    public static final int SLSL = 8;
    /**
     * Operator "or"
     */
    public static final int OR = 9;
    /**
     * Operator "and"
     */
    public static final int AND = 10;
    /**
     * Operator ">"
     */
    public static final int GT = 11;
    /**
     * Operator "<"
     */
    public static final int LT = 12;
    /**
     * Operator ">="
     */
    public static final int GE = 13;
    /**
     * Operator "<="
     */
    public static final int LE = 14;
    /**
     * Operator "+"
     */
    public static final int PLUS = 15;
    /**
     * Binary minus operator
     */
    public static final int MINUS = 16;
    /**
     * Multiply operator, "*" when used in an operator context
     */
    public static final int MULT = 17;
    /**
     * Operator "div"
     */
    public static final int DIV = 18;
    /**
     * Operator "mod"
     */
    public static final int MOD = 19;
    /**
     * Operator "is"
     */
    public static final int IS = 20;
    /**
     * "$" symbol
     */
    public static final int DOLLAR = 21;
    /**
     * Operator not-equals. That is, "!="
     */
    public static final int NE = 22;
    /**
     * Operator "intersect"
     */
    public static final int INTERSECT = 23;
    /**
     * Operator "except"
     */
    public static final int EXCEPT = 24;
    /**
     * Keyword "return"
     */
    public static final int RETURN = 25;
    /**
     * Keyword "then"
     */
    public static final int THEN = 26;
    /**
     * Keyword "else"
     */
    public static final int ELSE = 27;
    /**
     * Keyword "where"
     */
    public static final int WHERE = 28;
    /**
     * Operator "to"
     */
    public static final int TO = 29;
    /**
     * Keyword "in"
     */
    public static final int IN = 30;
    /**
     * Keyword "some"
     */
    public static final int SOME = 31;
    /**
     * Keyword "every"
     */
    public static final int EVERY = 32;
    /**
     * Keyword "satisfies"
     */
    public static final int SATISFIES = 33;
    /**
     * Token representing the name of a function and the following "(" symbol
     */
    public static final int FUNCTION = 34;
    /**
     * Token representing the name of an axis and the following "::" symbol
     */
    public static final int AXIS = 35;
    /**
     * Keyword "if"
     */
    public static final int IF = 36;
    /**
     * Operator "<<"
     */
    public static final int PRECEDES = 37;
    /**
     * Operator ">>"
     */
    public static final int FOLLOWS = 38;
    /**
     * "::" symbol
     */
    public static final int COLONCOLON = 39;
    /**
     * ":*" symbol
     */
    public static final int COLONSTAR = 40;
    /**
     * Operator "instance of"
     */
    public static final int INSTANCE_OF = 41;
    /**
     * Operator "cast as"
     */
    public static final int CAST_AS = 42;
    /**
     * Operator "treat as"
     */
    public static final int TREAT_AS = 43;
    /**
     * Operator "eq"
     */
    public static final int FEQ = 44; // "Fortran" style comparison operators eq, ne, etc
    /**
     * Operator "ne"
     */
    public static final int FNE = 45;
    /**
     * Operator "gt"
     */
    public static final int FGT = 46;
    /**
     * Operator "lt"
     */
    public static final int FLT = 47;
    /**
     * Operator "ge"
     */
    public static final int FGE = 48;
    /**
     * Operator "le"
     */
    public static final int FLE = 49;
    /**
     * Operator "idiv"
     */
    public static final int IDIV = 50;
    /**
     * Operator "castable as"
     */
    public static final int CASTABLE_AS = 51;
    /**
     * ":=" symbol (XQuery only)
     */
    public static final int ASSIGN = 52;
    /**
     * "{" symbol (XQuery only)
     */
    public static final int LCURLY = 53;
    /**
     * composite token: <keyword "{"> (XQuery only)
     */
    public static final int KEYWORD_CURLY = 54;
    /**
     * composite token <'element' QNAME> (XQuery only)
     */
    public static final int ELEMENT_QNAME = 55;
    /**
     * composite token <'attribute' QNAME> (XQuery only)
     */
    public static final int ATTRIBUTE_QNAME = 56;
    /**
     * composite token <'pi' QNAME> (XQuery only)
     */
    public static final int PI_QNAME = 57;
    /**
     * Keyword "typeswitch"
     */
    public static final int TYPESWITCH = 58;
    /**
     * Keyword "case"
     */
    public static final int CASE = 59;
    /**
     * Keyword "default"
     */
    public static final int DEFAULT = 60;
    /**
     * Node kind, e.g. "node()" or "comment()"
     */
    public static final int NODEKIND = 61;
    /**
     * "*:" token
     */
    public static final int SUFFIX = 62; // e.g. *:suffix - the suffix is actually a separate token

    // The following tokens are used only in the query prolog. They are categorized
    // as operators on the basis that a following name is treated as a name rather than
    // an operator.

    /**
     * "xquery version"
     */
    public static final int XQUERY_VERSION = 70;
    /**
     * "declare namespace"
     */
    public static final int DECLARE_NAMESPACE = 71;
    /**
     * "declare default"
     */
    public static final int DECLARE_DEFAULT = 72;
    /**
     * "declare construction"
     */
    public static final int DECLARE_CONSTRUCTION = 73;
    /**
     * "declare base-uri"
     */
    public static final int DECLARE_BASEURI = 74;
    /**
     * "declare boundary-space"
     */
    public static final int DECLARE_BOUNDARY_SPACE = 75;
    /**
     * "import schema"
     */
    public static final int IMPORT_SCHEMA = 76;
    /**
     * "import module"
     */
    public static final int IMPORT_MODULE = 77;
    /**
     * "declare variable"
     */
    public static final int DECLARE_VARIABLE = 78;
    /**
     * "declare function"
     */
    public static final int DECLARE_FUNCTION = 79;
    /**
     * "module namespace"
     */
    public static final int MODULE_NAMESPACE = 80;
    /**
     * "validate {" - compound symbol supporting the XQuery validation expression
     */
    public static final int VALIDATE = 81;
    /**
     * "validate strict" - compound symbol supporting the XQuery validation expression
     */
    public static final int VALIDATE_STRICT = 82;
    /**
     * "validate lax" - compound symbol supporting the XQuery validation expression
     */
    public static final int VALIDATE_LAX = 83;

    /**
     * "declare ordering"
     */
    public static final int DECLARE_ORDERING = 84;

    /**
     * "declare copy-namespaces"
     */
    public static final int DECLARE_COPY_NAMESPACES = 85;
    /**
     * "declare option"
     */
    public static final int DECLARE_OPTION = 86;
    /**
     * semicolon separator
     */
    public static final int SEMICOLON = 90;

    /**
     * Constant identifying the token number of the last token to be classified as an operator
     */
    static final int LAST_OPERATOR = 100;

    // Tokens that set "operator" context, so an immediately following "div" is recognized
    // as an operator, not as an element name

    /**
     * Name token (a QName, in general)
     */
    public static final int NAME = 101;
    /**
     * String literal
     */
    public static final int STRING_LITERAL = 102;
    /**
     * Right square bracket
     */
    public static final int RSQB = 103;
    /**
     * Right parenthesis
     */
    public static final int RPAR = 104;
    /**
     * "." symbol
     */
    public static final int DOT = 105;
    /**
     * ".." symbol
     */
    public static final int DOTDOT = 106;
    /**
     * "*" symbol when used as a wildcard
     */
    public static final int STAR = 107;
    /**
     * "prefix:*" token
     */
    public static final int PREFIX = 108; // e.g. prefix:*
    /**
     * Numeric literal
     */
    public static final int NUMBER = 109;

    /**
     * "for" keyword
     */
    public static final int FOR = 111;
    /**
     * Question mark symbol. That is, "?"
     */
    public static final int QMARK = 113;
    /**
     * "}" symbol (XQuery only)
     */
    public static final int RCURLY = 115;
    /**
     * "let" keyword (XQuery only)
     */
    public static final int LET = 116;
    /**
     * "<" at the start of a tag (XQuery only). The pseudo-XML syntax that
     * follows is read character-by-character by the XQuery parser
     */
    public static final int TAG = 117;
    /**
     * A token representing an XQuery pragma.
     * This construct "(# .... #)" is regarded as a single token, for the QueryParser to sort out.
     */
    public static final int PRAGMA = 118;

    /**
     * Unary minus sign
     */
    public static final int NEGATE = 199; // unary minus: not actually a token, but we
                                          // use token numbers to identify operators.

    /**
     * Display strings used to represent tokens in error messages, indexed by token number.
     * Entries for the two-keyword tokens are filled in by {@link #mapDouble}; token numbers
     * that are not assigned to any token are left as null.
     */
    public static String[] tokens = new String[200];

    static {
        tokens[EOF] = "<eof>";
        tokens[UNION] = "|";
        tokens[SLASH] = "/";
        tokens[AT] = "@";
        tokens[LSQB] = "[";
        tokens[LPAR] = "(";
        tokens[EQUALS] = "=";
        tokens[COMMA] = ",";
        tokens[SLSL] = "//";
        tokens[OR] = "or";
        tokens[AND] = "and";
        tokens[GT] = ">";
        tokens[LT] = "<";
        tokens[GE] = ">=";
        tokens[LE] = "<=";
        tokens[PLUS] = "+";
        tokens[MINUS] = "-";
        tokens[MULT] = "*";
        tokens[DIV] = "div";
        tokens[MOD] = "mod";
        tokens[IS] = "is";
        tokens[DOLLAR] = "$";
        tokens[NE] = "!=";
        tokens[INTERSECT] = "intersect";
        tokens[EXCEPT] = "except";
        tokens[RETURN] = "return";
        tokens[THEN] = "then";
        tokens[ELSE] = "else";
        // WHERE previously had no entry, so diagnostics rendered it as "null"
        tokens[WHERE] = "where";
        tokens[TO] = "to";
        tokens[IN] = "in";
        tokens[SOME] = "some";
        tokens[EVERY] = "every";
        tokens[SATISFIES] = "satisfies";
        tokens[FUNCTION] = "<function>(";
        tokens[AXIS] = "<axis>";
        tokens[IF] = "if(";
        tokens[PRECEDES] = "<<";
        tokens[FOLLOWS] = ">>";
        tokens[COLONCOLON] = "::";
        tokens[COLONSTAR] = ":*";
        tokens[INSTANCE_OF] = "instance of";
        tokens[CAST_AS] = "cast as";
        tokens[TREAT_AS] = "treat as";
        tokens[FEQ] = "eq";
        tokens[FNE] = "ne";
        tokens[FGT] = "gt";
        tokens[FGE] = "ge";
        tokens[FLT] = "lt";
        tokens[FLE] = "le";
        tokens[IDIV] = "idiv";
        tokens[CASTABLE_AS] = "castable as";
        tokens[ASSIGN] = ":=";
        // The composite constructor tokens previously had no display string either
        tokens[ELEMENT_QNAME] = "element QName";
        tokens[ATTRIBUTE_QNAME] = "attribute QName";
        tokens[PI_QNAME] = "processing-instruction QName";
        tokens[TYPESWITCH] = "typeswitch";
        tokens[CASE] = "case";
        tokens[DEFAULT] = "default";

        tokens[NAME] = "<name>";
        tokens[STRING_LITERAL] = "<string-literal>";
        tokens[RSQB] = "]";
        tokens[RPAR] = ")";
        tokens[DOT] = ".";
        tokens[DOTDOT] = "..";
        tokens[STAR] = "*";
        tokens[PREFIX] = "<prefix:*>";
        tokens[NUMBER] = "<numeric-literal>";
        tokens[NODEKIND] = "<node-type>()";
        tokens[FOR] = "for";
        tokens[SUFFIX] = "<*:local-name>";
        tokens[QMARK] = "?";
        tokens[LCURLY] = "{";
        tokens[KEYWORD_CURLY] = "<keyword> {";
        tokens[RCURLY] = "}";
        tokens[LET] = "let";
        tokens[VALIDATE] = "validate {";
        tokens[TAG] = "<element>";
        tokens[PRAGMA] = "(# ... #)";
        tokens[SEMICOLON] = ";";
        tokens[NEGATE] = "-";
    }

    /**
     * Lookup table for composite (two-keyword) tokens. Keys are the two keywords
     * separated by a single space; values are the token number wrapped as an Integer.
     */
    // NOTE: must be declared before the static initializer below that calls mapDouble(),
    // because static initializers run in textual order.
    public static HashMap doubleKeywords = new HashMap(30);
    /**
     * Pseudo-token representing the start of the expression
     */
    public static final int UNKNOWN = -1;

    /**
     * Private constructor: the class only holds static members and is never instantiated.
     */
    private Token() {
    }

    static {
        mapDouble("instance of", INSTANCE_OF);
        mapDouble("cast as", CAST_AS);
        mapDouble("treat as", TREAT_AS);
        mapDouble("castable as", CASTABLE_AS);
        mapDouble("xquery version", XQUERY_VERSION);
        mapDouble("declare namespace", DECLARE_NAMESPACE);
        mapDouble("declare default", DECLARE_DEFAULT);
        mapDouble("declare construction", DECLARE_CONSTRUCTION);
        mapDouble("declare base-uri", DECLARE_BASEURI);
        mapDouble("declare boundary-space", DECLARE_BOUNDARY_SPACE);
        mapDouble("declare ordering", DECLARE_ORDERING);
        mapDouble("declare copy-namespaces", DECLARE_COPY_NAMESPACES);
        mapDouble("declare option", DECLARE_OPTION);
        mapDouble("import schema", IMPORT_SCHEMA);
        mapDouble("import module", IMPORT_MODULE);
        mapDouble("declare variable", DECLARE_VARIABLE);
        mapDouble("declare function", DECLARE_FUNCTION);
        mapDouble("module namespace", MODULE_NAMESPACE);
        mapDouble("validate strict", VALIDATE_STRICT);
        mapDouble("validate lax", VALIDATE_LAX);
    }

    /**
     * Register a two-keyword token: record it in the {@link #doubleKeywords} lookup table
     * and use the same text as its display string in {@link #tokens}.
     *
     * @param doubleKeyword the two keywords, separated by a single space
     * @param token the token number assigned to this keyword pair
     */
    private static void mapDouble(String doubleKeyword, int token) {
        doubleKeywords.put(doubleKeyword, new Integer(token));
        tokens[token] = doubleKeyword;
    }

    /**
     * Return the inverse of a relational operator, so that "a op b" can be
     * rewritten as "b inverse(op) a"
     *
     * @param operator the token number of the operator
     * @return the token number of the operator to use when the operands are swapped.
     * Operators other than the ordering comparisons (LT, LE, GT, GE and their
     * FLT, FLE, FGT, FGE counterparts) are returned unchanged.
     */
    public static final int inverse(int operator) {
        switch (operator) {
            case LT:
                return GT;
            case LE:
                return GE;
            case GT:
                return LT;
            case GE:
                return LE;
            case FLT:
                return FGT;
            case FLE:
                return FGE;
            case FGT:
                return FLT;
            case FGE:
                return FLE;
            default:
                return operator;
        }
    }

    /**
     * Return the negation of a relational operator, so that "a op b" can be
     * rewritten as not(a op' b). The operands keep their order; only the operator
     * changes (for example "lt" becomes "ge").
     *
     * @param operator the token number of the operator; must be one of
     * FEQ, FNE, FLT, FLE, FGT, FGE
     * @return the token number of the negated operator
     * @throws IllegalArgumentException if the operator is not one of the six listed above
     */
    public static final int negate(int operator) {
        switch (operator) {
            case FEQ:
                return FNE;
            case FNE:
                return FEQ;
            case FLT:
                return FGE;
            case FLE:
                return FGT;
            case FGT:
                return FLE;
            case FGE:
                return FLT;
            default:
                throw new IllegalArgumentException(
                        "Invalid operator for negate()");
        }
    }
}
603:
604: //
605: // The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
606: // you may not use this file except in compliance with the License. You may obtain a copy of the
607: // License at http://www.mozilla.org/MPL/
608: //
609: // Software distributed under the License is distributed on an "AS IS" basis,
610: // WITHOUT WARRANTY OF ANY KIND, either express or implied.
611: // See the License for the specific language governing rights and limitations under the License.
612: //
613: // The Original Code is: all this file.
614: //
615: // The Initial Developer of the Original Code is Michael H. Kay.
616: //
617: // Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
618: //
619: // Contributor(s): none.
620: //
|