001: package fri.patterns.interpreter.parsergenerator.lexer;
002:
003: import fri.patterns.interpreter.parsergenerator.Token;
004:
005: /**
006: Standard lexer rules are building blocks for lexers dealing with text input.
007: This class resolves nonterminals enclosed in `backquotes` within an EBNF,
008: e.g. `cstylecomment`.
009: <p>
010: Furthermore it provides methods to retrieve sets of rules describing certain standard
011: scan items like `number` or `identifier`. The resulting arrays can be built together
012: by <i>SyntaxUtil.catenizeRules(...)</i>.
013: <p>
014: This class provides rules for comments with an arbitrary start character or start/end sequence:
015: <ul>
016: <li>getCustomOneLineCommentRules(String nonterminalName, String startChar) and</li>
017: <li>getCustomMultiLineCommentRules(String nonterminalName, String startSeq, String endSeq).</li>
018: </ul>
019: <p>
020: Example (CStyleCommentStrip):
021: <pre>
022: String [][] rules = {
023: { Token.TOKEN, "others" }, // define what we want to receive
024: { Token.TOKEN, "`stringdef`" }, // need this rule as string definitions could contain comments
025: { Token.IGNORED, "`cstylecomment`" },
026: { "others", "others", "other" },
027: { "others", "other" },
028: { "other", "`char`", Token.BUTNOT, "`cstylecomment`", Token.BUTNOT, "`stringdef`" },
029: };
030: Syntax syntax = new Syntax(rules);
031: SyntaxSeparation separation = new SyntaxSeparation(syntax);
032: LexerBuilder builder = new LexerBuilder(separation.getLexerSyntax(), separation.getIgnoredSymbols());
033: Lexer lexer = builder.getLexer();
034: </pre>
035:
036: TODO: Refactor this class and make smaller units with better names.
037:
038: @see fri.patterns.interpreter.parsergenerator.lexer.LexerBuilder
039: @author (c) 2002, Fritz Ritzberger
040: */
041:
042: public abstract class StandardLexerRules {
043: /**
044: Returns e.g. the Letter-Rules <i>getUnicodeLetterRules()</i> for id "letter".
045: Using this, one can write things like `identifier` in a Lexer specification text,
046: as LexerBuilder tries to resolve these words calling this method.
047: Possible values for id are:
048: <ul>
049: <li>char (all UNICODE characters)</li>
050: <li>newline</li>
051: <li>newlines</li>
052: <li>space</li>
053: <li>spaces</li>
054: <li>whitespace</li>
055: <li>whitespaces</li>
056: <li>letter</li>
057: <li>digit</li>
058: <li>digits</li>
059: <li>hexdigit</li>
060: <li>hexdigits (does NOT include preceding "0x")</li>
061: <li>identifier</li>
062: <li>stringdef</li>
063: <li>chardef</li>
064: <li>bnf_chardef (differs as in BNF characters can be written as "020" instead of '\020')</li>
065: <li>ruleref (`lexerrule`)</li>
066: <li>quantifier (*+?)</li>
067: <li>cstylecomment</li>
068: <li>comment</li>
069: <li>shellstylecomment</li>
070: <li>octdigits</li>
071: <li>bindigits</li>
072: <li>number</li>
073: <li>float</li>
074: <li>integer</li>
075: <li>xmlchar</li>
076: <li>combiningchar</li>
077: <li>extenderchar</li>
078: </ul>
079: */
080: public static String[][] rulesForIdentifier(String id) {
081: //System.err.println("searching for syntax rules for nonterminal "+id);
082: if (id.equals("char"))
083: return getUnicodeCharRules();
084: if (id.equals("newline"))
085: return getNewlineRules();
086: if (id.equals("newlines"))
087: return getNewlinesRules();
088: if (id.equals("space"))
089: return getSpaceRules();
090: if (id.equals("spaces"))
091: return getSpacesRules();
092: if (id.equals("whitespace"))
093: return getWhitespaceRules();
094: if (id.equals("whitespaces"))
095: return getWhitespacesRules();
096: if (id.equals("letter"))
097: return getUnicodeLetterRules();
098: if (id.equals("digit"))
099: return getUnicodeDigitRules();
100: if (id.equals("digits"))
101: return getUnicodeDigitsRules();
102: if (id.equals("hexdigit"))
103: return getHexDigitRules();
104: if (id.equals("hexdigits"))
105: return getHexDigitsRules();
106: if (id.equals("octdigits"))
107: return getOctDigitsRules();
108: if (id.equals("bindigits"))
109: return getBinDigitsRules();
110: if (id.equals("number"))
111: return getNumberRules();
112: if (id.equals("integer"))
113: return getIntegerRules();
114: if (id.equals("float"))
115: return getFloatRules();
116: if (id.equals("identifier"))
117: return getUnicodeIdentifierRules();
118: if (id.equals("stringdef"))
119: return getUnicodeStringdefRules();
120: if (id.equals("chardef"))
121: return getUnicodeChardefRules();
122: if (id.equals("bnf_chardef"))
123: return getUnicodeBNFChardefRules();
124: if (id.equals("ruleref"))
125: return getRulerefRules();
126: if (id.equals("quantifier"))
127: return getQuantifierRules();
128: if (id.equals("comment"))
129: return getCommentRules();
130: if (id.equals("cstylecomment"))
131: return getCStyleCommentRules();
132: if (id.equals("shellstylecomment"))
133: return getShellStyleCommentRules();
134: if (id.equals("xmlchar"))
135: return getUnicodeXmlCharRules();
136: if (id.equals("combiningchar"))
137: return getUnicodeCombiningCharRules();
138: if (id.equals("extenderchar"))
139: return getUnicodeExtenderCharRules();
140: return null;
141: }
142:
143: /**
144: Returns rules for a custom comment (like C-style "//", but with passed start sequence).
145: @param nonterminalName name of comment to be used within syntax, e.g. "basicComment".
146: @param startChar string (1-n characters) defining the start sequence of the comment, e.g. ";"
147: */
148: public static final String[][] getCustomOneLineCommentRules(
149: String nonterminalName, String startChar) {
150: String[][] sarr0 = getUnicodeCharRules();
151: String[][] sarr1 = getNewlineRules();
152: String[][] sarr2 = getSomeRules(290, 296);
153: String[] customRule = new String[sarr2[0].length];
154: System.arraycopy(sarr2[0], 0, customRule, 0, customRule.length);
155: customRule[0] = nonterminalName;
156: customRule[1] = "\"" + startChar + "\""; // put custom sequence where where "//" sits
157: sarr2[0] = customRule;
158: return catenizeRules(new String[][][] { sarr0, sarr1, sarr2 });
159: }
160:
161: /**
162: Returns rules for a custom comment (like C-style "/*", but with passed start and end sequence).
163: @param nonterminalName name of comment to be used within syntax, e.g. "pascalComment".
164: @param startSeq string defining the start sequence of the comment, e.g. "(*"
165: @param endSeq string defining the end sequence of the comment, e.g. "*)"
166: */
167: public static final String[][] getCustomMultiLineCommentRules(
168: String nonterminalName, String startSeq, String endSeq) {
169: String[][] sarr0 = getUnicodeCharRules();
170: String[][] sarr1 = getNewlineRules();
171: String[][] customRules = new String[6][];
172: customRules[0] = new String[] { nonterminalName,
173: "\"" + startSeq + "\"",
174: "char_minus_star_slash_list_opt", "\"" + endSeq + "\"" };
175: customRules[1] = new String[] {
176: "char_minus_" + nonterminalName, "char", Token.BUTNOT,
177: "\"" + endSeq + "\"" };
178: customRules[2] = new String[] {
179: "char_minus_" + nonterminalName + "_list",
180: "char_minus_" + nonterminalName + "_list",
181: "char_minus_" + nonterminalName };
182: customRules[3] = new String[] {
183: "char_minus_" + nonterminalName + "_list",
184: "char_minus_" + nonterminalName };
185: customRules[4] = new String[] {
186: "char_minus_" + nonterminalName + "_list_opt",
187: "char_minus_" + nonterminalName + "_list" };
188: customRules[5] = new String[] { "char_minus_" + nonterminalName
189: + "_list_opt" /*nothing*/};
190: return catenizeRules(new String[][][] { sarr0, sarr1,
191: customRules });
192: }
193:
194: /** Rules to scan one UNICODE character: 0x0 .. 0xFFFF. */
195: public static final String[][] getUnicodeCharRules() {
196: return getSomeRules(21, 22);
197: }
198:
199: /** Rules to scan one platform independent newline. */
200: public static final String[][] getNewlineRules() {
201: return getSomeRules(16, 21);
202: }
203:
204: /** Rules to scan newlines: the single-newline rules plus the first two entries of newlinesRules. */
205: public static final String[][] getNewlinesRules() {
206: String[][] sarr0 = getNewlineRules();
207: String[][] sarr1 = getSomeRules(0, 2, newlinesRules);
208: return catenizeRules(new String[][][] { sarr0, sarr1 });
209: }
210:
211: /** Rules to scan one space. */
212: public static final String[][] getSpaceRules() {
213: return getSomeRules(13, 16);
214: }
215:
216: /** Rules to scan spaces. */
217: public static final String[][] getSpacesRules() {
218: String[][] sarr0 = getSpaceRules();
219: String[][] sarr1 = getSomeRules(242, 244);
220: return catenizeRules(new String[][][] { sarr0, sarr1 });
221: }
222:
223: /** Rules to scan one space or newline. */
224: public static final String[][] getWhitespaceRules() {
225: String[][] sarr0 = getSpaceRules();
226: String[][] sarr1 = getNewlineRules();
227: String[][] sarr2 = getSomeRules(0, 2, whitespaceRules);
228: return catenizeRules(new String[][][] { sarr0, sarr1, sarr2 });
229: }
230:
231: /** Rules to scan spaces or newlines. */
232: public static final String[][] getWhitespacesRules() {
233: String[][] sarr0 = getSpacesRules();
234: String[][] sarr1 = getNewlinesRules();
235: String[][] sarr2 = getSomeRules(0, 4, whitespaceRules);
236: return catenizeRules(new String[][][] { sarr0, sarr1, sarr2 });
237: }
238:
239: /** Rules to scan one hexdigit. */
240: public static final String[][] getHexDigitRules() {
241: return getSomeRules(10, 13);
242: }
243:
244: /** Rules to scan hexdigits that form a number, starting "0x" not included. */
245: public static final String[][] getHexDigitsRules() {
246: String[][] sarr0 = getHexDigitRules();
247: String[][] sarr1 = getSomeRules(246, 248); // more hexdigits
248: return catenizeRules(new String[][][] { sarr0, sarr1 });
249: }
250:
251: /** Rules to scan one letter. */
252: public static final String[][] getUnicodeLetterRules() {
253: return getSomeRules(37, 242);
254: }
255:
256: /** Rules to scan one digit. */
257: public static final String[][] getUnicodeDigitRules() {
258: return getSomeRules(22, 37);
259: }
260:
261: /** Rules to scan digits. */
262: public static final String[][] getUnicodeDigitsRules() {
263: String[][] sarr0 = getUnicodeDigitRules();
264: String[][] sarr1 = getSomeRules(244, 246); // more digits
265: return catenizeRules(new String[][][] { sarr0, sarr1 });
266: }
267:
268: /** Rules to scan identifiers that start with letter and continue with letter or digit or '_'. */
269: public static final String[][] getUnicodeIdentifierRules() {
270: String[][] sarr0 = getUnicodeDigitRules();
271: String[][] sarr1 = getUnicodeLetterRules();
272: String[][] sarr2 = getSomeRules(259, 268);
273: return catenizeRules(new String[][][] { sarr0, sarr1, sarr2 });
274: }
275:
276: /** Rules to scan C/Java-like 'c'haracterdefinitions: '\377', 'A', '\n'. */
277: public static final String[][] getUnicodeChardefRules() {
278: String[][] sarr0 = getUnicodeCharRules();
279: String[][] sarr1 = getSomeRules(0, 1, digitRules); // octdigit
280: String[][] sarr2 = getSomeRules(0, 2, chardefRules);
281: String[][] sarr3 = getSomeRules(248, 249); // part of bnf_chardef
282: String[][] sarr4 = getSomeRules(251, 258); // part of bnf_chardef
283: return catenizeRules(new String[][][] { sarr0, sarr1, sarr2,
284: sarr3, sarr4 });
285: }
286:
287: /** Rules to scan BNF-like 'c'haracterdefinitions. They differ from C/Java-chardefs in that they can be written as digits: 0x20. */
288: public static final String[][] getUnicodeBNFChardefRules() {
289: String[][] sarr0 = getUnicodeCharRules();
290: String[][] sarr1 = getHexDigitsRules();
291: String[][] sarr2 = getUnicodeDigitsRules();
292: String[][] sarr3 = getSomeRules(248, 259);
293: return catenizeRules(new String[][][] { sarr0, sarr1, sarr2,
294: sarr3 });
295: }
296:
297: /** Rules to scan "stringdefinitions" that can contain backslash as masking character. */
298: public static final String[][] getUnicodeStringdefRules() {
299: String[][] sarr0 = getUnicodeCharRules();
300: String[][] sarr1 = getSomeRules(268, 284);
301: return catenizeRules(new String[][][] { sarr0, sarr1 });
302: }
303:
304: /** Rules to read a `lexerrule` within EBNF syntax specifications. */
305: public static final String[][] getRulerefRules() {
306: String[][] sarr0 = getUnicodeIdentifierRules();
307: String[][] sarr1 = getSomeRules(297, 298);
308: return catenizeRules(new String[][][] { sarr0, sarr1 });
309: }
310:
311: /** Rules to read quantifiers "*+?" within EBNF syntax specifications. */
312: public static final String[][] getQuantifierRules() {
313: return getSomeRules(7, 10);
314: }
315:
316: /** Rules to scan C-style slash-star and slash-slash AND shell-style # comments. */
317: public static final String[][] getCommentRules() {
318: String[][] sarr0 = getCStyleCommentRules();
319: String[][] sarr1 = getSomeRules(296, 297);
320: String[][] sarr2 = getSomeRules(299, 301);
321: return catenizeRules(new String[][][] { sarr0, sarr1, sarr2 });
322: }
323:
324: /** Rules to scan C-style slash-star and slash-slash comments. */
325: public static final String[][] getCStyleCommentRules() {
326: String[][] sarr0 = getUnicodeCharRules();
327: String[][] sarr1 = getNewlineRules();
328: String[][] sarr2 = getSomeRules(284, 296);
329: return catenizeRules(new String[][][] { sarr0, sarr1, sarr2 });
330: }
331:
332: /** Rules to scan # shell-style comments. */
333: public static final String[][] getShellStyleCommentRules() {
334: String[][] sarr0 = getUnicodeCharRules();
335: String[][] sarr1 = getNewlineRules();
336: String[][] sarr2 = getSomeRules(291, 297);
337: return catenizeRules(new String[][][] { sarr0, sarr1, sarr2 });
338: }
339:
340: /** Rules for XML chars (the xmlCharRules table). */
341: public static final String[][] getUnicodeXmlCharRules() {
342: return xmlCharRules;
343: }
344:
345: /** Rules for XML combining chars. */
346: public static final String[][] getUnicodeCombiningCharRules() {
347: String[][] sarr = getSomeRules(0, 95,
348: xmlCombinigAndExtenderRules);
349: return sarr;
350: }
351:
352: /** Rules for XML extender chars. */
353: public static final String[][] getUnicodeExtenderCharRules() {
354: String[][] sarr = getSomeRules(95, 106,
355: xmlCombinigAndExtenderRules);
356: return sarr;
357: }
358:
359: /** Rules for octal number chars. */
360: public static final String[][] getOctDigitsRules() {
361: String[][] sarr = getSomeRules(0, 3, digitRules);
362: return sarr;
363: }
364:
365: /** Rules for binary number chars. */
366: public static final String[][] getBinDigitsRules() {
367: String[][] sarr = getSomeRules(3, 6, digitRules);
368: return sarr;
369: }
370:
371: /** Rules for general number chars (integer, float). */
372: public static final String[][] getNumberRules() {
373: String[][] sarr2 = getIntegerRules();
374: String[][] sarr1 = getFloatRules();
375: String[][] sarr0 = getSomeRules(0, 2, numberRules);
376: return catenizeRules(new String[][][] { sarr0, sarr1, sarr2 });
377: }
378:
379: /** Rules for integer number chars. */
380: public static final String[][] getIntegerRules() {
381: String[][] sarr2 = getHexDigitsRules();
382: String[][] sarr1 = getUnicodeDigitsRules();
383: String[][] sarr0 = getSomeRules(19, 25, numberRules);
384: return catenizeRules(new String[][][] { sarr0, sarr1, sarr2 });
385: }
386:
387: /** Rules for float number chars. */
388: public static final String[][] getFloatRules() {
389: String[][] sarr1 = getUnicodeDigitsRules();
390: String[][] sarr0 = getSomeRules(2, 19, numberRules);
391: return catenizeRules(new String[][][] { sarr0, sarr1 });
392: }
393:
394: private static final String[][] getSomeRules(int startIncl,
395: int endExcl) {
396: return getSomeRules(startIncl, endExcl, lexerSyntax);
397: }
398:
399: private static final String[][] getSomeRules(int startIncl,
400: int endExcl, String[][] rules) {
401: String[][] sarr = new String[endExcl - startIncl][];
402: int j = 0;
403: for (int i = startIncl; i < endExcl; i++, j++)
404: sarr[j] = rules[i];
405: return sarr;
406: }
407:
408: /** Print a grammar to System.out. */
409: public static void printRules(String[][] syntax) {
410: for (int i = 0; i < syntax.length; i++) {
411: for (int j = 0; j < syntax[i].length; j++)
412: System.out.print(j == 1 ? " ::= " + syntax[i][j] + " "
413: : syntax[i][j] + " ");
414: System.out.println();
415: }
416: }
417:
418: /** Catenizes some rule sets to one rule set. Does not check for uniqueness. */
419: public static final String[][] catenizeRules(String[][][] arrays) {
420: int len = 0;
421: for (int i = 0; i < arrays.length; i++)
422: len += arrays[i].length;
423:
424: String[][] sarr = new String[len][];
425:
426: int k = 0;
427: for (int i = 0; i < arrays.length; i++) {
428: for (int j = 0; j < arrays[i].length; j++) {
429: sarr[k] = arrays[i][j];
430: k++;
431: }
432: }
433:
434: return sarr;
435: }
436:
437: /** Premade lexer syntax used to scan textual EBNF-like syntax specifications. The two-argument getSomeRules() slices this table by the index noted in front of each rule. */
438: public static final String[][] lexerSyntax = {
439:
440: // CAUTION: Do NOT edit without changing indexes in source above!!!
441:
442: /*0*/{ Token.TOKEN, "identifier" },
443: /*1*/{ Token.TOKEN, "bnf_chardef" },
444: /*2*/{ Token.TOKEN, "stringdef" },
445: /*3*/{ Token.TOKEN, "quantifier" }, // see 297 (ruleref rule) and 298 (TOKEN entry for ruleref)
446:
447: /*4*/{ Token.IGNORED, "spaces" },
448: /*5*/{ Token.IGNORED, "newline" },
449: /*6*/{ Token.IGNORED, "comment" },
450:
451: /*7*/{ "quantifier", "'*'" },
452: /*8*/{ "quantifier", "'+'" },
453: /*9*/{ "quantifier", "'?'" },
454:
455: // programmer digits (10..12 are returned by getHexDigitRules)
456:
457: /*10*/{ "hexdigit", "'0'", Token.UPTO, "'9'" },
458: /*11*/{ "hexdigit", "'A'", Token.UPTO, "'F'" },
459: /*12*/{ "hexdigit", "'a'", Token.UPTO, "'f'" },
460:
461: // formatting characters (13..15 are returned by getSpaceRules, 16..20 by getNewlineRules)
462:
463: /*13*/{ "space", "0x20" },
464: /*14*/{ "space", "0x9" },
465: /*15*/{ "space", "0xC" }, // formfeed
466:
467: /*16*/{ "cr", "'\\r'" }, // 0xD
468: /*17*/{ "nl", "'\\n'" }, // 0xA
469: /*18*/{ "newline", "cr", "nl" },
470: /*19*/{ "newline", "cr" },
471: /*20*/{ "newline", "nl" },
472:
473: // UNICODE character set (21: getUnicodeCharRules, 22..36: getUnicodeDigitRules, 37..241: getUnicodeLetterRules)
474:
475: /*21*/{ "char", "0x0", Token.UPTO, "0xFFFF" },
476:
477: /*22*/{ "digit", "0x0030", Token.UPTO, "0x0039" },
478: /*23*/{ "digit", "0x0660", Token.UPTO, "0x0669" },
479: /*24*/{ "digit", "0x06F0", Token.UPTO, "0x06F9" },
480: /*25*/{ "digit", "0x0966", Token.UPTO, "0x096F" },
481: /*26*/{ "digit", "0x09E6", Token.UPTO, "0x09EF" },
482: /*27*/{ "digit", "0x0A66", Token.UPTO, "0x0A6F" },
483: /*28*/{ "digit", "0x0AE6", Token.UPTO, "0x0AEF" },
484: /*29*/{ "digit", "0x0B66", Token.UPTO, "0x0B6F" },
485: /*30*/{ "digit", "0x0BE7", Token.UPTO, "0x0BEF" },
486: /*31*/{ "digit", "0x0C66", Token.UPTO, "0x0C6F" },
487: /*32*/{ "digit", "0x0CE6", Token.UPTO, "0x0CEF" },
488: /*33*/{ "digit", "0x0D66", Token.UPTO, "0x0D6F" },
489: /*34*/{ "digit", "0x0E50", Token.UPTO, "0x0E59" },
490: /*35*/{ "digit", "0x0ED0", Token.UPTO, "0x0ED9" },
491: /*36*/{ "digit", "0x0F20", Token.UPTO, "0x0F29" },
492:
493: /*37*/{ "letter", "0x0041", Token.UPTO, "0x005A" }, // BaseChar
494: /*38*/{ "letter", "0x0061", Token.UPTO, "0x007A" },
495: /*39*/{ "letter", "0x00C0", Token.UPTO, "0x00D6" },
496: /*40*/{ "letter", "0x00D8", Token.UPTO, "0x00F6" },
497: /*41*/{ "letter", "0x00F8", Token.UPTO, "0x00FF" },
498: /*42*/{ "letter", "0x0100", Token.UPTO, "0x0131" },
499: /*43*/{ "letter", "0x0134", Token.UPTO, "0x013E" },
500: /*44*/{ "letter", "0x0141", Token.UPTO, "0x0148" },
501: /*45*/{ "letter", "0x014A", Token.UPTO, "0x017E" },
502: /*46*/{ "letter", "0x0180", Token.UPTO, "0x01C3" },
503: /*47*/{ "letter", "0x01CD", Token.UPTO, "0x01F0" },
504: /*48*/{ "letter", "0x01F4", Token.UPTO, "0x01F5" },
505: /*49*/{ "letter", "0x01FA", Token.UPTO, "0x0217" },
506: /*50*/{ "letter", "0x0250", Token.UPTO, "0x02A8" },
507: /*51*/{ "letter", "0x02BB", Token.UPTO, "0x02C1" },
508: /*52*/{ "letter", "0x0386" },
509: /*53*/{ "letter", "0x0388", Token.UPTO, "0x038A" },
510: /*54*/{ "letter", "0x038C" },
511: /*55*/{ "letter", "0x038E", Token.UPTO, "0x03A1" },
512: /*56*/{ "letter", "0x03A3", Token.UPTO, "0x03CE" },
513: /*57*/{ "letter", "0x03D0", Token.UPTO, "0x03D6" },
514: /*58*/{ "letter", "0x03DA" },
515: /*59*/{ "letter", "0x03DC" },
516: /*60*/{ "letter", "0x03DE" },
517: /*61*/{ "letter", "0x03E0" },
518: /*62*/{ "letter", "0x03E2", Token.UPTO, "0x03F3" },
519: /*63*/{ "letter", "0x0401", Token.UPTO, "0x040C" },
520: /*64*/{ "letter", "0x040E", Token.UPTO, "0x044F" },
521: /*65*/{ "letter", "0x0451", Token.UPTO, "0x045C" },
522: /*66*/{ "letter", "0x045E", Token.UPTO, "0x0481" },
523: /*67*/{ "letter", "0x0490", Token.UPTO, "0x04C4" },
524: /*68*/{ "letter", "0x04C7", Token.UPTO, "0x04C8" },
525: /*69*/{ "letter", "0x04CB", Token.UPTO, "0x04CC" },
526: /*70*/{ "letter", "0x04D0", Token.UPTO, "0x04EB" },
527: /*71*/{ "letter", "0x04EE", Token.UPTO, "0x04F5" },
528: /*72*/{ "letter", "0x04F8", Token.UPTO, "0x04F9" },
529: /*73*/{ "letter", "0x0531", Token.UPTO, "0x0556" },
530: /*74*/{ "letter", "0x0559" },
531: /*75*/{ "letter", "0x0561", Token.UPTO, "0x0586" },
532: /*76*/{ "letter", "0x05D0", Token.UPTO, "0x05EA" },
533: /*77*/{ "letter", "0x05F0", Token.UPTO, "0x05F2" },
534: /*78*/{ "letter", "0x0621", Token.UPTO, "0x063A" },
535: /*79*/{ "letter", "0x0641", Token.UPTO, "0x064A" },
536: /*80*/{ "letter", "0x0671", Token.UPTO, "0x06B7" },
537: /*81*/{ "letter", "0x06BA", Token.UPTO, "0x06BE" },
538: /*82*/{ "letter", "0x06C0", Token.UPTO, "0x06CE" },
539: /*83*/{ "letter", "0x06D0", Token.UPTO, "0x06D3" },
540: /*84*/{ "letter", "0x06D5" },
541: /*85*/{ "letter", "0x06E5", Token.UPTO, "0x06E6" },
542: /*86*/{ "letter", "0x0905", Token.UPTO, "0x0939" },
543: /*87*/{ "letter", "0x093D" },
544: /*88*/{ "letter", "0x0958", Token.UPTO, "0x0961" },
545: /*89*/{ "letter", "0x0985", Token.UPTO, "0x098C" },
546: /*90*/{ "letter", "0x098F", Token.UPTO, "0x0990" },
547: /*91*/{ "letter", "0x0993", Token.UPTO, "0x09A8" },
548: /*92*/{ "letter", "0x09AA", Token.UPTO, "0x09B0" },
549: /*93*/{ "letter", "0x09B2" },
550: /*94*/{ "letter", "0x09B6", Token.UPTO, "0x09B9" },
551: /*95*/{ "letter", "0x09DC", Token.UPTO, "0x09DD" },
552: /*96*/{ "letter", "0x09DF", Token.UPTO, "0x09E1" },
553: /*97*/{ "letter", "0x09F0", Token.UPTO, "0x09F1" },
554: /*98*/{ "letter", "0x0A05", Token.UPTO, "0x0A0A" },
555: /*99*/{ "letter", "0x0A0F", Token.UPTO, "0x0A10" },
556: /*100*/{ "letter", "0x0A13", Token.UPTO, "0x0A28" },
557: /*101*/{ "letter", "0x0A2A", Token.UPTO, "0x0A30" },
558: /*102*/{ "letter", "0x0A32", Token.UPTO, "0x0A33" },
559: /*103*/{ "letter", "0x0A35", Token.UPTO, "0x0A36" },
560: /*104*/{ "letter", "0x0A38", Token.UPTO, "0x0A39" },
561: /*105*/{ "letter", "0x0A59", Token.UPTO, "0x0A5C" },
562: /*106*/{ "letter", "0x0A5E" },
563: /*107*/{ "letter", "0x0A72", Token.UPTO, "0x0A74" },
564: /*108*/{ "letter", "0x0A85", Token.UPTO, "0x0A8B" },
565: /*109*/{ "letter", "0x0A8D" },
566: /*110*/{ "letter", "0x0A8F", Token.UPTO, "0x0A91" },
567: /*111*/{ "letter", "0x0A93", Token.UPTO, "0x0AA8" },
568: /*112*/{ "letter", "0x0AAA", Token.UPTO, "0x0AB0" },
569: /*113*/{ "letter", "0x0AB2", Token.UPTO, "0x0AB3" },
570: /*114*/{ "letter", "0x0AB5", Token.UPTO, "0x0AB9" },
571: /*115*/{ "letter", "0x0ABD" },
572: /*116*/{ "letter", "0x0AE0" },
573: /*117*/{ "letter", "0x0B05", Token.UPTO, "0x0B0C" },
574: /*118*/{ "letter", "0x0B0F", Token.UPTO, "0x0B10" },
575: /*119*/{ "letter", "0x0B13", Token.UPTO, "0x0B28" },
576: /*120*/{ "letter", "0x0B2A", Token.UPTO, "0x0B30" },
577: /*121*/{ "letter", "0x0B32", Token.UPTO, "0x0B33" },
578: /*122*/{ "letter", "0x0B36", Token.UPTO, "0x0B39" },
579: /*123*/{ "letter", "0x0B3D" },
580: /*124*/{ "letter", "0x0B5C", Token.UPTO, "0x0B5D" },
581: /*125*/{ "letter", "0x0B5F", Token.UPTO, "0x0B61" },
582: /*126*/{ "letter", "0x0B85", Token.UPTO, "0x0B8A" },
583: /*127*/{ "letter", "0x0B8E", Token.UPTO, "0x0B90" },
584: /*128*/{ "letter", "0x0B92", Token.UPTO, "0x0B95" },
585: /*129*/{ "letter", "0x0B99", Token.UPTO, "0x0B9A" },
586: /*130*/{ "letter", "0x0B9C" },
587: /*131*/{ "letter", "0x0B9E", Token.UPTO, "0x0B9F" },
588: /*132*/{ "letter", "0x0BA3", Token.UPTO, "0x0BA4" },
589: /*133*/{ "letter", "0x0BA8", Token.UPTO, "0x0BAA" },
590: /*134*/{ "letter", "0x0BAE", Token.UPTO, "0x0BB5" },
591: /*135*/{ "letter", "0x0BB7", Token.UPTO, "0x0BB9" },
592: /*136*/{ "letter", "0x0C05", Token.UPTO, "0x0C0C" },
593: /*137*/{ "letter", "0x0C0E", Token.UPTO, "0x0C10" },
594: /*138*/{ "letter", "0x0C12", Token.UPTO, "0x0C28" },
595: /*139*/{ "letter", "0x0C2A", Token.UPTO, "0x0C33" },
596: /*140*/{ "letter", "0x0C35", Token.UPTO, "0x0C39" },
597: /*141*/{ "letter", "0x0C60", Token.UPTO, "0x0C61" },
598: /*142*/{ "letter", "0x0C85", Token.UPTO, "0x0C8C" },
599: /*143*/{ "letter", "0x0C8E", Token.UPTO, "0x0C90" },
600: /*144*/{ "letter", "0x0C92", Token.UPTO, "0x0CA8" },
601: /*145*/{ "letter", "0x0CAA", Token.UPTO, "0x0CB3" },
602: /*146*/{ "letter", "0x0CB5", Token.UPTO, "0x0CB9" },
603: /*147*/{ "letter", "0x0CDE" },
604: /*148*/{ "letter", "0x0CE0", Token.UPTO, "0x0CE1" },
605: /*149*/{ "letter", "0x0D05", Token.UPTO, "0x0D0C" },
606: /*150*/{ "letter", "0x0D0E", Token.UPTO, "0x0D10" },
607: /*151*/{ "letter", "0x0D12", Token.UPTO, "0x0D28" },
608: /*152*/{ "letter", "0x0D2A", Token.UPTO, "0x0D39" },
609: /*153*/{ "letter", "0x0D60", Token.UPTO, "0x0D61" },
610: /*154*/{ "letter", "0x0E01", Token.UPTO, "0x0E2E" },
611: /*155*/{ "letter", "0x0E30" },
612: /*156*/{ "letter", "0x0E32", Token.UPTO, "0x0E33" },
613: /*157*/{ "letter", "0x0E40", Token.UPTO, "0x0E45" },
614: /*158*/{ "letter", "0x0E81", Token.UPTO, "0x0E82" },
615: /*159*/{ "letter", "0x0E84" },
616: /*160*/{ "letter", "0x0E87", Token.UPTO, "0x0E88" },
617: /*161*/{ "letter", "0x0E8A" },
618: /*162*/{ "letter", "0x0E8D" },
619: /*163*/{ "letter", "0x0E94", Token.UPTO, "0x0E97" },
620: /*164*/{ "letter", "0x0E99", Token.UPTO, "0x0E9F" },
621: /*165*/{ "letter", "0x0EA1", Token.UPTO, "0x0EA3" },
622: /*166*/{ "letter", "0x0EA5" },
623: /*167*/{ "letter", "0x0EA7" },
624: /*168*/{ "letter", "0x0EAA", Token.UPTO, "0x0EAB" },
625: /*169*/{ "letter", "0x0EAD", Token.UPTO, "0x0EAE" },
626: /*170*/{ "letter", "0x0EB0" },
627: /*171*/{ "letter", "0x0EB2", Token.UPTO, "0x0EB3" },
628: /*172*/{ "letter", "0x0EBD" },
629: /*173*/{ "letter", "0x0EC0", Token.UPTO, "0x0EC4" },
630: /*174*/{ "letter", "0x0F40", Token.UPTO, "0x0F47" },
631: /*175*/{ "letter", "0x0F49", Token.UPTO, "0x0F69" },
632: /*176*/{ "letter", "0x10A0", Token.UPTO, "0x10C5" },
633: /*177*/{ "letter", "0x10D0", Token.UPTO, "0x10F6" },
634: /*178*/{ "letter", "0x1100" },
635: /*179*/{ "letter", "0x1102", Token.UPTO, "0x1103" },
636: /*180*/{ "letter", "0x1105", Token.UPTO, "0x1107" },
637: /*181*/{ "letter", "0x1109" },
638: /*182*/{ "letter", "0x110B", Token.UPTO, "0x110C" },
639: /*183*/{ "letter", "0x110E", Token.UPTO, "0x1112" },
640: /*184*/{ "letter", "0x113C" },
641: /*185*/{ "letter", "0x113E" },
642: /*186*/{ "letter", "0x1140" },
643: /*187*/{ "letter", "0x114C" },
644: /*188*/{ "letter", "0x114E" },
645: /*189*/{ "letter", "0x1150" },
646: /*190*/{ "letter", "0x1154", Token.UPTO, "0x1155" },
647: /*191*/{ "letter", "0x1159" },
648: /*192*/{ "letter", "0x115F", Token.UPTO, "0x1161" },
649: /*193*/{ "letter", "0x1163" },
650: /*194*/{ "letter", "0x1165" },
651: /*195*/{ "letter", "0x1167" },
652: /*196*/{ "letter", "0x1169" },
653: /*197*/{ "letter", "0x116D", Token.UPTO, "0x116E" },
654: /*198*/{ "letter", "0x1172", Token.UPTO, "0x1173" },
655: /*199*/{ "letter", "0x1175" },
656: /*200*/{ "letter", "0x119E" },
657: /*201*/{ "letter", "0x11A8" },
658: /*202*/{ "letter", "0x11AB" },
659: /*203*/{ "letter", "0x11AE", Token.UPTO, "0x11AF" },
660: /*204*/{ "letter", "0x11B7", Token.UPTO, "0x11B8" },
661: /*205*/{ "letter", "0x11BA" },
662: /*206*/{ "letter", "0x11BC", Token.UPTO, "0x11C2" },
663: /*207*/{ "letter", "0x11EB" },
664: /*208*/{ "letter", "0x11F0" },
665: /*209*/{ "letter", "0x11F9" },
666: /*210*/{ "letter", "0x1E00", Token.UPTO, "0x1E9B" },
667: /*211*/{ "letter", "0x1EA0", Token.UPTO, "0x1EF9" },
668: /*212*/{ "letter", "0x1F00", Token.UPTO, "0x1F15" },
669: /*213*/{ "letter", "0x1F18", Token.UPTO, "0x1F1D" },
670: /*214*/{ "letter", "0x1F20", Token.UPTO, "0x1F45" },
671: /*215*/{ "letter", "0x1F48", Token.UPTO, "0x1F4D" },
672: /*216*/{ "letter", "0x1F50", Token.UPTO, "0x1F57" },
673: /*217*/{ "letter", "0x1F59" },
674: /*218*/{ "letter", "0x1F5B" },
675: /*219*/{ "letter", "0x1F5D" },
676: /*220*/{ "letter", "0x1F5F", Token.UPTO, "0x1F7D" },
677: /*221*/{ "letter", "0x1F80", Token.UPTO, "0x1FB4" },
678: /*222*/{ "letter", "0x1FB6", Token.UPTO, "0x1FBC" },
679: /*223*/{ "letter", "0x1FBE" },
680: /*224*/{ "letter", "0x1FC2", Token.UPTO, "0x1FC4" },
681: /*225*/{ "letter", "0x1FC6", Token.UPTO, "0x1FCC" },
682: /*226*/{ "letter", "0x1FD0", Token.UPTO, "0x1FD3" },
683: /*227*/{ "letter", "0x1FD6", Token.UPTO, "0x1FDB" },
684: /*228*/{ "letter", "0x1FE0", Token.UPTO, "0x1FEC" },
685: /*229*/{ "letter", "0x1FF2", Token.UPTO, "0x1FF4" },
686: /*230*/{ "letter", "0x1FF6", Token.UPTO, "0x1FFC" },
687: /*231*/{ "letter", "0x2126" },
688: /*232*/{ "letter", "0x212A", Token.UPTO, "0x212B" },
689: /*233*/{ "letter", "0x212E" },
690: /*234*/{ "letter", "0x2180", Token.UPTO, "0x2182" },
691: /*235*/{ "letter", "0x3041", Token.UPTO, "0x3094" },
692: /*236*/{ "letter", "0x30A1", Token.UPTO, "0x30FA" },
693: /*237*/{ "letter", "0x3105", Token.UPTO, "0x312C" },
694: /*238*/{ "letter", "0xAC00", Token.UPTO, "0xD7A3" },
695: // Ideographic
696: /*239*/{ "letter", "0x4E00", Token.UPTO, "0x9FA5" },
697: /*240*/{ "letter", "0x3007" },
698: /*241*/{ "letter", "0x3021", Token.UPTO, "0x3029" },
699:
700: // helper rules (242..243: getSpacesRules, 244..245: getUnicodeDigitsRules, 246..247: getHexDigitsRules)
701:
702: /*242*/{ "spaces", "spaces", "space" },
703: /*243*/{ "spaces", "space" },
704:
705: /*244*/{ "digits", "digits", "digit" },
706: /*245*/{ "digits", "digit" },
707:
708: /*246*/{ "hexdigits", "hexdigits", "hexdigit" },
709: /*247*/{ "hexdigits", "hexdigit" },
710:
711: /*248*/{ "bnf_chardef", "\"'\"", "char", "\"'\"" },
712: /*249*/{ "bnf_chardef", "\"0x\"", "hexdigits" },
713: /*250*/{ "bnf_chardef", "\"0X\"", "hexdigits" },
714: /*251*/{ "bnf_chardef", "\"'\\''\"" }, // single quote
715: /*252*/{ "bnf_chardef", "\"'\\n'\"" }, // newline
716: /*253*/{ "bnf_chardef", "\"'\\r'\"" }, // carriage return
717: /*254*/{ "bnf_chardef", "\"'\\t'\"" }, // tabulator
718: /*255*/{ "bnf_chardef", "\"'\\f'\"" }, // formfeed
719: /*256*/{ "bnf_chardef", "\"'\\b'\"" }, // backspace
720: /*257*/{ "bnf_chardef", "\"'\\\\'\"" }, // backslash
721: // bell \a was removed in favor of backslash and too much work re-numbering ...
722: /*258*/{ "bnf_chardef", "digits" },
723:
724: /*259*/{ "identifier", "letter_or_uscore",
725: "letter_or_digit_list_opt" },
726: /*260*/{ "letter_or_uscore", "letter" },
727: /*261*/{ "letter_or_uscore", "'_'" },
728: /*262*/{ "letter_or_digit", "letter_or_uscore" },
729: /*263*/{ "letter_or_digit", "digit" },
730: /*264*/{ "letter_or_digit_list", "letter_or_digit_list",
731: "letter_or_digit" },
732: /*265*/{ "letter_or_digit_list", "letter_or_digit" },
733: /*266*/{ "letter_or_digit_list_opt",
734: "letter_or_digit_list" },
735: /*267*/{ "letter_or_digit_list_opt" /*nothing*/},
736:
737: /*268*/{ "stringdef", "'\"'", "stringpart_list_opt",
738: "'\"'" },
739: /*269*/{ "char_minus_doublequote_list",
740: "char_minus_doublequote_list",
741: "char_minus_doublequote" },
742: /*270*/{ "char_minus_doublequote_list",
743: "char_minus_doublequote" },
744: /*271*/{ "char_minus_doublequote", "char", Token.BUTNOT,
745: "'\"'", Token.BUTNOT, "'\\'" }, // does not contain " or \
746: /*272*/{ "char_minus_doublequote_list_opt",
747: "char_minus_doublequote_list" },
748: /*273*/{ "char_minus_doublequote_list_opt" /*nothing*/},
749: /*274*/{ "backslash_char_list", "backslash_char_list",
750: "backslash_char" },
751: /*275*/{ "backslash_char_list", "backslash_char" },
752: /*276*/{ "backslash_char", "'\\'", "char" }, // escaped character like "\""
753: /*277*/{ "backslash_char_list_opt", "backslash_char_list" },
754: /*278*/{ "backslash_char_list_opt" /*nothing*/},
755: /*279*/{ "stringpart", "char_minus_doublequote_list_opt",
756: "backslash_char_list_opt" },
757: /*280*/{ "stringpart_list", "stringpart_list",
758: "stringpart" },
759: /*281*/{ "stringpart_list", "stringpart" },
760: /*282*/{ "stringpart_list_opt", "stringpart_list" },
761: /*283*/{ "stringpart_list_opt" /*nothing*/},
762:
763: /*284*/{ "cstylecomment", "\"/*\"",
764: "char_minus_star_slash_list_opt", "\"*/\"" },
765: /*285*/{ "char_minus_star_slash", "char", Token.BUTNOT,
766: "\"*/\"" },
767: /*286*/{ "char_minus_star_slash_list",
768: "char_minus_star_slash_list",
769: "char_minus_star_slash" },
770: /*287*/{ "char_minus_star_slash_list",
771: "char_minus_star_slash" },
772: /*288*/{ "char_minus_star_slash_list_opt",
773: "char_minus_star_slash_list" },
774: /*289*/{ "char_minus_star_slash_list_opt" /*nothing*/},
775:
776: /*290*/{ "cstylecomment", "\"//\"",
777: "char_minus_newline_list_opt" },
778: /*291*/{ "char_minus_newline", "char", Token.BUTNOT,
779: "newline" },
780: /*292*/{ "char_minus_newline_list",
781: "char_minus_newline_list", "char_minus_newline" },
782: /*293*/{ "char_minus_newline_list", "char_minus_newline" },
783: /*294*/{ "char_minus_newline_list_opt",
784: "char_minus_newline_list" },
785: /*295*/{ "char_minus_newline_list_opt" /*nothing*/},
786: /*296*/{ "shellstylecomment", "'#'",
787: "char_minus_newline_list_opt" },
788:
789: /*297*/{ "ruleref", "'" + Token.COMMAND_QUOTE + "'",
790: "identifier", "'" + Token.COMMAND_QUOTE + "'" },
791: /*298*/{ Token.TOKEN, "ruleref" },
792:
793: /*299*/{ "comment", "cstylecomment" },
794: /*300*/{ "comment", "shellstylecomment" }, };
795:
796: /** XML Char definitions of W3C. */
797: public static final String[][] xmlCharRules = {
798: /*0*/{ "xmlchar", "0x9" },
799: /*1*/{ "xmlchar", "0xA" },
800: /*2*/{ "xmlchar", "0xD" },
801: /*3*/{ "xmlchar", "0x20", Token.UPTO, "0xD7FF" },
802: /*4*/{ "xmlchar", "0xE000", Token.UPTO, "0xFFFD" },
803: /*5*/{ "xmlchar", "0x10000", Token.UPTO, "0x10FFFF" }, };
804:
805: /** XML CombiningChar and XML Extender definitions of W3C. */
806: public static final String[][] xmlCombinigAndExtenderRules = {
807: /*0*/{ "combiningchar", "0x0300", Token.UPTO, "0x0345" },
808: /*1*/{ "combiningchar", "0x0360", Token.UPTO, "0x0361" },
809: /*2*/{ "combiningchar", "0x0483", Token.UPTO, "0x0486" },
810: /*3*/{ "combiningchar", "0x0591", Token.UPTO, "0x05A1" },
811: /*4*/{ "combiningchar", "0x05A3", Token.UPTO, "0x05B9" },
812: /*5*/{ "combiningchar", "0x05BB", Token.UPTO, "0x05BD" },
813: /*6*/{ "combiningchar", "0x05BF" },
814: /*7*/{ "combiningchar", "0x05C1", Token.UPTO, "0x05C2" },
815: /*8*/{ "combiningchar", "0x05C4" },
816: /*9*/{ "combiningchar", "0x064B", Token.UPTO, "0x0652" },
817: /*10*/{ "combiningchar", "0x0670" },
818: /*11*/{ "combiningchar", "0x06D6", Token.UPTO, "0x06DC" },
819: /*12*/{ "combiningchar", "0x06DD", Token.UPTO, "0x06DF" },
820: /*13*/{ "combiningchar", "0x06E0", Token.UPTO, "0x06E4" },
821: /*14*/{ "combiningchar", "0x06E7", Token.UPTO, "0x06E8" },
822: /*15*/{ "combiningchar", "0x06EA", Token.UPTO, "0x06ED" },
823: /*16*/{ "combiningchar", "0x0901", Token.UPTO, "0x0903" },
824: /*17*/{ "combiningchar", "0x093C" },
825: /*18*/{ "combiningchar", "0x093E", Token.UPTO, "0x094C" },
826: /*19*/{ "combiningchar", "0x094D" },
827: /*20*/{ "combiningchar", "0x0951", Token.UPTO, "0x0954" },
828: /*21*/{ "combiningchar", "0x0962", Token.UPTO, "0x0963" },
829: /*22*/{ "combiningchar", "0x0981", Token.UPTO, "0x0983" },
830: /*23*/{ "combiningchar", "0x09BC" },
831: /*24*/{ "combiningchar", "0x09BE" },
832: /*25*/{ "combiningchar", "0x09BF" },
833: /*26*/{ "combiningchar", "0x09C0", Token.UPTO, "0x09C4" },
834: /*27*/{ "combiningchar", "0x09C7", Token.UPTO, "0x09C8" },
835: /*28*/{ "combiningchar", "0x09CB", Token.UPTO, "0x09CD" },
836: /*29*/{ "combiningchar", "0x09D7" },
837: /*30*/{ "combiningchar", "0x09E2", Token.UPTO, "0x09E3" },
838: /*31*/{ "combiningchar", "0x0A02" },
839: /*32*/{ "combiningchar", "0x0A3C" },
840: /*33*/{ "combiningchar", "0x0A3E" },
841: /*34*/{ "combiningchar", "0x0A3F" },
842: /*35*/{ "combiningchar", "0x0A40", Token.UPTO, "0x0A42" },
843: /*36*/{ "combiningchar", "0x0A47", Token.UPTO, "0x0A48" },
844: /*37*/{ "combiningchar", "0x0A4B", Token.UPTO, "0x0A4D" },
845: /*38*/{ "combiningchar", "0x0A70", Token.UPTO, "0x0A71" },
846: /*39*/{ "combiningchar", "0x0A81", Token.UPTO, "0x0A83" },
847: /*40*/{ "combiningchar", "0x0ABC" },
848: /*41*/{ "combiningchar", "0x0ABE", Token.UPTO, "0x0AC5" },
849: /*42*/{ "combiningchar", "0x0AC7", Token.UPTO, "0x0AC9" },
850: /*43*/{ "combiningchar", "0x0ACB", Token.UPTO, "0x0ACD" },
851: /*44*/{ "combiningchar", "0x0B01", Token.UPTO, "0x0B03" },
852: /*45*/{ "combiningchar", "0x0B3C" },
853: /*46*/{ "combiningchar", "0x0B3E", Token.UPTO, "0x0B43" },
854: /*47*/{ "combiningchar", "0x0B47", Token.UPTO, "0x0B48" },
855: /*48*/{ "combiningchar", "0x0B4B", Token.UPTO, "0x0B4D" },
856: /*49*/{ "combiningchar", "0x0B56", Token.UPTO, "0x0B57" },
857: /*50*/{ "combiningchar", "0x0B82", Token.UPTO, "0x0B83" },
858: /*51*/{ "combiningchar", "0x0BBE", Token.UPTO, "0x0BC2" },
859: /*52*/{ "combiningchar", "0x0BC6", Token.UPTO, "0x0BC8" },
860: /*53*/{ "combiningchar", "0x0BCA", Token.UPTO, "0x0BCD" },
861: /*54*/{ "combiningchar", "0x0BD7" },
862: /*55*/{ "combiningchar", "0x0C01", Token.UPTO, "0x0C03" },
863: /*56*/{ "combiningchar", "0x0C3E", Token.UPTO, "0x0C44" },
864: /*57*/{ "combiningchar", "0x0C46", Token.UPTO, "0x0C48" },
865: /*58*/{ "combiningchar", "0x0C4A", Token.UPTO, "0x0C4D" },
866: /*59*/{ "combiningchar", "0x0C55", Token.UPTO, "0x0C56" },
867: /*60*/{ "combiningchar", "0x0C82", Token.UPTO, "0x0C83" },
868: /*61*/{ "combiningchar", "0x0CBE", Token.UPTO, "0x0CC4" },
869: /*62*/{ "combiningchar", "0x0CC6", Token.UPTO, "0x0CC8" },
870: /*63*/{ "combiningchar", "0x0CCA", Token.UPTO, "0x0CCD" },
871: /*64*/{ "combiningchar", "0x0CD5", Token.UPTO, "0x0CD6" },
872: /*65*/{ "combiningchar", "0x0D02", Token.UPTO, "0x0D03" },
873: /*66*/{ "combiningchar", "0x0D3E", Token.UPTO, "0x0D43" },
874: /*67*/{ "combiningchar", "0x0D46", Token.UPTO, "0x0D48" },
875: /*68*/{ "combiningchar", "0x0D4A", Token.UPTO, "0x0D4D" },
876: /*69*/{ "combiningchar", "0x0D57" },
877: /*70*/{ "combiningchar", "0x0E31" },
878: /*71*/{ "combiningchar", "0x0E34", Token.UPTO, "0x0E3A" },
879: /*72*/{ "combiningchar", "0x0E47", Token.UPTO, "0x0E4E" },
880: /*73*/{ "combiningchar", "0x0EB1" },
881: /*74*/{ "combiningchar", "0x0EB4", Token.UPTO, "0x0EB9" },
882: /*75*/{ "combiningchar", "0x0EBB", Token.UPTO, "0x0EBC" },
883: /*76*/{ "combiningchar", "0x0EC8", Token.UPTO, "0x0ECD" },
884: /*77*/{ "combiningchar", "0x0F18", Token.UPTO, "0x0F19" },
885: /*78*/{ "combiningchar", "0x0F35" },
886: /*79*/{ "combiningchar", "0x0F37" },
887: /*80*/{ "combiningchar", "0x0F39" },
888: /*81*/{ "combiningchar", "0x0F3E" },
889: /*82*/{ "combiningchar", "0x0F3F" },
890: /*83*/{ "combiningchar", "0x0F71", Token.UPTO, "0x0F84" },
891: /*84*/{ "combiningchar", "0x0F86", Token.UPTO, "0x0F8B" },
892: /*85*/{ "combiningchar", "0x0F90", Token.UPTO, "0x0F95" },
893: /*86*/{ "combiningchar", "0x0F97" },
894: /*87*/{ "combiningchar", "0x0F99", Token.UPTO, "0x0FAD" },
895: /*88*/{ "combiningchar", "0x0FB1", Token.UPTO, "0x0FB7" },
896: /*89*/{ "combiningchar", "0x0FB9" },
897: /*90*/{ "combiningchar", "0x20D0", Token.UPTO, "0x20DC" },
898: /*91*/{ "combiningchar", "0x20E1" },
899: /*92*/{ "combiningchar", "0x302A", Token.UPTO, "0x302F" },
900: /*93*/{ "combiningchar", "0x3099" },
901: /*94*/{ "combiningchar", "0x309A" },
902:
903: /*95*/{ "extenderchar", "0x00B7" },
904: /*96*/{ "extenderchar", "0x02D0" },
905: /*97*/{ "extenderchar", "0x02D1" },
906: /*98*/{ "extenderchar", "0x0387" },
907: /*99*/{ "extenderchar", "0x0640" },
908: /*100*/{ "extenderchar", "0x0E46" },
909: /*101*/{ "extenderchar", "0x0EC6" },
910: /*102*/{ "extenderchar", "0x3005" },
911: /*103*/{ "extenderchar", "0x3031", Token.UPTO, "0x3035" },
912: /*104*/{ "extenderchar", "0x309D", Token.UPTO, "0x309E" },
913: /*105*/{ "extenderchar", "0x30FC", Token.UPTO, "0x30FE" }, };
914:
915: /** Numerical rules for binary and octal <b>digits</b>. */
916: public static final String[][] digitRules = {
917: /*0*/{ "octdigit", "'0'", Token.UPTO, "'7'" },
918: /*1*/{ "octdigits", "octdigits", "octdigit" },
919: /*2*/{ "octdigits", "octdigit" },
920:
921: /*3*/{ "bindigit", "'0'", Token.UPTO, "'1'" },
922: /*4*/{ "bindigits", "bindigits", "bindigit" },
923: /*5*/{ "bindigits", "bindigit" }, };
924:
925: /** Numerical rules for <b>numbers</b> within sourcecode: number ::= integer | float. */
926: public static final String[][] numberRules = {
927: // number = float | integer (incl. hexnumber)
928: /*0*/{ "number", "float" },
929: /*1*/{ "number", "integer" },
930: /*2*/{ "float", "wholenumber", "'.'", "mantissa",
931: "float_opt" },
932: /*3*/{ "wholenumber", "digits" },
933: /*4*/{ "wholenumber" /*nothing*/},
934: /*5*/{ "mantissa", "digits", "mantissa_opt" },
935: /*6*/{ "mantissa_opt", "exponent", "digits" },
936: /*7*/{ "mantissa_opt" /*nothing*/},
937: /*8*/{ "exponent", "exponentletter", "exponentsign" },
938: /*9*/{ "exponentletter", "'e'" },
939: /*10*/{ "exponentletter", "'E'" },
940: /*11*/{ "exponentsign", "'-'" },
941: /*12*/{ "exponentsign", "'+'" },
942: /*13*/{ "exponentsign" /*nothing*/},
943: /*14*/{ "float_opt", "'f'" },
944: /*15*/{ "float_opt", "'F'" },
945: /*16*/{ "float_opt", "'d'" },
946: /*17*/{ "float_opt", "'D'" },
947: /*18*/{ "float_opt" /*nothing*/},
948: /*19*/{ "integer", "\"0X\"", "hexdigits" },
949: /*20*/{ "integer", "\"0x\"", "hexdigits" },
950: /*21*/{ "integer", "digits", "integer_opt" },
951: /*22*/{ "integer_opt", "'l'" }, // "long" marker
952: /*23*/{ "integer_opt", "'L'" }, // "long" marker
953: /*24*/{ "integer_opt" /*nothing*/}, };
954:
955: /** Rules describing one or more newlines. */
956: public static final String[][] newlinesRules = {
957: { "newlines", "newlines", "newline" },
958: { "newlines", "newline" }, };
959:
960: /** Rules describing C/Java-like character definitions: 'c', '\r', '\007'. */
961: public static final String[][] chardefRules = {
962: /*0*/{ "chardef", "\"'\\\"", "'0'", Token.UPTO, "'3'",
963: "octdigit", "octdigit", "\"'\"" },
964: /*1*/{ "chardef", "bnf_chardef" }, // but only 248 and 251 - 258 !!!
965: };
966:
967: /** Rules describing whitespace: newlines and spaces, minimum one. */
968: public static final String[][] whitespaceRules = {
969: /*0*/{ "whitespace", "newline" },
970: /*1*/{ "whitespace", "space" },
971: /*2*/{ "whitespaces", "whitespaces", "whitespace" },
972: /*3*/{ "whitespaces", "whitespace" }, };
973:
/** Private constructor: code outside of this class can not create instances. */
private StandardLexerRules()
{
    // intentionally empty
}
976:
977: }
|