001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package org.apache.harmony.lang.reflect.parser;
019:
import antlr.Token;
import antlr.TokenStream;
import antlr.TokenStreamException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
023:
024: /**
025: * @author Serguei S. Zapreyev
026: * @version $Revision: 1.1.2.1 $
027: *
028: * NOTE. Initially this Signature Parser was created and debugged using lex and yacc on Linux:
029: *
030: * -bash-3.00$ uname -a
031: * Linux nstdrlel8.ins.intel.com 2.6.9-11.ELsmp #1 SMP Fri May 20 18:25:30 EDT 2005 x86_64 x86_64 x86_64 GNU/Linux
032: * -bash-3.00$ which yacc
033: * /usr/bin/yacc
034: * -bash-3.00$ which lex
035: * /usr/bin/lex
036: * -bash-3.00$ lex --version
037: * lex version 2.5.4
038: *
039: * then it was rewritten for ANTLR 2.7.5 (http://www.antlr.org/) and redebugged:
040: *
041: * // $ANTLR 2.7.5 (20050128): "signature.g" -> "SignatureParser.java"$
042: */
043: public final class SignatureLexer2 extends antlr.CharScanner implements
044: SignatureParserTokenTypes, TokenStream {
045:
046: String ident = null; // to keep previous symbol if it's identifier
047:
048: int prevLexeme = -1;// to distinguish ID and TVAR
049:
050: int stackDepth = 0; // the current acheived depth of the parsered nested parameterized types chain (ParameterizedType1<ParameterizedType2<...>;ParameterizedType3<...>>;)
051: int Lflag2; // to distinguish ID and TBASE
052: int Lflag3; // to distinguish ID and TBASE
053: int trnglsCount = 0;
054:
055: boolean DEBUGGING = false;
056:
057: String sgntr;
058: int ind;
059: int lexlen;
060:
061: public SignatureLexer2(String sig) {
062: sgntr = sig;
063: ind = 0;
064: lexlen = 0;
065: }
066:
067: public Token nextToken() throws TokenStreamException {
068: if (ind >= sgntr.length()) {
069: return new MToken(MToken.EOF_TYPE, "the end");
070: }
071: if (DEBUGGING) {
072: //System.out.println("nextToken1:"+sgntr);
073: //System.out.println("nextToken1:"+sgntr.charAt(ind));
074: }
075: MToken theRetToken = null;
076: try {
077: switch (sgntr.charAt(ind)) {
078: case '*': {
079: if (DEBUGGING) {
080: System.out.println(".............lex:STAR_SIGN:\""
081: + String.valueOf(sgntr.charAt(ind)) + "\"");
082: }
083: prevLexeme = STAR_SIGN;
084: theRetToken = new MToken(STAR_SIGN, "*");
085: ind++;
086: break;
087: }
088: case '+': {
089: if (DEBUGGING) {
090: System.out.println(".............lex:PLUS_SIGN:\""
091: + String.valueOf(sgntr.charAt(ind)) + "\"");
092: }
093: prevLexeme = PLUS_SIGN;
094: theRetToken = new MToken(PLUS_SIGN, "+");
095: ind++;
096: break;
097: }
098: case '-': {
099: if (DEBUGGING) {
100: System.out.println(".............lex:MINUS_SIGN:\""
101: + String.valueOf(sgntr.charAt(ind)) + "\"");
102: }
103: prevLexeme = MINUS_SIGN;
104: theRetToken = new MToken(MINUS_SIGN, "-");
105: ind++;
106: break;
107: }
108: case '[': {
109: if (DEBUGGING) {
110: System.out
111: .println(".............lex:SQUAREOPEN_SIGN:\""
112: + String.valueOf(sgntr.charAt(ind))
113: + "\"");
114: }
115: prevLexeme = SQUAREOPEN_SIGN;
116: theRetToken = new MToken(SQUAREOPEN_SIGN, "[");
117: ind++;
118: break;
119: }
120: case ':': {
121: if (DEBUGGING) {
122: System.out.println(".............lex:COLON_SIGN:\""
123: + String.valueOf(sgntr.charAt(ind)) + "\"");
124: }
125: prevLexeme = COLON_SIGN;
126: theRetToken = new MToken(COLON_SIGN, ":");
127: ind++;
128: break;
129: }
130: case '.': {
131: if (DEBUGGING) {
132: System.out
133: .println(".............lex:DOT_OR_DOLLAR_SIGN:\""
134: + String.valueOf(sgntr.charAt(ind))
135: + "\"");
136: }
137: prevLexeme = DOT_OR_DOLLAR_SIGN;
138: theRetToken = new MToken(DOT_OR_DOLLAR_SIGN, ".");
139: ind++;
140: break;
141: }
142: case '<': {
143: if (DEBUGGING) {
144: System.out
145: .println(".............lex:TRIANGLEOPEN_SIGN:\""
146: + String.valueOf(sgntr.charAt(ind))
147: + "\"");
148: }
149: if (Lflag2 == 1)
150: trnglsCount++;
151: prevLexeme = TRIANGLEOPEN_SIGN;
152: theRetToken = new MToken(TRIANGLEOPEN_SIGN, "<");
153: ind++;
154: break;
155: }
156: case '>': {
157: if (DEBUGGING) {
158: System.out
159: .println(".............lex:TRIANGLECLOSE_SIGN:\""
160: + String.valueOf(sgntr.charAt(ind))
161: + "\"");
162: }
163: if (Lflag2 == 1)
164: trnglsCount--;
165: prevLexeme = TRIANGLECLOSE_SIGN;
166: theRetToken = new MToken(TRIANGLECLOSE_SIGN, ">");
167: ind++;
168: break;
169: }
170: case '^': {
171: if (DEBUGGING) {
172: System.out.println(".............lex:CNTRL_SIGN:\""
173: + String.valueOf(sgntr.charAt(ind)) + "\"");
174: }
175: Lflag2 = 0;
176: prevLexeme = CNTRL_SIGN;
177: theRetToken = new MToken(CNTRL_SIGN, "^");
178: ind++;
179: break;
180: }
181: case ';': {
182: if (DEBUGGING) {
183: System.out
184: .println(".............lex:SEMICOLON_SIGN:\""
185: + String.valueOf(sgntr.charAt(ind))
186: + "\"");
187: }
188: if (Lflag2 == 1 && trnglsCount == 0) {
189: Lflag3 = 0;
190: }
191: prevLexeme = SEMICOLON_SIGN;
192: theRetToken = new MToken(SEMICOLON_SIGN, ";");
193: ind++;
194: break;
195: }
196: case '(': {
197: if (DEBUGGING) {
198: System.out
199: .println(".............lex:RINGOPEN_SIGN:\""
200: + String.valueOf(sgntr.charAt(ind))
201: + "\"");
202: }
203: Lflag2 = 1;
204: prevLexeme = RINGOPEN_SIGN;
205: theRetToken = new MToken(RINGOPEN_SIGN, "(");
206: ind++;
207: break;
208: }
209: case ')': {
210: if (DEBUGGING) {
211: System.out
212: .println(".............lex:RINGCLOSE_SIGN:\""
213: + String.valueOf(sgntr.charAt(ind))
214: + "\"");
215: }
216: prevLexeme = RINGCLOSE_SIGN;
217: theRetToken = new MToken(RINGCLOSE_SIGN, ")");
218: ind++;
219: break;
220: }
221: case 'V': {
222: if (Lflag2 == 1 && trnglsCount == 0
223: && prevLexeme != PACKAGE_SPECIFIER
224: && prevLexeme != DOT_OR_DOLLAR_SIGN) {
225: if (DEBUGGING) {
226: System.out.println(".............lex:VOIDTYPE");
227: }
228: prevLexeme = VOIDTYPE;
229: theRetToken = new MToken(VOIDTYPE, "V");
230: ind++;
231: } else if (isID_COLON()) {
232: ident = sgntr.substring(ind, ind + lexlen - 1); // last ":" should be recovered in stream
233: if (DEBUGGING) {
234: System.out
235: .println(".............lex:ID_COLON:\""
236: + ident + "\"");
237: }
238: prevLexeme = ID_COLON;
239: ind += lexlen - 1;
240: theRetToken = new MToken(ID_COLON, ident);
241: } else if (isIDwoL()) {
242: ident = sgntr.substring(ind, ind + lexlen);
243: if (DEBUGGING) {
244: System.out.println(".............lex:ID:\""
245: + ident + "\"");
246: }
247: prevLexeme = ID;
248: ind += lexlen;
249: theRetToken = new MToken(ID, ident);
250: } else {
251: theRetToken = new MToken(MToken.INVALID_TYPE,
252: "figvam");
253: throw new Exception();
254: }
255: break;
256: }
257: case 'B':
258: case 'C':
259: case 'D':
260: case 'F':
261: case 'I':
262: case 'J':
263: case 'S':
264: case 'Z': {
265: if (Lflag2 == 1 && Lflag3 != 1) {
266: if (DEBUGGING) {
267: System.out.println(".............lex:TBASE: \""
268: + String.valueOf(sgntr.charAt(ind))
269: + "\"");
270: }
271: if (ind == sgntr.length() - 1
272: || sgntr.charAt(ind + 1) == '^') {
273: prevLexeme = RETURN_BASE_TYPE;
274: theRetToken = new MToken(RETURN_BASE_TYPE,
275: String.valueOf(sgntr.charAt(ind)));
276: } else {
277: prevLexeme = TBASE;
278: theRetToken = new MToken(TBASE, String
279: .valueOf(sgntr.charAt(ind)));
280: }
281: ind++;
282: } else if (isID_COLON()) {
283: ident = sgntr.substring(ind, ind + lexlen - 1); // last ":" should be recovered in stream
284: if (DEBUGGING) {
285: System.out
286: .println(".............lex:ID_COLON:\""
287: + ident + "\"");
288: }
289: prevLexeme = ID_COLON;
290: theRetToken = new MToken(ID_COLON, ident);
291: ind += lexlen - 1;
292: } else if (isIDwoL()) {
293: ident = sgntr.substring(ind, ind + lexlen);
294: if (DEBUGGING) {
295: System.out.println(".............lex:ID:\""
296: + ident + "\"");
297: }
298: prevLexeme = ID;
299: theRetToken = new MToken(ID, ident);
300: ind += lexlen;
301: } else {
302: theRetToken = new MToken(MToken.INVALID_TYPE,
303: "figvam");
304: throw new Exception();
305: }
306: break;
307: }
308: case 'T': {
309: if (isTV()) {
310: if (prevLexeme == PACKAGE_SPECIFIER
311: || prevLexeme == DOT_OR_DOLLAR_SIGN) {
312: ident = sgntr.substring(ind, ind + lexlen - 1);// - ";"
313: if (DEBUGGING) {
314: System.out.println(".............lex:ID:\""
315: + ident + "\"");
316: }
317: prevLexeme = ID;
318: theRetToken = new MToken(ID, ident);
319: ind += lexlen - 1;
320: } else {
321: ident = sgntr.substring(ind, ind + lexlen);
322: if (DEBUGGING) {
323: System.out
324: .println(".............lex:TVAR:\""
325: + ident + "\"");
326: }
327: prevLexeme = TVAR;
328: theRetToken = new MToken(TVAR, ident);
329: ind += lexlen;
330: }
331: } else if (isID_COLON()) {
332: ident = sgntr.substring(ind, ind + lexlen - 1); // last ":" should be recovered in stream
333: if (DEBUGGING) {
334: System.out
335: .println(".............lex:ID_COLON:\""
336: + ident + "\"");
337: }
338: prevLexeme = ID_COLON;
339: theRetToken = new MToken(ID_COLON, ident);
340: ind += lexlen - 1;
341: } else if (isIDwoL()) {
342: ident = sgntr.substring(ind, ind + lexlen);
343: if (DEBUGGING) {
344: System.out.println(".............lex:ID:\""
345: + ident + "\"");
346: }
347: prevLexeme = ID;
348: theRetToken = new MToken(ID, ident);
349: ind += lexlen;
350: } else {
351: theRetToken = new MToken(MToken.INVALID_TYPE,
352: "figvam");
353: throw new Exception();
354: }
355: break;
356: }
357: case 'L': {
358: if (isPACKAGE_SPECIFIER()) {
359: ident = sgntr.substring(ind, ind + lexlen);
360: if (DEBUGGING) {
361: System.out
362: .println(".............lex:PACKAGE_SPECIFIER:\""
363: + ident + "\"");
364: }
365: if (Lflag2 == 1) {
366: Lflag3 = 1;
367: }
368: prevLexeme = PACKAGE_SPECIFIER;
369: theRetToken = new MToken(PACKAGE_SPECIFIER, ident);
370: ind += lexlen;
371: } else if (isID_COLON()) {
372: ident = sgntr.substring(ind, ind + lexlen - 1); // last ":" should be recovered in stream
373: if (DEBUGGING) {
374: System.out
375: .println(".............lex:ID_COLON:\""
376: + ident + "\"");
377: }
378: prevLexeme = ID_COLON;
379: theRetToken = new MToken(ID_COLON, ident);
380: ind += lexlen - 1;
381: } else if (isIDwL()) {
382: ident = sgntr.substring(ind, ind + lexlen);
383: if (DEBUGGING) {
384: System.out.println(".............lex:ID:\""
385: + ident + "\"");
386: }
387: prevLexeme = ID;
388: theRetToken = new MToken(ID, ident);
389: ind += lexlen;
390: } else {
391: theRetToken = new MToken(MToken.INVALID_TYPE,
392: "figvam");
393: throw new Exception();
394: }
395: break;
396: }
397: case '$': {
398: if (prevLexeme == TRIANGLECLOSE_SIGN) {
399: if (DEBUGGING) {
400: System.out
401: .println(".............lex:DOT_OR_DOLLAR_SIGN:\""
402: + String.valueOf(sgntr
403: .charAt(ind)) + "\"");
404: }
405: prevLexeme = DOT_OR_DOLLAR_SIGN;
406: theRetToken = new MToken(DOT_OR_DOLLAR_SIGN, "$");
407: ind++;
408: } else if (isID_COLON()) {
409: ident = sgntr.substring(ind, ind + lexlen - 1); // last ":" should be recovered in stream
410: if (DEBUGGING) {
411: System.out
412: .println(".............lex:ID_COLON:\""
413: + ident + "\"");
414: }
415: prevLexeme = ID_COLON;
416: theRetToken = new MToken(ID_COLON, ident);
417: ind += lexlen - 1;
418: } else if (isIDwoL()) {
419: ident = sgntr.substring(ind, ind + lexlen);
420: if (DEBUGGING) {
421: System.out.println(".............lex:ID:\""
422: + ident + "\"");
423: }
424: prevLexeme = ID;
425: theRetToken = new MToken(ID, ident);
426: ind += lexlen;
427: } else {
428: theRetToken = new MToken(MToken.INVALID_TYPE,
429: "figvam");
430: throw new Exception();
431: }
432: break;
433: }
434: case 'A':
435: case 'E':
436: case 'G':
437: case 'H':
438: case 'K':
439: case 'M':
440: case 'N':
441: case 'O':
442: case 'P':
443: case 'Q':
444: case 'R':
445: case 'U':
446: case 'W':
447: case 'X':
448: case 'Y':
449: case '\\':
450: case '_':
451: case 'a':
452: case 'b':
453: case 'c':
454: case 'd':
455: case 'e':
456: case 'f':
457: case 'g':
458: case 'h':
459: case 'i':
460: case 'j':
461: case 'k':
462: case 'l':
463: case 'm':
464: case 'n':
465: case 'o':
466: case 'p':
467: case 'q':
468: case 'r':
469: case 's':
470: case 't':
471: case 'u':
472: case 'v':
473: case 'w':
474: case 'x':
475: case 'y':
476: case 'z': {
477: if (isID_COLON()) {
478: ident = sgntr.substring(ind, ind + lexlen - 1); // last ":" should be recovered in stream
479: if (DEBUGGING) {
480: System.out
481: .println(".............lex:ID_COLON:\""
482: + ident + "\"");
483: }
484: prevLexeme = ID_COLON;
485: theRetToken = new MToken(ID_COLON, ident);
486: ind += lexlen - 1;
487: } else if (isIDwoL()) {
488: ident = sgntr.substring(ind, ind + lexlen);
489: if (DEBUGGING) {
490: System.out.println(".............lex:ID:\""
491: + ident + "\"");
492: }
493: prevLexeme = ID;
494: theRetToken = new MToken(ID, ident);
495: ind += lexlen;
496: } else {
497: theRetToken = new MToken(MToken.INVALID_TYPE,
498: "figvam");
499: throw new Exception();
500: }
501: break;
502: }
503: default: {
504: theRetToken = new MToken(MToken.INVALID_TYPE, "figvam");
505: throw new Exception();
506: }
507: }
508: if (DEBUGGING) {
509: //new Throwable().printStackTrace();
510: System.out.println("------------nextToken2:\""
511: + theRetToken.getText() + "\"");
512: //System.out.println("nextToken2:"+theRetToken.getType());
513: //System.out.println("nextToken2:"+ind);
514: }
515: return theRetToken;
516: } catch (Exception e) {
517: /**/e.printStackTrace();
518: /**/System.err.println("===nextToken===: " + e.toString());
519: throw new TokenStreamException(e.getMessage());
520: }
521: }
522:
523: private boolean isTV() {
524: if (DEBUGGING) {
525: System.out.println(".............isTV:" + ind);
526: }
527: String ns = sgntr
528: .substring(ind)
529: .replaceFirst(
530: "T((\\\\[a-f0-9]{3})|[A-Za-z_$]){1}((\\\\[a-f0-9]{3})|[A-Za-z_$0-9])*;",
531: "#");
532: if (ns.charAt(0) == '#') {
533: int i = sgntr.indexOf(ns.substring(1), ind);
534: lexlen = (i == ind) ? sgntr.length() - ind : i - ind;
535: return true;
536: }
537: return false;
538: }
539:
540: private boolean isIDwoL() {
541: if (DEBUGGING) {
542: System.out.println(".............isIDwoL:" + ind + "|"
543: + sgntr);
544: }
545: String ns = sgntr
546: .substring(ind)
547: .replaceFirst(
548: "((\\\\[a-f0-9]{3})|[A-KM-Za-z_$]){1}((\\\\[a-f0-9]{3})|[A-Za-z_$0-9])*",
549: "#");
550: if (ns.charAt(0) == '#') {
551: int i = sgntr.indexOf(ns.substring(1), ind);
552: lexlen = (i == ind) ? sgntr.length() - ind : i - ind;
553: return true;
554: }
555: return false;
556: }
557:
558: private boolean isID_COLON() {
559: if (DEBUGGING) {
560: System.out.println(".............isID_COLON:" + ind);
561: }
562: String ns = sgntr
563: .substring(ind)
564: .replaceFirst(
565: "((\\\\[a-f0-9]{3})|[A-Za-z_$]){1}((\\\\[a-f0-9]{3})|[A-Za-z_$0-9])*::",
566: "#");
567: if (ns.charAt(0) == '#') {
568: int i = sgntr.indexOf(ns.substring(1), ind);
569: lexlen = (i == ind) ? sgntr.length() - ind : i - ind;
570: return true;
571: }
572: return false;
573: }
574:
575: private boolean isPACKAGE_SPECIFIER() {
576: if (DEBUGGING) {
577: System.out.println(".............isPACKAGE_SPECIFIER:"
578: + ind);
579: }
580: String ns = sgntr
581: .substring(ind)
582: .replaceFirst(
583: "L((\\\\[a-f0-9]{3})|[A-Za-z_$]){1}((\\\\[a-f0-9]{3})|[A-Za-z_$0-9/])*/",
584: "#");
585: if (ns.charAt(0) == '#') {
586: int i = sgntr.indexOf(ns.substring(1), ind);
587: lexlen = (i == ind) ? sgntr.length() - ind : i - ind;
588: return true;
589: }
590: return false;
591: }
592:
593: private boolean isIDwL() {
594: if (DEBUGGING) {
595: System.out.println(".............isIDwL:" + ind);
596: }
597: String ns = sgntr.substring(ind).replaceFirst(
598: "L((\\\\[a-f0-9]{3})|[A-Za-z_$0-9])*", "#");
599: if (ns.charAt(0) == '#') {
600: int i = sgntr.indexOf(ns.substring(1), ind);
601: lexlen = (i == ind) ? sgntr.length() - ind : i - ind;
602: return true;
603: }
604: return false;
605: }
606: }
607:
608: class MToken extends Token {
609: String txt;
610:
611: public MToken(int t, String txt) {
612: super ();
613: super .type = t;
614: setText(txt);
615: }
616:
617: public String getText() {
618: return txt;
619: }
620:
621: public void setText(String t) {
622: txt = t;
623: }
624: }
|