001: package persistence.antlr;
002:
003: /* ANTLR Translator Generator
004: * Project led by Terence Parr at http://www.jGuru.com
005: * Software rights: http://www.antlr.org/license.html
006: *
007: */
008:
009: import java.util.Hashtable;
010:
011: import persistence.antlr.collections.impl.BitSet;
012:
013: import java.io.IOException;
014:
015: public abstract class CharScanner implements TokenStream {
016: static final char NO_CHAR = 0;
017: public static final char EOF_CHAR = (char) -1;
018: protected ANTLRStringBuffer text; // text of current token
019:
020: protected boolean saveConsumedInput = true; // does consume() save characters?
021: protected Class tokenObjectClass; // what kind of tokens to create?
022: protected boolean caseSensitive = true;
023: protected boolean caseSensitiveLiterals = true;
024: protected Hashtable literals; // set by subclass
025:
026: /** Tab chars are handled by tab() according to this value; override
027: * method to do anything weird with tabs.
028: */
029: protected int tabsize = 8;
030:
031: protected Token _returnToken = null; // used to return tokens w/o using return val.
032:
033: // Hash string used so we don't new one every time to check literals table
034: protected ANTLRHashString hashString;
035:
036: protected LexerSharedInputState inputState;
037:
038: /** Used during filter mode to indicate that path is desired.
039: * A subsequent scan error will report an error as usual if
040: * acceptPath=true;
041: */
042: protected boolean commitToPath = false;
043:
044: /** Used to keep track of indentdepth for traceIn/Out */
045: protected int traceDepth = 0;
046:
047: public CharScanner() {
048: text = new ANTLRStringBuffer();
049: hashString = new ANTLRHashString(this );
050: setTokenObjectClass("persistence.antlr.CommonToken");
051: }
052:
053: public CharScanner(InputBuffer cb) { // SAS: use generic buffer
054: this ();
055: inputState = new LexerSharedInputState(cb);
056: }
057:
058: public CharScanner(LexerSharedInputState sharedState) {
059: this ();
060: inputState = sharedState;
061: }
062:
063: public void append(char c) {
064: if (saveConsumedInput) {
065: text.append(c);
066: }
067: }
068:
069: public void append(String s) {
070: if (saveConsumedInput) {
071: text.append(s);
072: }
073: }
074:
075: public void commit() {
076: inputState.input.commit();
077: }
078:
079: public void consume() throws CharStreamException {
080: if (inputState.guessing == 0) {
081: char c = LA(1);
082: if (caseSensitive) {
083: append(c);
084: } else {
085: // use input.LA(), not LA(), to get original case
086: // CharScanner.LA() would toLower it.
087: append(inputState.input.LA(1));
088: }
089: if (c == '\t') {
090: tab();
091: } else {
092: inputState.column++;
093: }
094: }
095: inputState.input.consume();
096: }
097:
098: /** Consume chars until one matches the given char */
099: public void consumeUntil(int c) throws CharStreamException {
100: while (LA(1) != EOF_CHAR && LA(1) != c) {
101: consume();
102: }
103: }
104:
105: /** Consume chars until one matches the given set */
106: public void consumeUntil(BitSet set) throws CharStreamException {
107: while (LA(1) != EOF_CHAR && !set.member(LA(1))) {
108: consume();
109: }
110: }
111:
112: public boolean getCaseSensitive() {
113: return caseSensitive;
114: }
115:
116: public final boolean getCaseSensitiveLiterals() {
117: return caseSensitiveLiterals;
118: }
119:
120: public int getColumn() {
121: return inputState.column;
122: }
123:
124: public void setColumn(int c) {
125: inputState.column = c;
126: }
127:
128: public boolean getCommitToPath() {
129: return commitToPath;
130: }
131:
132: public String getFilename() {
133: return inputState.filename;
134: }
135:
136: public InputBuffer getInputBuffer() {
137: return inputState.input;
138: }
139:
140: public LexerSharedInputState getInputState() {
141: return inputState;
142: }
143:
144: public void setInputState(LexerSharedInputState state) {
145: inputState = state;
146: }
147:
148: public int getLine() {
149: return inputState.line;
150: }
151:
152: /** return a copy of the current text buffer */
153: public String getText() {
154: return text.toString();
155: }
156:
157: public Token getTokenObject() {
158: return _returnToken;
159: }
160:
161: public char LA(int i) throws CharStreamException {
162: if (caseSensitive) {
163: return inputState.input.LA(i);
164: } else {
165: return toLower(inputState.input.LA(i));
166: }
167: }
168:
169: protected Token makeToken(int t) {
170: try {
171: Token tok = (Token) tokenObjectClass.newInstance();
172: tok.setType(t);
173: tok.setColumn(inputState.tokenStartColumn);
174: tok.setLine(inputState.tokenStartLine);
175: // tracking real start line now: tok.setLine(inputState.line);
176: return tok;
177: } catch (InstantiationException ie) {
178: panic("can't instantiate token: " + tokenObjectClass);
179: } catch (IllegalAccessException iae) {
180: panic("Token class is not accessible" + tokenObjectClass);
181: }
182: return Token.badToken;
183: }
184:
185: public int mark() {
186: return inputState.input.mark();
187: }
188:
189: public void match(char c) throws MismatchedCharException,
190: CharStreamException {
191: if (LA(1) != c) {
192: throw new MismatchedCharException(LA(1), c, false, this );
193: }
194: consume();
195: }
196:
197: public void match(BitSet b) throws MismatchedCharException,
198: CharStreamException {
199: if (!b.member(LA(1))) {
200: throw new MismatchedCharException(LA(1), b, false, this );
201: } else {
202: consume();
203: }
204: }
205:
206: public void match(String s) throws MismatchedCharException,
207: CharStreamException {
208: int len = s.length();
209: for (int i = 0; i < len; i++) {
210: if (LA(1) != s.charAt(i)) {
211: throw new MismatchedCharException(LA(1), s.charAt(i),
212: false, this );
213: }
214: consume();
215: }
216: }
217:
218: public void matchNot(char c) throws MismatchedCharException,
219: CharStreamException {
220: if (LA(1) == c) {
221: throw new MismatchedCharException(LA(1), c, true, this );
222: }
223: consume();
224: }
225:
226: public void matchRange(char c1, char c2)
227: throws MismatchedCharException, CharStreamException {
228: if (LA(1) < c1 || LA(1) > c2)
229: throw new MismatchedCharException(LA(1), c1, c2, false,
230: this );
231: consume();
232: }
233:
234: public void newline() {
235: inputState.line++;
236: inputState.column = 1;
237: }
238:
239: /** advance the current column number by an appropriate amount
240: * according to tab size. This method is called from consume().
241: */
242: public void tab() {
243: int c = getColumn();
244: int nc = (((c - 1) / tabsize) + 1) * tabsize + 1; // calculate tab stop
245: setColumn(nc);
246: }
247:
248: public void setTabSize(int size) {
249: tabsize = size;
250: }
251:
252: public int getTabSize() {
253: return tabsize;
254: }
255:
256: /** @see #panic(String)
257: */
258: public void panic() {
259: System.err.println("CharScanner: panic");
260: System.exit(1);
261: }
262:
263: /** This method is executed by ANTLR internally when it detected an illegal
264: * state that cannot be recovered from.
265: * The default implementation of this method calls
266: * {@link java.lang.System.exit(int)} and writes directly to
267: * {@link java.lang.System.err)} , which is usually not appropriate when
268: * a translator is embedded into a larger application. <em>It is highly
269: * recommended that this method be overridden to handle the error in a
270: * way appropriate for your application (e.g. throw an unchecked
271: * exception)</em>.
272: */
273: public void panic(String s) {
274: System.err.println("CharScanner; panic: " + s);
275: System.exit(1);
276: }
277:
278: /** Parser error-reporting function can be overridden in subclass */
279: public void reportError(RecognitionException ex) {
280: System.err.println(ex);
281: }
282:
283: /** Parser error-reporting function can be overridden in subclass */
284: public void reportError(String s) {
285: if (getFilename() == null) {
286: System.err.println("error: " + s);
287: } else {
288: System.err.println(getFilename() + ": error: " + s);
289: }
290: }
291:
292: /** Parser warning-reporting function can be overridden in subclass */
293: public void reportWarning(String s) {
294: if (getFilename() == null) {
295: System.err.println("warning: " + s);
296: } else {
297: System.err.println(getFilename() + ": warning: " + s);
298: }
299: }
300:
301: public void resetText() {
302: text.setLength(0);
303: inputState.tokenStartColumn = inputState.column;
304: inputState.tokenStartLine = inputState.line;
305: }
306:
307: public void rewind(int pos) {
308: inputState.input.rewind(pos);
309: // RK: should not be here, it is messing up column calculation
310: // setColumn(inputState.tokenStartColumn);
311: }
312:
313: public void setCaseSensitive(boolean t) {
314: caseSensitive = t;
315: }
316:
317: public void setCommitToPath(boolean commit) {
318: commitToPath = commit;
319: }
320:
321: public void setFilename(String f) {
322: inputState.filename = f;
323: }
324:
325: public void setLine(int line) {
326: inputState.line = line;
327: }
328:
329: public void setText(String s) {
330: resetText();
331: text.append(s);
332: }
333:
334: public void setTokenObjectClass(String cl) {
335: try {
336: tokenObjectClass = Class.forName(cl);
337: } catch (ClassNotFoundException ce) {
338: panic("ClassNotFoundException: " + cl);
339: }
340: }
341:
342: // Test the token text against the literals table
343: // Override this method to perform a different literals test
344: public int testLiteralsTable(int ttype) {
345: hashString.setBuffer(text.getBuffer(), text.length());
346: Integer literalsIndex = (Integer) literals.get(hashString);
347: if (literalsIndex != null) {
348: ttype = literalsIndex.intValue();
349: }
350: return ttype;
351: }
352:
353: /** Test the text passed in against the literals table
354: * Override this method to perform a different literals test
355: * This is used primarily when you want to test a portion of
356: * a token.
357: */
358: public int testLiteralsTable(String text, int ttype) {
359: ANTLRHashString s = new ANTLRHashString(text, this );
360: Integer literalsIndex = (Integer) literals.get(s);
361: if (literalsIndex != null) {
362: ttype = literalsIndex.intValue();
363: }
364: return ttype;
365: }
366:
367: // Override this method to get more specific case handling
368: public char toLower(char c) {
369: return Character.toLowerCase(c);
370: }
371:
372: public void traceIndent() {
373: for (int i = 0; i < traceDepth; i++)
374: System.out.print(" ");
375: }
376:
377: public void traceIn(String rname) throws CharStreamException {
378: traceDepth += 1;
379: traceIndent();
380: System.out.println("> lexer " + rname + "; c==" + LA(1));
381: }
382:
383: public void traceOut(String rname) throws CharStreamException {
384: traceIndent();
385: System.out.println("< lexer " + rname + "; c==" + LA(1));
386: traceDepth -= 1;
387: }
388:
389: /** This method is called by YourLexer.nextToken() when the lexer has
390: * hit EOF condition. EOF is NOT a character.
391: * This method is not called if EOF is reached during
392: * syntactic predicate evaluation or during evaluation
393: * of normal lexical rules, which presumably would be
394: * an IOException. This traps the "normal" EOF condition.
395: *
396: * uponEOF() is called after the complete evaluation of
397: * the previous token and only if your parser asks
398: * for another token beyond that last non-EOF token.
399: *
400: * You might want to throw token or char stream exceptions
401: * like: "Heh, premature eof" or a retry stream exception
402: * ("I found the end of this file, go back to referencing file").
403: */
404: public void uponEOF() throws TokenStreamException,
405: CharStreamException {
406: }
407: }
|