package antlr;

/* ANTLR Translator Generator
 * Project led by Terence Parr at http://www.cs.usfca.edu
 * Software rights: http://www.antlr.org/license.html
 */

import java.util.Hashtable;

import antlr.collections.impl.BitSet;

public abstract class CharScanner extends MatchExceptionState implements
        TokenStream {
    static final char NO_CHAR = 0;
    public static final char EOF_CHAR = (char) -1;
    protected ANTLRStringBuffer text; // text of current token

    protected boolean saveConsumedInput = true; // does consume() save characters?
    protected Class tokenObjectClass; // what kind of tokens to create?
    protected boolean caseSensitive = true;
    protected boolean caseSensitiveLiterals = true;
    protected Hashtable literals; // set by subclass

    /** Tab chars are handled by tab() according to this value; override
     * that method to handle tabs differently.
     */
    protected int tabsize = 8;

    protected Token _returnToken = null; // used to return tokens without using the return value

    // Hash string reused so we don't allocate a new one every time we check
    // the literals table
    protected ANTLRHashString hashString;

    protected LexerSharedInputState inputState;

    /** Used during filter mode to indicate that a path is desired.
     * A subsequent scan error will report an error as usual if
     * commitToPath is true.
     */
    protected boolean commitToPath = false;

    /** Used to keep track of indent depth for traceIn/Out */
    protected int traceDepth = 0;

    public CharScanner() {
        text = new ANTLRStringBuffer();
        hashString = new ANTLRHashString(this);
        setTokenObjectClass("antlr.CommonToken");
    }

    public CharScanner(InputBuffer cb) { // SAS: use generic buffer
        this();
        inputState = new LexerSharedInputState(cb);
    }

    public CharScanner(LexerSharedInputState sharedState) {
        this();
        inputState = sharedState;
    }
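
    // A minimal usage sketch (not part of this class): MyLexer stands for a
    // hypothetical lexer generated as a subclass of CharScanner, and
    // antlr.CharBuffer is assumed as the InputBuffer implementation.
    //
    //     CharScanner lexer = new MyLexer(new CharBuffer(new java.io.FileReader("input.txt")));
    //     Token tok = lexer.nextToken(); // may throw TokenStreamException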

    public void append(char c) {
        if (saveConsumedInput) {
            text.append(c);
        }
    }

    public void append(String s) {
        if (saveConsumedInput) {
            text.append(s);
        }
    }

    /*public void commit() {
        inputState.input.commit();
    }*/

    public void consume() {
        if (inputState.guessing == 0) {
            char c = LA(1);
            if (caseSensitive) {
                append(c);
            } else {
                // use input.LA(), not LA(), to get original case
                // CharScanner.LA() would toLower it.
                append(inputState.input.LA(1));
            }
            if (c == '\t') {
                tab();
            } else {
                inputState.column++;
            }
        }
        inputState.input.consume();
    }

    /** Consume chars until one matches the given char */
    public void consumeUntil(int c) {
        char LA1 = LA(1);
        while (LA1 != EOF_CHAR && LA1 != c) {
            consume();
            LA1 = LA(1);
        }
    }

    /** Consume chars until one matches the given set */
    public void consumeUntil(BitSet set) {
        char LA1 = LA(1);
        while (LA1 != EOF_CHAR && !set.member(LA1)) {
            consume();
            LA1 = LA(1);
        }
    }
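
    // Illustrative error-recovery use (a sketch, not taken verbatim from
    // generated code): in a lexer rule's catch block, a generated lexer
    // typically reports the error and then resynchronizes on one of its
    // generated bit sets; _tokenSet_0 is an assumed generated name.
    //
    //     catch (RecognitionException ex) {
    //         reportError(ex);
    //         consumeUntil(_tokenSet_0);
    //     }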

    public boolean getCaseSensitive() {
        return caseSensitive;
    }

    public final boolean getCaseSensitiveLiterals() {
        return caseSensitiveLiterals;
    }

    public int getColumn() {
        return inputState.column;
    }

    public void setColumn(int c) {
        inputState.column = c;
    }

    public boolean getCommitToPath() {
        return commitToPath;
    }

    public String getFilename() {
        return inputState.filename;
    }

    public InputBuffer getInputBuffer() {
        return inputState.input;
    }

    public LexerSharedInputState getInputState() {
        return inputState;
    }

    public void setInputState(LexerSharedInputState state) {
        inputState = state;
    }

    public int getLine() {
        return inputState.line;
    }

    /** Return a copy of the current text buffer */
    public String getText() {
        return text.toString();
    }

    public Token getTokenObject() {
        return _returnToken;
    }

    public char LA(int i) {
        if (caseSensitive) {
            return inputState.input.LA(i);
        } else {
            return toLower(inputState.input.LA(i));
        }
    }

    // Factory hook so subclasses can override token creation and avoid the
    // reflective newInstance() call used by this default implementation.
    protected Token createToken(int type)
            throws InstantiationException, IllegalAccessException {
        return (Token) tokenObjectClass.newInstance();
    }
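
    // A minimal sketch of such an override, assuming antlr.CommonToken (the
    // default token class set in the constructor) is the desired token type;
    // makeToken() fills in the type afterwards:
    //
    //     protected Token createToken(int type) {
    //         return new CommonToken();
    //     }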

    protected Token makeToken(int t) {
        try {
            Token tok = createToken(t);
            tok.setType(t);
            tok.setColumn(inputState.tokenStartColumn);
            tok.setLine(inputState.tokenStartLine);
            // tracking real start line now: tok.setLine(inputState.line);
            return tok;
        } catch (InstantiationException ie) {
            panic("can't instantiate token: " + tokenObjectClass);
        } catch (IllegalAccessException iae) {
            panic("Token class is not accessible: " + tokenObjectClass);
        }
        return TokenImpl.badToken;
    }

    public int mark() {
        return inputState.input.mark();
    }

    public void match(char c) throws MismatchedCharException {
        if (LA(1) != c) {
            throw new MismatchedCharException(LA(1), c, false, this);
        }
        consume();
    }

    public void match(BitSet b) throws MismatchedCharException {
        if (!b.member(LA(1))) {
            throw new MismatchedCharException(LA(1), b, false, this);
        } else {
            consume();
        }
    }

    public void match(String s) throws MismatchedCharException {
        int len = s.length();
        for (int i = 0; i < len; i++) {
            if (LA(1) != s.charAt(i)) {
                throw new MismatchedCharException(LA(1), s.charAt(i),
                        false, this);
            }
            consume();
        }
    }

    public void matchNot(char c) throws MismatchedCharException {
        if (LA(1) == c) {
            throw new MismatchedCharException(LA(1), c, true, this);
        }
        consume();
    }

    public void matchRange(char c1, char c2)
            throws MismatchedCharException {
        if (LA(1) < c1 || LA(1) > c2)
            throw new MismatchedCharException(LA(1), c1, c2, false,
                    this);
        consume();
    }

    public void newline() {
        inputState.line++;
        inputState.column = 1;
    }

    /** Advance the current column number by an appropriate amount
     * according to tab size. This method is called from consume().
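     *
     * For example, with the default tabsize of 8, a tab consumed at
     * column 5 advances the column to the next tab stop,
     * (((5 - 1) / 8) + 1) * 8 + 1 = 9; a tab at column 9 advances it to 17.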
     */
    public void tab() {
        int c = getColumn();
        int nc = (((c - 1) / tabsize) + 1) * tabsize + 1; // calculate tab stop
        setColumn(nc);
    }

    public void setTabSize(int size) {
        tabsize = size;
    }

    public int getTabSize() {
        return tabsize;
    }

    /** @see #panic(String)
     */
    public void panic() {
        System.err.println("CharScanner: panic");
        Utils.error("");
    }

    /** This method is executed by ANTLR internally when it detects an illegal
     * state that cannot be recovered from.
     * The default implementation of this method calls
     * {@link java.lang.System#exit(int)} and writes directly to
     * {@link java.lang.System#err}, which is usually not appropriate when
     * a translator is embedded into a larger application. <em>It is highly
     * recommended that this method be overridden to handle the error in a
     * way appropriate for your application (e.g. throw an unchecked
     * exception)</em>.
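     *
     * A minimal sketch of such an override in a subclass (the unchecked
     * exception type chosen here is only an illustration):
     * <pre>
     * public void panic(String s) {
     *     throw new IllegalStateException("CharScanner panic: " + s);
     * }
     * </pre>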
     */
    public void panic(String s) {
        System.err.println("CharScanner: panic: " + s);
        Utils.error(s);
    }

    /** Lexer error-reporting function; can be overridden in a subclass */
    public void reportError(RecognitionException ex) {
        System.err.println(ex);
    }

    /** Lexer error-reporting function; can be overridden in a subclass */
    public void reportError(String s) {
        if (getFilename() == null) {
            System.err.println("error: " + s);
        } else {
            System.err.println(getFilename() + ": error: " + s);
        }
    }

    /** Lexer warning-reporting function; can be overridden in a subclass */
    public void reportWarning(String s) {
        if (getFilename() == null) {
            System.err.println("warning: " + s);
        } else {
            System.err.println(getFilename() + ": warning: " + s);
        }
    }

    public void resetText() {
        text.setLength(0);
        inputState.tokenStartColumn = inputState.column;
        inputState.tokenStartLine = inputState.line;
    }

    public void rewind(int pos) {
        inputState.input.rewind(pos);
        // RK: should not be here, it is messing up column calculation
        // setColumn(inputState.tokenStartColumn);
    }

    public void setCaseSensitive(boolean t) {
        caseSensitive = t;
    }

    public void setCommitToPath(boolean commit) {
        commitToPath = commit;
    }

    public void setFilename(String f) {
        inputState.filename = f;
    }

    public void setLine(int line) {
        inputState.line = line;
    }

    public void setText(String s) {
        resetText();
        text.append(s);
    }

    public void setTokenObjectClass(String cl) {
        try {
            tokenObjectClass = Utils.loadClass(cl);
        } catch (ClassNotFoundException ce) {
            panic("ClassNotFoundException: " + cl);
        }
    }

    // Test the token text against the literals table.
    // Override this method to perform a different literals test.
    public int testLiteralsTable(int ttype) {
        hashString.setBuffer(text.getBuffer(), text.length());
        Integer literalsIndex = (Integer) literals.get(hashString);
        if (literalsIndex != null) {
            ttype = literalsIndex.intValue();
        }
        return ttype;
    }
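
    // Illustrative use (mirroring what generated lexers typically do near the
    // end of an identifier rule; _ttype is the usual generated local):
    //
    //     _ttype = testLiteralsTable(_ttype);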

    /** Test the text passed in against the literals table.
     * Override this method to perform a different literals test.
     * This is used primarily when you want to test a portion of
     * a token.
     */
    public int testLiteralsTable(String text, int ttype) {
        ANTLRHashString s = new ANTLRHashString(text, this);
        Integer literalsIndex = (Integer) literals.get(s);
        if (literalsIndex != null) {
            ttype = literalsIndex.intValue();
        }
        return ttype;
    }

    // Override this method to get more specific case handling
    public char toLower(char c) {
        return Character.toLowerCase(c);
    }

    public void traceIndent() {
        for (int i = 0; i < traceDepth; i++)
            System.out.print(" ");
    }

    public void traceIn(String rname) {
        traceDepth += 1;
        traceIndent();
        System.out.println("> lexer " + rname + "; c==" + LA(1));
    }

    public void traceOut(String rname) {
        traceIndent();
        System.out.println("< lexer " + rname + "; c==" + LA(1));
        traceDepth -= 1;
    }

    /** This method is called by YourLexer.nextToken() when the lexer has
     * hit the EOF condition. EOF is NOT a character.
     * This method is not called if EOF is reached during
     * syntactic predicate evaluation or during evaluation
     * of normal lexical rules, which presumably would be
     * an IOException. This traps the "normal" EOF condition.
     *
     * uponEOF() is called after the complete evaluation of
     * the previous token and only if your parser asks
     * for another token beyond that last non-EOF token.
     *
     * You might want to throw token or char stream exceptions
     * like "Heh, premature eof" or a retry stream exception
     * ("I found the end of this file, go back to referencing file").
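     *
     * A minimal sketch of an override that treats EOF inside an included
     * file as a signal to resume the including file (the insideIncludeFile
     * flag is hypothetical; TokenStreamRetryException is assumed to be
     * available, as in stock ANTLR):
     * <pre>
     * public void uponEOF() throws TokenStreamException {
     *     if (insideIncludeFile) {
     *         throw new TokenStreamRetryException();
     *     }
     * }
     * </pre>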
     */
    public void uponEOF() throws TokenStreamException {
    }
}