001: package antlr;
002:
003: /* ANTLR Translator Generator
004: * Project led by Terence Parr at http://www.cs.usfca.edu
005: * Software rights: http://www.antlr.org/license.html
006: */
007:
008: import antlr.ASdebug.ASDebugStream;
009: import antlr.ASdebug.IASDebugStream;
010: import antlr.ASdebug.TokenOffsetInfo;
011: import antlr.collections.impl.BitSet;
012:
013: import java.util.*;
014:
015: /** This token stream tracks the *entire* token stream coming from
016: * a lexer, but does not pass on the whitespace (or whatever else
017: * you want to discard) to the parser.
018: *
019: * This class can then be asked for the ith token in the input stream.
020: * Useful for dumping out the input stream exactly after doing some
021: * augmentation or other manipulations. Tokens are index from 0..n-1
022: *
023: * You can insert stuff, replace, and delete chunks. Note that the
024: * operations are done lazily--only if you convert the buffer to a
025: * String. This is very efficient because you are not moving data around
026: * all the time. As the buffer of tokens is converted to strings, the
027: * toString() method(s) check to see if there is an operation at the
028: * current index. If so, the operation is done and then normal String
029: * rendering continues on the buffer. This is like having multiple Turing
030: * machine instruction streams (programs) operating on a single input tape. :)
031: *
032: * Since the operations are done lazily at toString-time, operations do not
033: * screw up the token index values. That is, an insert operation at token
034: * index i does not change the index values for tokens i+1..n-1.
035: *
036: * Because operations never actually alter the buffer, you may always get
037: * the original token stream back without undoing anything. Since
038: * the instructions are queued up, you can easily simulate transactions and
039: * roll back any changes if there is an error just by removing instructions.
040: * For example,
041: *
042: * TokenStreamRewriteEngine rewriteEngine =
043: * new TokenStreamRewriteEngine(lexer);
044: * JavaRecognizer parser = new JavaRecognizer(rewriteEngine);
045: * ...
046: * rewriteEngine.insertAfter("pass1", t, "foobar");}
047: * rewriteEngine.insertAfter("pass2", u, "start");}
048: * System.out.println(rewriteEngine.toString("pass1"));
049: * System.out.println(rewriteEngine.toString("pass2"));
050: *
051: * You can also have multiple "instruction streams" and get multiple
052: * rewrites from a single pass over the input. Just name the instruction
053: * streams and use that name again when printing the buffer. This could be
054: * useful for generating a C file and also its header file--all from the
055: * same buffer.
056: *
057: * If you don't use named rewrite streams, a "default" stream is used.
058: *
059: * Terence Parr, parrt at antlr.org
060: * University of San Francisco
061: * February 2004
062: */
063: public class TokenStreamRewriteEngine implements TokenStream,
064: IASDebugStream {
065: public static final int MIN_TOKEN_INDEX = 0;
066:
067: static class RewriteOperation {
068: protected int index;
069: protected String text;
070:
071: protected RewriteOperation(int index, String text) {
072: this .index = index;
073: this .text = text;
074: }
075:
076: /** Execute the rewrite operation by possibly adding to the buffer.
077: * Return the index of the next token to operate on.
078: */
079: public int execute(StringBuffer buf) {
080: return index;
081: }
082:
083: public String toString() {
084: String opName = getClass().getName();
085: int $index = opName.indexOf('$');
086: opName = opName.substring($index + 1, opName.length());
087: return opName + "@" + index + '"' + text + '"';
088: }
089: }
090:
091: static class InsertBeforeOp extends RewriteOperation {
092: public InsertBeforeOp(int index, String text) {
093: super (index, text);
094: }
095:
096: public int execute(StringBuffer buf) {
097: buf.append(text);
098: return index;
099: }
100: }
101:
102: /** I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp
103: * instructions.
104: */
105: static class ReplaceOp extends RewriteOperation {
106: protected int lastIndex;
107:
108: public ReplaceOp(int from, int to, String text) {
109: super (from, text);
110: lastIndex = to;
111: }
112:
113: public int execute(StringBuffer buf) {
114: if (text != null) {
115: buf.append(text);
116: }
117: return lastIndex + 1;
118: }
119: }
120:
121: static class DeleteOp extends ReplaceOp {
122: public DeleteOp(int from, int to) {
123: super (from, to, null);
124: }
125: }
126:
127: public static final String DEFAULT_PROGRAM_NAME = "default";
128: public static final int PROGRAM_INIT_SIZE = 100;
129:
130: /** Track the incoming list of tokens */
131: protected List tokens;
132:
133: /** You may have multiple, named streams of rewrite operations.
134: * I'm calling these things "programs."
135: * Maps String (name) -> rewrite (List)
136: */
137: protected Map programs = null;
138:
139: /** Map String (program name) -> Integer index */
140: protected Map lastRewriteTokenIndexes = null;
141:
142: /** track index of tokens */
143: protected int index = MIN_TOKEN_INDEX;
144:
145: /** Who do we suck tokens from? */
146: protected TokenStream stream;
147:
148: /** Which (whitespace) token(s) to throw out */
149: protected BitSet discardMask = new BitSet();
150:
151: public TokenStreamRewriteEngine(TokenStream upstream) {
152: this (upstream, 1000);
153: }
154:
155: public TokenStreamRewriteEngine(TokenStream upstream,
156: int initialSize) {
157: stream = upstream;
158: tokens = new ArrayList(initialSize);
159: programs = new HashMap();
160: programs.put(DEFAULT_PROGRAM_NAME, new ArrayList(
161: PROGRAM_INIT_SIZE));
162: lastRewriteTokenIndexes = new HashMap();
163: }
164:
165: public Token nextToken() throws TokenStreamException {
166: TokenWithIndex t;
167: // suck tokens until end of stream or we find a non-discarded token
168: do {
169: t = (TokenWithIndex) stream.nextToken();
170: if (t != null) {
171: t.setIndex(index); // what is t's index in list?
172: if (t.getType() != Token.EOF_TYPE) {
173: tokens.add(t); // track all tokens except EOF
174: }
175: index++; // move to next position
176: }
177: } while (t != null && discardMask.member(t.getType()));
178: return t;
179: }
180:
181: public void rollback(int instructionIndex) {
182: rollback(DEFAULT_PROGRAM_NAME, instructionIndex);
183: }
184:
185: /** Rollback the instruction stream for a program so that
186: * the indicated instruction (via instructionIndex) is no
187: * longer in the stream. UNTESTED!
188: */
189: public void rollback(String programName, int instructionIndex) {
190: List is = (List) programs.get(programName);
191: if (is != null) {
192: programs.put(programName, is.subList(MIN_TOKEN_INDEX,
193: instructionIndex));
194: }
195: }
196:
197: public void deleteProgram() {
198: deleteProgram(DEFAULT_PROGRAM_NAME);
199: }
200:
201: /** Reset the program so that no instructions exist */
202: public void deleteProgram(String programName) {
203: rollback(programName, MIN_TOKEN_INDEX);
204: }
205:
206: /** If op.index > lastRewriteTokenIndexes, just add to the end.
207: * Otherwise, do linear */
208: protected void addToSortedRewriteList(RewriteOperation op) {
209: addToSortedRewriteList(DEFAULT_PROGRAM_NAME, op);
210: }
211:
212: /** old; before moving v3 stuff in
213: protected void addToSortedRewriteList(String programName, RewriteOperation op) {
214: List rewrites = getProgram(programName);
215: // if at or beyond last op's index, just append
216: if ( op.index>=getLastRewriteTokenIndex(programName) ) {
217: rewrites.add(op); // append to list of operations
218: // record the index of this operation for next time through
219: setLastRewriteTokenIndex(programName, op.index);
220: return;
221: }
222: // not after the last one, so must insert to ordered list
223: Comparator comparator = new Comparator() {
224: public int compare(Object o, Object o1) {
225: RewriteOperation a = (RewriteOperation)o;
226: RewriteOperation b = (RewriteOperation)o1;
227: if ( a.index<b.index ) return -1;
228: if ( a.index>b.index ) return 1;
229: return 0;
230: }
231: };
232: int pos = Collections.binarySearch(rewrites, op, comparator);
233: if ( pos<0 ) {
234: rewrites.add(-pos-1, op);
235: }
236: }
237: */
238:
239: /** Add an instruction to the rewrite instruction list ordered by
240: * the instruction number (use a binary search for efficiency).
241: * The list is ordered so that toString() can be done efficiently.
242: *
243: * When there are multiple instructions at the same index, the instructions
244: * must be ordered to ensure proper behavior. For example, a delete at
245: * index i must kill any replace operation at i. Insert-before operations
246: * must come before any replace / delete instructions. If there are
247: * multiple insert instructions for a single index, they are done in
248: * reverse insertion order so that "insert foo" then "insert bar" yields
249: * "foobar" in front rather than "barfoo". This is convenient because
250: * I can insert new InsertOp instructions at the index returned by
251: * the binary search. A ReplaceOp kills any previous replace op. Since
252: * delete is the same as replace with null text, i can check for
253: * ReplaceOp and cover DeleteOp at same time. :)
254: */
255: protected void addToSortedRewriteList(String programName,
256: RewriteOperation op) {
257: List rewrites = getProgram(programName);
258: //System.out.println("### add "+op+"; rewrites="+rewrites);
259: Comparator comparator = new Comparator() {
260: public int compare(Object o, Object o1) {
261: RewriteOperation a = (RewriteOperation) o;
262: RewriteOperation b = (RewriteOperation) o1;
263: if (a.index < b.index)
264: return -1;
265: if (a.index > b.index)
266: return 1;
267: return 0;
268: }
269: };
270: int pos = Collections.binarySearch(rewrites, op, comparator);
271: //System.out.println("bin search returns: pos="+pos);
272:
273: if (pos >= 0) {
274: // binarySearch does not guarantee first element when multiple
275: // are found. I must seach backwards for first op with op.index
276: for (; pos >= 0; pos--) {
277: RewriteOperation prevOp = (RewriteOperation) rewrites
278: .get(pos);
279: if (prevOp.index < op.index) {
280: break;
281: }
282: }
283: pos++; // pos points at first op before ops with op.index; go back up one
284: // now pos is the index in rewrites of first op with op.index
285: //System.out.println("first op with op.index: pos="+pos);
286:
287: // an instruction operating already on that index was found;
288: // make this one happen after all the others
289: //System.out.println("found instr for index="+op.index);
290: if (op instanceof ReplaceOp) {
291: boolean replaced = false;
292: int i;
293: // look for an existing replace
294: for (i = pos; i < rewrites.size(); i++) {
295: RewriteOperation prevOp = (RewriteOperation) rewrites
296: .get(pos);
297: if (prevOp.index != op.index) {
298: break;
299: }
300: if (prevOp instanceof ReplaceOp) {
301: rewrites.set(pos, op); // replace old with new
302: replaced = true;
303: break;
304: }
305: // keep going; must be an insert
306: }
307: if (!replaced) {
308: // add replace op to the end of all the inserts
309: rewrites.add(i, op);
310: }
311: } else {
312: // inserts are added in front of existing inserts
313: rewrites.add(pos, op);
314: }
315: } else {
316: //System.out.println("no instruction at pos=="+pos);
317: rewrites.add(-pos - 1, op);
318: }
319: //System.out.println("after, rewrites="+rewrites);
320: }
321:
322: public void insertAfter(Token t, String text) {
323: insertAfter(DEFAULT_PROGRAM_NAME, t, text);
324: }
325:
326: public void insertAfter(int index, String text) {
327: insertAfter(DEFAULT_PROGRAM_NAME, index, text);
328: }
329:
330: public void insertAfter(String programName, Token t, String text) {
331: insertAfter(programName, ((TokenWithIndex) t).getIndex(), text);
332: }
333:
334: public void insertAfter(String programName, int index, String text) {
335: // to insert after, just insert before next index (even if past end)
336: insertBefore(programName, index + 1, text);
337: }
338:
339: public void insertBefore(Token t, String text) {
340: insertBefore(DEFAULT_PROGRAM_NAME, t, text);
341: }
342:
343: public void insertBefore(int index, String text) {
344: insertBefore(DEFAULT_PROGRAM_NAME, index, text);
345: }
346:
347: public void insertBefore(String programName, Token t, String text) {
348: insertBefore(programName, ((TokenWithIndex) t).getIndex(), text);
349: }
350:
351: public void insertBefore(String programName, int index, String text) {
352: addToSortedRewriteList(programName, new InsertBeforeOp(index,
353: text));
354: }
355:
356: public void replace(int index, String text) {
357: replace(DEFAULT_PROGRAM_NAME, index, index, text);
358: }
359:
360: public void replace(int from, int to, String text) {
361: replace(DEFAULT_PROGRAM_NAME, from, to, text);
362: }
363:
364: public void replace(Token indexT, String text) {
365: replace(DEFAULT_PROGRAM_NAME, indexT, indexT, text);
366: }
367:
368: public void replace(Token from, Token to, String text) {
369: replace(DEFAULT_PROGRAM_NAME, from, to, text);
370: }
371:
372: public void replace(String programName, int from, int to,
373: String text) {
374: addToSortedRewriteList(new ReplaceOp(from, to, text));
375: }
376:
377: public void replace(String programName, Token from, Token to,
378: String text) {
379: replace(programName, ((TokenWithIndex) from).getIndex(),
380: ((TokenWithIndex) to).getIndex(), text);
381: }
382:
383: public void delete(int index) {
384: delete(DEFAULT_PROGRAM_NAME, index, index);
385: }
386:
387: public void delete(int from, int to) {
388: delete(DEFAULT_PROGRAM_NAME, from, to);
389: }
390:
391: public void delete(Token indexT) {
392: delete(DEFAULT_PROGRAM_NAME, indexT, indexT);
393: }
394:
395: public void delete(Token from, Token to) {
396: delete(DEFAULT_PROGRAM_NAME, from, to);
397: }
398:
399: public void delete(String programName, int from, int to) {
400: replace(programName, from, to, null);
401: }
402:
403: public void delete(String programName, Token from, Token to) {
404: replace(programName, from, to, null);
405: }
406:
407: public void discard(int ttype) {
408: discardMask.add(ttype);
409: }
410:
411: public TokenWithIndex getToken(int i) {
412: return (TokenWithIndex) tokens.get(i);
413: }
414:
415: public int getTokenStreamSize() {
416: return tokens.size();
417: }
418:
419: public String toOriginalString() {
420: return toOriginalString(MIN_TOKEN_INDEX,
421: getTokenStreamSize() - 1);
422: }
423:
424: public String toOriginalString(int start, int end) {
425: StringBuffer buf = new StringBuffer();
426: for (int i = start; i >= MIN_TOKEN_INDEX && i <= end
427: && i < tokens.size(); i++) {
428: buf.append(getToken(i).getText());
429: }
430: return buf.toString();
431: }
432:
433: public String toString() {
434: return toString(MIN_TOKEN_INDEX, getTokenStreamSize() - 1);
435: }
436:
437: public String toString(String programName) {
438: return toString(programName, MIN_TOKEN_INDEX,
439: getTokenStreamSize() - 1);
440: }
441:
442: public String toString(int start, int end) {
443: return toString(DEFAULT_PROGRAM_NAME, start, end);
444: }
445:
446: public String toString(String programName, int start, int end) {
447: List rewrites = (List) programs.get(programName);
448: if (rewrites == null || rewrites.size() == 0) {
449: return toOriginalString(start, end); // no instructions to execute
450: }
451: StringBuffer buf = new StringBuffer();
452:
453: /// Index of first rewrite we have not done
454: int rewriteOpIndex = 0;
455:
456: int tokenCursor = start;
457: while (tokenCursor >= MIN_TOKEN_INDEX && tokenCursor <= end
458: && tokenCursor < tokens.size()) {
459: //System.out.println("tokenCursor="+tokenCursor);
460: // execute instructions associated with this token index
461: if (rewriteOpIndex < rewrites.size()) {
462: RewriteOperation op = (RewriteOperation) rewrites
463: .get(rewriteOpIndex);
464:
465: // skip all ops at lower index
466: while (op.index < tokenCursor
467: && rewriteOpIndex < rewrites.size()) {
468: rewriteOpIndex++;
469: if (rewriteOpIndex < rewrites.size()) {
470: op = (RewriteOperation) rewrites
471: .get(rewriteOpIndex);
472: }
473: }
474:
475: // while we have ops for this token index, exec them
476: while (tokenCursor == op.index
477: && rewriteOpIndex < rewrites.size()) {
478: //System.out.println("execute "+op+" at instruction "+rewriteOpIndex);
479: tokenCursor = op.execute(buf);
480: //System.out.println("after execute tokenCursor = "+tokenCursor);
481: rewriteOpIndex++;
482: if (rewriteOpIndex < rewrites.size()) {
483: op = (RewriteOperation) rewrites
484: .get(rewriteOpIndex);
485: }
486: }
487: }
488: // dump the token at this index
489: if (tokenCursor <= end) {
490: buf.append(getToken(tokenCursor).getText());
491: tokenCursor++;
492: }
493: }
494: // now see if there are operations (append) beyond last token index
495: for (int opi = rewriteOpIndex; opi < rewrites.size(); opi++) {
496: RewriteOperation op = (RewriteOperation) rewrites.get(opi);
497: if (op.index >= size()) {
498: op.execute(buf); // must be insertions if after last token
499: }
500: //System.out.println("execute "+op+" at "+opi);
501: //op.execute(buf); // must be insertions if after last token
502: }
503:
504: return buf.toString();
505: }
506:
507: public String toDebugString() {
508: return toDebugString(MIN_TOKEN_INDEX, getTokenStreamSize() - 1);
509: }
510:
511: public String toDebugString(int start, int end) {
512: StringBuffer buf = new StringBuffer();
513: for (int i = start; i >= MIN_TOKEN_INDEX && i <= end
514: && i < tokens.size(); i++) {
515: buf.append(getToken(i));
516: }
517: return buf.toString();
518: }
519:
520: public int getLastRewriteTokenIndex() {
521: return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME);
522: }
523:
524: protected int getLastRewriteTokenIndex(String programName) {
525: Integer I = (Integer) lastRewriteTokenIndexes.get(programName);
526: if (I == null) {
527: return -1;
528: }
529: return I.intValue();
530: }
531:
532: protected void setLastRewriteTokenIndex(String programName, int i) {
533: lastRewriteTokenIndexes.put(programName, new Integer(i));
534: }
535:
536: protected List getProgram(String name) {
537: List is = (List) programs.get(name);
538: if (is == null) {
539: is = initializeProgram(name);
540: }
541: return is;
542: }
543:
544: private List initializeProgram(String name) {
545: List is = new ArrayList(PROGRAM_INIT_SIZE);
546: programs.put(name, is);
547: return is;
548: }
549:
550: public int size() {
551: return tokens.size();
552: }
553:
554: public int index() {
555: return index;
556: }
557:
558: public String getEntireText() {
559: return ASDebugStream.getEntireText(this .stream);
560: }
561:
562: public TokenOffsetInfo getOffsetInfo(Token token) {
563: return ASDebugStream.getOffsetInfo(this.stream, token);
564: }
565: }
|