001: package persistence.antlr;
002:
003: /* ANTLR Translator Generator
004: * Project led by Terence Parr at http://www.jGuru.com
005: * Software rights: http://www.antlr.org/license.html
006: */
007:
008: import persistence.antlr.collections.impl.BitSet;
009: import java.util.*;
010:
011: /** This token stream tracks the *entire* token stream coming from
012: * a lexer, but does not pass on the whitespace (or whatever else
013: * you want to discard) to the parser.
014: *
015: * This class can then be asked for the ith token in the input stream.
016: * Useful for dumping out the input stream exactly after doing some
017: * augmentation or other manipulations. Tokens are index from 0..n-1
018: *
019: * You can insert stuff, replace, and delete chunks. Note that the
020: * operations are done lazily--only if you convert the buffer to a
021: * String. This is very efficient because you are not moving data around
022: * all the time. As the buffer of tokens is converted to strings, the
023: * toString() method(s) check to see if there is an operation at the
024: * current index. If so, the operation is done and then normal String
025: * rendering continues on the buffer. This is like having multiple Turing
026: * machine instruction streams (programs) operating on a single input tape. :)
027: *
028: * Since the operations are done lazily at toString-time, operations do not
029: * screw up the token index values. That is, an insert operation at token
030: * index i does not change the index values for tokens i+1..n-1.
031: *
032: * Because operations never actually alter the buffer, you may always get
033: * the original token stream back without undoing anything. Since
034: * the instructions are queued up, you can easily simulate transactions and
035: * roll back any changes if there is an error just by removing instructions.
036: * For example,
037: *
038: * TokenStreamRewriteEngine rewriteEngine =
039: * new TokenStreamRewriteEngine(lexer);
040: * JavaRecognizer parser = new JavaRecognizer(rewriteEngine);
041: * ...
042: * rewriteEngine.insertAfter("pass1", t, "foobar");}
043: * rewriteEngine.insertAfter("pass2", u, "start");}
044: * System.out.println(rewriteEngine.toString("pass1"));
045: * System.out.println(rewriteEngine.toString("pass2"));
046: *
047: * You can also have multiple "instruction streams" and get multiple
048: * rewrites from a single pass over the input. Just name the instruction
049: * streams and use that name again when printing the buffer. This could be
050: * useful for generating a C file and also its header file--all from the
051: * same buffer.
052: *
053: * If you don't use named rewrite streams, a "default" stream is used.
054: *
055: * Terence Parr, parrt@cs.usfca.edu
056: * University of San Francisco
057: * February 2004
058: */
059: public class TokenStreamRewriteEngine implements TokenStream {
060: public static final int MIN_TOKEN_INDEX = 0;
061:
062: static class RewriteOperation {
063: protected int index;
064: protected String text;
065:
066: protected RewriteOperation(int index, String text) {
067: this .index = index;
068: this .text = text;
069: }
070:
071: /** Execute the rewrite operation by possibly adding to the buffer.
072: * Return the index of the next token to operate on.
073: */
074: public int execute(StringBuffer buf) {
075: return index;
076: }
077: }
078:
079: static class InsertBeforeOp extends RewriteOperation {
080: public InsertBeforeOp(int index, String text) {
081: super (index, text);
082: }
083:
084: public int execute(StringBuffer buf) {
085: buf.append(text);
086: return index;
087: }
088: }
089:
090: static class ReplaceOp extends RewriteOperation {
091: protected int lastIndex;
092:
093: public ReplaceOp(int from, int to, String text) {
094: super (from, text);
095: lastIndex = to;
096: }
097:
098: public int execute(StringBuffer buf) {
099: if (text != null) {
100: buf.append(text);
101: }
102: return lastIndex + 1;
103: }
104: }
105:
106: static class DeleteOp extends ReplaceOp {
107: public DeleteOp(int from, int to) {
108: super (from, to, null);
109: }
110: }
111:
112: public static final String DEFAULT_PROGRAM_NAME = "default";
113: public static final int PROGRAM_INIT_SIZE = 100;
114:
115: /** Track the incoming list of tokens */
116: protected List tokens;
117:
118: /** You may have multiple, named streams of rewrite operations.
119: * I'm calling these things "programs."
120: * Maps String (name) -> rewrite (List)
121: */
122: protected Map programs = null;
123:
124: /** Map String (program name) -> Integer index */
125: protected Map lastRewriteTokenIndexes = null;
126:
127: /** track index of tokens */
128: protected int index = MIN_TOKEN_INDEX;
129:
130: /** Who do we suck tokens from? */
131: protected TokenStream stream;
132:
133: /** Which (whitespace) token(s) to throw out */
134: protected BitSet discardMask = new BitSet();
135:
136: public TokenStreamRewriteEngine(TokenStream upstream) {
137: this (upstream, 1000);
138: }
139:
140: public TokenStreamRewriteEngine(TokenStream upstream,
141: int initialSize) {
142: stream = upstream;
143: tokens = new ArrayList(initialSize);
144: programs = new HashMap();
145: programs.put(DEFAULT_PROGRAM_NAME, new ArrayList(
146: PROGRAM_INIT_SIZE));
147: lastRewriteTokenIndexes = new HashMap();
148: }
149:
150: public Token nextToken() throws TokenStreamException {
151: TokenWithIndex t;
152: // suck tokens until end of stream or we find a non-discarded token
153: do {
154: t = (TokenWithIndex) stream.nextToken();
155: if (t != null) {
156: t.setIndex(index); // what is t's index in list?
157: if (t.getType() != Token.EOF_TYPE) {
158: tokens.add(t); // track all tokens except EOF
159: }
160: index++; // move to next position
161: }
162: } while (t != null && discardMask.member(t.getType()));
163: return t;
164: }
165:
166: public void rollback(int instructionIndex) {
167: rollback(DEFAULT_PROGRAM_NAME, instructionIndex);
168: }
169:
170: /** Rollback the instruction stream for a program so that
171: * the indicated instruction (via instructionIndex) is no
172: * longer in the stream. UNTESTED!
173: */
174: public void rollback(String programName, int instructionIndex) {
175: List is = (List) programs.get(programName);
176: if (is != null) {
177: programs.put(programName, is.subList(MIN_TOKEN_INDEX,
178: instructionIndex));
179: }
180: }
181:
182: public void deleteProgram() {
183: deleteProgram(DEFAULT_PROGRAM_NAME);
184: }
185:
186: /** Reset the program so that no instructions exist */
187: public void deleteProgram(String programName) {
188: rollback(programName, MIN_TOKEN_INDEX);
189: }
190:
191: /** If op.index > lastRewriteTokenIndexes, just add to the end.
192: * Otherwise, do linear */
193: protected void addToSortedRewriteList(RewriteOperation op) {
194: addToSortedRewriteList(DEFAULT_PROGRAM_NAME, op);
195: }
196:
197: protected void addToSortedRewriteList(String programName,
198: RewriteOperation op) {
199: List rewrites = getProgram(programName);
200: // if at or beyond last op's index, just append
201: if (op.index >= getLastRewriteTokenIndex(programName)) {
202: rewrites.add(op); // append to list of operations
203: // record the index of this operation for next time through
204: setLastRewriteTokenIndex(programName, op.index);
205: return;
206: }
207: // not after the last one, so must insert to ordered list
208: Comparator comparator = new Comparator() {
209: public int compare(Object o, Object o1) {
210: RewriteOperation a = (RewriteOperation) o;
211: RewriteOperation b = (RewriteOperation) o1;
212: if (a.index < b.index)
213: return -1;
214: if (a.index > b.index)
215: return 1;
216: return 0;
217: }
218: };
219: int pos = Collections.binarySearch(rewrites, op, comparator);
220: if (pos < 0) {
221: rewrites.add(-pos - 1, op);
222: }
223: }
224:
225: public void insertAfter(Token t, String text) {
226: insertAfter(DEFAULT_PROGRAM_NAME, t, text);
227: }
228:
229: public void insertAfter(int index, String text) {
230: insertAfter(DEFAULT_PROGRAM_NAME, index, text);
231: }
232:
233: public void insertAfter(String programName, Token t, String text) {
234: insertAfter(programName, ((TokenWithIndex) t).getIndex(), text);
235: }
236:
237: public void insertAfter(String programName, int index, String text) {
238: // to insert after, just insert before next index (even if past end)
239: insertBefore(programName, index + 1, text);
240: }
241:
242: public void insertBefore(Token t, String text) {
243: insertBefore(DEFAULT_PROGRAM_NAME, t, text);
244: }
245:
246: public void insertBefore(int index, String text) {
247: insertBefore(DEFAULT_PROGRAM_NAME, index, text);
248: }
249:
250: public void insertBefore(String programName, Token t, String text) {
251: insertBefore(programName, ((TokenWithIndex) t).getIndex(), text);
252: }
253:
254: public void insertBefore(String programName, int index, String text) {
255: addToSortedRewriteList(programName, new InsertBeforeOp(index,
256: text));
257: }
258:
259: public void replace(int index, String text) {
260: replace(DEFAULT_PROGRAM_NAME, index, index, text);
261: }
262:
263: public void replace(int from, int to, String text) {
264: replace(DEFAULT_PROGRAM_NAME, from, to, text);
265: }
266:
267: public void replace(Token indexT, String text) {
268: replace(DEFAULT_PROGRAM_NAME, indexT, indexT, text);
269: }
270:
271: public void replace(Token from, Token to, String text) {
272: replace(DEFAULT_PROGRAM_NAME, from, to, text);
273: }
274:
275: public void replace(String programName, int from, int to,
276: String text) {
277: addToSortedRewriteList(new ReplaceOp(from, to, text));
278: }
279:
280: public void replace(String programName, Token from, Token to,
281: String text) {
282: replace(programName, ((TokenWithIndex) from).getIndex(),
283: ((TokenWithIndex) to).getIndex(), text);
284: }
285:
286: public void delete(int index) {
287: delete(DEFAULT_PROGRAM_NAME, index, index);
288: }
289:
290: public void delete(int from, int to) {
291: delete(DEFAULT_PROGRAM_NAME, from, to);
292: }
293:
294: public void delete(Token indexT) {
295: delete(DEFAULT_PROGRAM_NAME, indexT, indexT);
296: }
297:
298: public void delete(Token from, Token to) {
299: delete(DEFAULT_PROGRAM_NAME, from, to);
300: }
301:
302: public void delete(String programName, int from, int to) {
303: replace(programName, from, to, null);
304: }
305:
306: public void delete(String programName, Token from, Token to) {
307: replace(programName, from, to, null);
308: }
309:
310: public void discard(int ttype) {
311: discardMask.add(ttype);
312: }
313:
314: public TokenWithIndex getToken(int i) {
315: return (TokenWithIndex) tokens.get(i);
316: }
317:
318: public int getTokenStreamSize() {
319: return tokens.size();
320: }
321:
322: public String toOriginalString() {
323: return toOriginalString(MIN_TOKEN_INDEX,
324: getTokenStreamSize() - 1);
325: }
326:
327: public String toOriginalString(int start, int end) {
328: StringBuffer buf = new StringBuffer();
329: for (int i = start; i >= MIN_TOKEN_INDEX && i <= end
330: && i < tokens.size(); i++) {
331: buf.append(getToken(i).getText());
332: }
333: return buf.toString();
334: }
335:
336: public String toString() {
337: return toString(MIN_TOKEN_INDEX, getTokenStreamSize());
338: }
339:
340: public String toString(String programName) {
341: return toString(programName, MIN_TOKEN_INDEX,
342: getTokenStreamSize());
343: }
344:
345: public String toString(int start, int end) {
346: return toString(DEFAULT_PROGRAM_NAME, start, end);
347: }
348:
349: public String toString(String programName, int start, int end) {
350: List rewrites = (List) programs.get(programName);
351: if (rewrites == null) {
352: return null; // invalid program
353: }
354: StringBuffer buf = new StringBuffer();
355:
356: /** Index of first rewrite we have not done */
357: int rewriteOpIndex = 0;
358:
359: int tokenCursor = start;
360: while (tokenCursor >= MIN_TOKEN_INDEX && tokenCursor <= end
361: && tokenCursor < tokens.size()) {
362: if (rewriteOpIndex < rewrites.size()) {
363: RewriteOperation op = (RewriteOperation) rewrites
364: .get(rewriteOpIndex);
365: while (tokenCursor == op.index
366: && rewriteOpIndex < rewrites.size()) {
367: /*
368: System.out.println("execute op "+rewriteOpIndex+
369: " (type "+op.getClass().getName()+")"
370: +" at index "+op.index);
371: */
372: tokenCursor = op.execute(buf);
373: rewriteOpIndex++;
374: if (rewriteOpIndex < rewrites.size()) {
375: op = (RewriteOperation) rewrites
376: .get(rewriteOpIndex);
377: }
378: }
379: }
380: if (tokenCursor < end) {
381: buf.append(getToken(tokenCursor).getText());
382: tokenCursor++;
383: }
384: }
385: // now see if there are operations (append) beyond last token index
386: for (int opi = rewriteOpIndex; opi < rewrites.size(); opi++) {
387: RewriteOperation op = (RewriteOperation) rewrites.get(opi);
388: op.execute(buf); // must be insertions if after last token
389: }
390:
391: return buf.toString();
392: }
393:
394: public String toDebugString() {
395: return toDebugString(MIN_TOKEN_INDEX, getTokenStreamSize());
396: }
397:
398: public String toDebugString(int start, int end) {
399: StringBuffer buf = new StringBuffer();
400: for (int i = start; i >= MIN_TOKEN_INDEX && i <= end
401: && i < tokens.size(); i++) {
402: buf.append(getToken(i));
403: }
404: return buf.toString();
405: }
406:
407: public int getLastRewriteTokenIndex() {
408: return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME);
409: }
410:
411: protected int getLastRewriteTokenIndex(String programName) {
412: Integer I = (Integer) lastRewriteTokenIndexes.get(programName);
413: if (I == null) {
414: return -1;
415: }
416: return I.intValue();
417: }
418:
419: protected void setLastRewriteTokenIndex(String programName, int i) {
420: lastRewriteTokenIndexes.put(programName, new Integer(i));
421: }
422:
423: protected List getProgram(String name) {
424: List is = (List) programs.get(name);
425: if (is == null) {
426: is = initializeProgram(name);
427: }
428: return is;
429: }
430:
431: private List initializeProgram(String name) {
432: List is = new ArrayList(PROGRAM_INIT_SIZE);
433: programs.put(name, is);
434: return is;
435: }
436: }
|