001: /*
002: [The "BSD licence"]
003: Copyright (c) 2005-2006 Terence Parr
004: All rights reserved.
005:
006: Redistribution and use in source and binary forms, with or without
007: modification, are permitted provided that the following conditions
008: are met:
009: 1. Redistributions of source code must retain the above copyright
010: notice, this list of conditions and the following disclaimer.
011: 2. Redistributions in binary form must reproduce the above copyright
012: notice, this list of conditions and the following disclaimer in the
013: documentation and/or other materials provided with the distribution.
014: 3. The name of the author may not be used to endorse or promote products
015: derived from this software without specific prior written permission.
016:
017: THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
018: IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
019: OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
020: IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
021: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
022: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
023: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
024: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
026: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: */
028: package org.antlr.runtime;
029:
030: import java.util.*;
031:
032: /** The most common stream of tokens is one where every token is buffered up
033: * and tokens are prefiltered for a certain channel (the parser will only
034: * see these tokens and cannot change the filter channel number during the
035: * parse).
036: *
037: * TODO: how to access the full token stream? How to track all tokens matched per rule?
038: */
039: public class CommonTokenStream implements TokenStream {
040: protected TokenSource tokenSource;
041:
042: /** Record every single token pulled from the source so we can reproduce
043: * chunks of it later.
044: */
045: protected List tokens;
046:
047: /** Map<tokentype, channel> to override some Tokens' channel numbers */
048: protected Map channelOverrideMap;
049:
050: /** Set<tokentype>; discard any tokens with this type */
051: protected Set discardSet;
052:
053: /** Skip tokens on any channel but this one; this is how we skip whitespace... */
054: protected int channel = Token.DEFAULT_CHANNEL;
055:
056: /** By default, track all incoming tokens */
057: protected boolean discardOffChannelTokens = false;
058:
059: /** Track the last mark() call result value for use in rewind(). */
060: protected int lastMarker;
061:
062: /** The index into the tokens list of the current token (next token
063: * to consume). p==-1 indicates that the tokens list is empty
064: */
065: protected int p = -1;
066:
067: public CommonTokenStream() {
068: tokens = new ArrayList(500);
069: }
070:
071: public CommonTokenStream(TokenSource tokenSource) {
072: this ();
073: this .tokenSource = tokenSource;
074: }
075:
076: public CommonTokenStream(TokenSource tokenSource, int channel) {
077: this (tokenSource);
078: this .channel = channel;
079: }
080:
081: /** Reset this token stream by setting its token source. */
082: public void setTokenSource(TokenSource tokenSource) {
083: this .tokenSource = tokenSource;
084: tokens.clear();
085: p = -1;
086: channel = Token.DEFAULT_CHANNEL;
087: }
088:
089: /** Load all tokens from the token source and put in tokens.
090: * This is done upon first LT request because you might want to
091: * set some token type / channel overrides before filling buffer.
092: */
093: protected void fillBuffer() {
094: int index = 0;
095: Token t = tokenSource.nextToken();
096: while (t != null && t.getType() != CharStream.EOF) {
097: boolean discard = false;
098: // is there a channel override for token type?
099: if (channelOverrideMap != null) {
100: Integer channelI = (Integer) channelOverrideMap
101: .get(new Integer(t.getType()));
102: if (channelI != null) {
103: t.setChannel(channelI.intValue());
104: }
105: }
106: if (discardSet != null
107: && discardSet.contains(new Integer(t.getType()))) {
108: discard = true;
109: } else if (discardOffChannelTokens
110: && t.getChannel() != this .channel) {
111: discard = true;
112: }
113: if (!discard) {
114: t.setTokenIndex(index);
115: tokens.add(t);
116: index++;
117: }
118: t = tokenSource.nextToken();
119: }
120: // leave p pointing at first token on channel
121: p = 0;
122: p = skipOffTokenChannels(p);
123: }
124:
125: /** Move the input pointer to the next incoming token. The stream
126: * must become active with LT(1) available. consume() simply
127: * moves the input pointer so that LT(1) points at the next
128: * input symbol. Consume at least one token.
129: *
130: * Walk past any token not on the channel the parser is listening to.
131: */
132: public void consume() {
133: if (p < tokens.size()) {
134: p++;
135: p = skipOffTokenChannels(p); // leave p on valid token
136: }
137: }
138:
139: /** Given a starting index, return the index of the first on-channel
140: * token.
141: */
142: protected int skipOffTokenChannels(int i) {
143: int n = tokens.size();
144: while (i < n && ((Token) tokens.get(i)).getChannel() != channel) {
145: i++;
146: }
147: return i;
148: }
149:
150: protected int skipOffTokenChannelsReverse(int i) {
151: while (i >= 0
152: && ((Token) tokens.get(i)).getChannel() != channel) {
153: i--;
154: }
155: return i;
156: }
157:
158: /** A simple filter mechanism whereby you can tell this token stream
159: * to force all tokens of type ttype to be on channel. For example,
160: * when interpreting, we cannot exec actions so we need to tell
161: * the stream to force all WS and NEWLINE to be a different, ignored
162: * channel.
163: */
164: public void setTokenTypeChannel(int ttype, int channel) {
165: if (channelOverrideMap == null) {
166: channelOverrideMap = new HashMap();
167: }
168: channelOverrideMap
169: .put(new Integer(ttype), new Integer(channel));
170: }
171:
172: public void discardTokenType(int ttype) {
173: if (discardSet == null) {
174: discardSet = new HashSet();
175: }
176: discardSet.add(new Integer(ttype));
177: }
178:
179: public void discardOffChannelTokens(boolean discardOffChannelTokens) {
180: this .discardOffChannelTokens = discardOffChannelTokens;
181: }
182:
183: public List getTokens() {
184: if (p == -1) {
185: fillBuffer();
186: }
187: return tokens;
188: }
189:
190: public List getTokens(int start, int stop) {
191: return getTokens(start, stop, (BitSet) null);
192: }
193:
194: /** Given a start and stop index, return a List of all tokens in
195: * the token type BitSet. Return null if no tokens were found. This
196: * method looks at both on and off channel tokens.
197: */
198: public List getTokens(int start, int stop, BitSet types) {
199: if (p == -1) {
200: fillBuffer();
201: }
202: if (stop >= tokens.size()) {
203: stop = tokens.size() - 1;
204: }
205: if (start < 0) {
206: start = 0;
207: }
208: if (start > stop) {
209: return null;
210: }
211:
212: // list = tokens[start:stop]:{Token t, t.getType() in types}
213: List filteredTokens = new ArrayList();
214: for (int i = start; i <= stop; i++) {
215: Token t = (Token) tokens.get(i);
216: if (types == null || types.member(t.getType())) {
217: filteredTokens.add(t);
218: }
219: }
220: if (filteredTokens.size() == 0) {
221: filteredTokens = null;
222: }
223: return filteredTokens;
224: }
225:
226: public List getTokens(int start, int stop, List types) {
227: return getTokens(start, stop, new BitSet(types));
228: }
229:
230: public List getTokens(int start, int stop, int ttype) {
231: return getTokens(start, stop, BitSet.of(ttype));
232: }
233:
234: /** Get the ith token from the current position 1..n where k=1 is the
235: * first symbol of lookahead.
236: */
237: public Token LT(int k) {
238: if (p == -1) {
239: fillBuffer();
240: }
241: if (k == 0) {
242: return null;
243: }
244: if (k < 0) {
245: return LB(-k);
246: }
247: //System.out.print("LT(p="+p+","+k+")=");
248: if ((p + k - 1) >= tokens.size()) {
249: return Token.EOF_TOKEN;
250: }
251: //System.out.println(tokens.get(p+k-1));
252: int i = p;
253: int n = 1;
254: // find k good tokens
255: while (n < k) {
256: // skip off-channel tokens
257: i = skipOffTokenChannels(i + 1); // leave p on valid token
258: n++;
259: }
260: if (i >= tokens.size()) {
261: return Token.EOF_TOKEN;
262: }
263: return (Token) tokens.get(i);
264: }
265:
266: /** Look backwards k tokens on-channel tokens */
267: protected Token LB(int k) {
268: //System.out.print("LB(p="+p+","+k+") ");
269: if (p == -1) {
270: fillBuffer();
271: }
272: if (k == 0) {
273: return null;
274: }
275: if ((p - k) < 0) {
276: return null;
277: }
278:
279: int i = p;
280: int n = 1;
281: // find k good tokens looking backwards
282: while (n <= k) {
283: // skip off-channel tokens
284: i = skipOffTokenChannelsReverse(i - 1); // leave p on valid token
285: n++;
286: }
287: if (i < 0) {
288: return null;
289: }
290: return (Token) tokens.get(i);
291: }
292:
293: /** Return absolute token i; ignore which channel the tokens are on;
294: * that is, count all tokens not just on-channel tokens.
295: */
296: public Token get(int i) {
297: return (Token) tokens.get(i);
298: }
299:
300: public int LA(int i) {
301: return LT(i).getType();
302: }
303:
304: public int mark() {
305: if (p == -1) {
306: fillBuffer();
307: }
308: lastMarker = index();
309: return lastMarker;
310: }
311:
312: public void release(int marker) {
313: // no resources to release
314: }
315:
316: public int size() {
317: return tokens.size();
318: }
319:
320: public int index() {
321: return p;
322: }
323:
324: public void rewind(int marker) {
325: seek(marker);
326: }
327:
328: public void rewind() {
329: seek(lastMarker);
330: }
331:
332: public void seek(int index) {
333: p = index;
334: }
335:
336: public TokenSource getTokenSource() {
337: return tokenSource;
338: }
339:
340: public String toString() {
341: if (p == -1) {
342: fillBuffer();
343: }
344: return toString(0, tokens.size() - 1);
345: }
346:
347: public String toString(int start, int stop) {
348: if (start < 0 || stop < 0) {
349: return null;
350: }
351: if (p == -1) {
352: fillBuffer();
353: }
354: if (stop >= tokens.size()) {
355: stop = tokens.size() - 1;
356: }
357: StringBuffer buf = new StringBuffer();
358: for (int i = start; i <= stop; i++) {
359: Token t = (Token) tokens.get(i);
360: buf.append(t.getText());
361: }
362: return buf.toString();
363: }
364:
365: public String toString(Token start, Token stop) {
366: if (start != null && stop != null) {
367: return toString(start.getTokenIndex(), stop.getTokenIndex());
368: }
369: return null;
370: }
371: }
|