001: /*
002: [The "BSD licence"]
003: Copyright (c) 2005-2006 Terence Parr
004: All rights reserved.
005:
006: Redistribution and use in source and binary forms, with or without
007: modification, are permitted provided that the following conditions
008: are met:
009: 1. Redistributions of source code must retain the above copyright
010: notice, this list of conditions and the following disclaimer.
011: 2. Redistributions in binary form must reproduce the above copyright
012: notice, this list of conditions and the following disclaimer in the
013: documentation and/or other materials provided with the distribution.
014: 3. The name of the author may not be used to endorse or promote products
015: derived from this software without specific prior written permission.
016:
017: THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
018: IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
019: OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
020: IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
021: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
022: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
023: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
024: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
026: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: */
028: package org.antlr.analysis;
029:
030: import org.antlr.tool.Grammar;
031: import org.antlr.tool.GrammarAST;
032: import org.antlr.misc.IntervalSet;
033: import org.antlr.misc.IntSet;
034:
035: /** A state machine transition label. A label can be either a simple
036: * label such as a token or character. A label can be a set of char or
037: * tokens. It can be an epsilon transition. It can be a semantic predicate
038: * (which assumes an epsilon transition) or a tree of predicates (in a DFA).
039: */
040: public class Label implements Comparable, Cloneable {
041: public static final int INVALID = -6;
042:
043: public static final int EPSILON = -5;
044:
045: public static final String EPSILON_STR = "<EPSILON>";
046:
047: /** label is a semantic predicate; implies label is epsilon also */
048: public static final int SEMPRED = -4;
049:
050: /** label is a set of tokens or char */
051: public static final int SET = -3;
052:
053: /** End of Token is like EOF for lexer rules. It implies that no more
054: * characters are available and that NFA conversion should terminate
055: * for this path. For example
056: *
057: * A : 'a' 'b' | 'a' ;
058: *
059: * yields a DFA predictor:
060: *
061: * o-a->o-b->1 predict alt 1
062: * |
063: * |-EOT->o predict alt 2
064: *
065: * To generate code for EOT, treat it as the "default" path, which
066: * implies there is no way to mismatch a char for the state from
067: * which the EOT emanates.
068: */
069: public static final int EOT = -2;
070:
071: public static final int EOF = -1;
072:
073: /** We have labels like EPSILON that are below 0; it's hard to
074: * store them in an array with negative index so use this
075: * constant as an index shift when accessing arrays based upon
076: * token type. If real token type is i, then array index would be
077: * NUM_FAUX_LABELS + i.
078: */
079: public static final int NUM_FAUX_LABELS = -INVALID;
080:
081: /** Anything at this value or larger can be considered a simple atom int
082: * for easy comparison during analysis only; faux labels are not used
083: * during parse time for real token types or char values.
084: */
085: public static final int MIN_ATOM_VALUE = EOT;
086:
087: // TODO: is 0 a valid unicode char? max is FFFF -1, right?
088: public static final int MIN_CHAR_VALUE = '\u0000';
089: public static final int MAX_CHAR_VALUE = '\uFFFE';
090:
091: /** End of rule token type; imaginary token type used only for
092: * local, partial FOLLOW sets to indicate that the local FOLLOW
093: * hit the end of rule. During error recovery, the local FOLLOW
094: * of a token reference may go beyond the end of the rule and have
095: * to use FOLLOW(rule). I have to just shift the token types to 2..n
096: * rather than 1..n to accommodate this imaginary token in my bitsets.
097: * If I didn't use a bitset implementation for runtime sets, I wouldn't
098: * need this. EOF is another candidate for a run time token type for
099: * parsers. Follow sets are not computed for lexers so we do not have
100: * this issue.
101: */
102: public static final int EOR_TOKEN_TYPE = org.antlr.runtime.Token.EOR_TOKEN_TYPE;
103:
104: public static final int DOWN = org.antlr.runtime.Token.DOWN;
105: public static final int UP = org.antlr.runtime.Token.UP;
106:
107: /** tokens and char range overlap; tokens are MIN_TOKEN_TYPE..n */
108: public static final int MIN_TOKEN_TYPE = org.antlr.runtime.Token.MIN_TOKEN_TYPE;
109:
110: /** The wildcard '.' char atom implies all valid characters==UNICODE */
111: //public static final IntSet ALLCHAR = IntervalSet.of(MIN_CHAR_VALUE,MAX_CHAR_VALUE);
112: /** The token type or character value; or, signifies special label. */
113: protected int label;
114:
115: /** A tree of semantic predicates from the grammar AST if label==SEMPRED.
116: * In the NFA, labels will always be exactly one predicate, but the DFA
117: * may have to combine a bunch of them as it collects predicates from
118: * multiple NFA configurations into a single DFA state.
119: */
120: protected SemanticContext semanticContext;
121:
122: /** A set of token types or character codes if label==SET */
123: // TODO: try IntervalSet for everything
124: protected IntSet labelSet;
125:
126: public Label(int label) {
127: this .label = label;
128: }
129:
130: /** Make a semantic predicate label */
131: public Label(GrammarAST predicateASTNode) {
132: this (SEMPRED);
133: this .semanticContext = new SemanticContext.Predicate(
134: predicateASTNode);
135: }
136:
137: /** Make a semantic predicates label */
138: public Label(SemanticContext semCtx) {
139: this (SEMPRED);
140: this .semanticContext = semCtx;
141: }
142:
143: /** Make a set label */
144: public Label(IntSet labelSet) {
145: if (labelSet == null) {
146: this .label = SET;
147: this .labelSet = IntervalSet.of(INVALID);
148: return;
149: }
150: int singleAtom = labelSet.getSingleElement();
151: if (singleAtom != INVALID) {
152: // convert back to a single atomic element if |labelSet|==1
153: label = singleAtom;
154: return;
155: }
156: this .label = SET;
157: this .labelSet = labelSet;
158: }
159:
160: public Object clone() {
161: Label l;
162: try {
163: l = (Label) super .clone();
164: l.label = this .label;
165: l.labelSet = new IntervalSet();
166: l.labelSet.addAll(this .labelSet);
167: } catch (CloneNotSupportedException e) {
168: throw new InternalError();
169: }
170: return l;
171: }
172:
173: public void add(Label a) {
174: if (isAtom()) {
175: labelSet = IntervalSet.of(label);
176: label = SET;
177: if (a.isAtom()) {
178: labelSet.add(a.getAtom());
179: } else if (a.isSet()) {
180: labelSet.addAll(a.getSet());
181: } else {
182: throw new IllegalStateException(
183: "can't add element to Label of type " + label);
184: }
185: return;
186: }
187: if (isSet()) {
188: if (a.isAtom()) {
189: labelSet.add(a.getAtom());
190: } else if (a.isSet()) {
191: labelSet.addAll(a.getSet());
192: } else {
193: throw new IllegalStateException(
194: "can't add element to Label of type " + label);
195: }
196: return;
197: }
198: throw new IllegalStateException(
199: "can't add element to Label of type " + label);
200: }
201:
202: public boolean isAtom() {
203: return label >= MIN_ATOM_VALUE;
204: }
205:
206: public boolean isEpsilon() {
207: return label == EPSILON;
208: }
209:
210: public boolean isSemanticPredicate() {
211: return label == SEMPRED;
212: }
213:
214: public boolean isSet() {
215: return label == SET;
216: }
217:
218: /** return the single atom label or INVALID if not a single atom */
219: public int getAtom() {
220: if (isAtom()) {
221: return label;
222: }
223: return INVALID;
224: }
225:
226: public IntSet getSet() {
227: if (label != SET) {
228: // convert single element to a set if they ask for it.
229: return IntervalSet.of(label);
230: }
231: return labelSet;
232: }
233:
234: public void setSet(IntSet set) {
235: label = SET;
236: labelSet = set;
237: }
238:
239: public SemanticContext getSemanticContext() {
240: return semanticContext;
241: }
242:
243: public boolean matches(int atom) {
244: if (label == atom) {
245: return true; // handle the single atom case efficiently
246: }
247: if (isSet()) {
248: return labelSet.member(atom);
249: }
250: return false;
251: }
252:
253: public boolean matches(IntSet set) {
254: if (isAtom()) {
255: return set.member(getAtom());
256: }
257: if (isSet()) {
258: // matches if intersection non-nil
259: return !getSet().and(set).isNil();
260: }
261: return false;
262: }
263:
264: public boolean matches(Label other) {
265: if (other.isSet()) {
266: return matches(other.getSet());
267: }
268: if (other.isAtom()) {
269: return matches(other.getAtom());
270: }
271: return false;
272: }
273:
274: public int hashCode() {
275: switch (label) {
276: case SET:
277: return labelSet.hashCode();
278: case SEMPRED:
279: return semanticContext.hashCode();
280: default:
281: return label;
282: }
283: }
284:
285: public boolean equals(Object o) {
286: if (o == null) {
287: return false;
288: }
289: // labels must be the same even if epsilon or set or sempred etc...
290: if (label != ((Label) o).label) {
291: return false;
292: }
293: if (label == SET) {
294: return this .labelSet.equals(((Label) o).labelSet);
295: }
296: return true; // label values are same, so true
297: }
298:
299: public int compareTo(Object o) {
300: return this .label - ((Label) o).label;
301: }
302:
303: /** Predicates are lists of AST nodes from the NFA created from the
304: * grammar, but the same predicate could be cut/paste into multiple
305: * places in the grammar. I must compare the text of all the
306: * predicates to truly answer whether {p1,p2} .equals {p1,p2}.
307: * Unfortunately, I cannot rely on the AST.equals() to work properly
308: * so I must do a brute force O(n^2) nested traversal of the Set
309: * doing a String compare.
310: *
311: * At this point, Labels are not compared for equals when they are
312: * predicates, but here's the code for future use.
313: */
314: /*
315: protected boolean predicatesEquals(Set others) {
316: Iterator iter = semanticContext.iterator();
317: while (iter.hasNext()) {
318: AST predAST = (AST) iter.next();
319: Iterator inner = semanticContext.iterator();
320: while (inner.hasNext()) {
321: AST otherPredAST = (AST) inner.next();
322: if ( !predAST.getText().equals(otherPredAST.getText()) ) {
323: return false;
324: }
325: }
326: }
327: return true;
328: }
329: */
330:
331: public String toString() {
332: switch (label) {
333: case SET:
334: return labelSet.toString();
335: case SEMPRED:
336: return "{" + semanticContext + "}?";
337: default:
338: return String.valueOf(label);
339: }
340: }
341:
342: public String toString(Grammar g) {
343: switch (label) {
344: case SET:
345: return labelSet.toString(g);
346: case SEMPRED:
347: return "{" + semanticContext + "}?";
348: default:
349: return g.getTokenDisplayName(label);
350: }
351: }
352:
353: /*
354: public String predicatesToString() {
355: if ( semanticContext==NFAConfiguration.DEFAULT_CLAUSE_SEMANTIC_CONTEXT ) {
356: return "!other preds";
357: }
358: StringBuffer buf = new StringBuffer();
359: Iterator iter = semanticContext.iterator();
360: while (iter.hasNext()) {
361: AST predAST = (AST) iter.next();
362: buf.append(predAST.getText());
363: if ( iter.hasNext() ) {
364: buf.append("&");
365: }
366: }
367: return buf.toString();
368: }
369: */
370: }
|