001: /*
002: [The "BSD licence"]
003: Copyright (c) 2005-2006 Terence Parr
004: All rights reserved.
005:
006: Redistribution and use in source and binary forms, with or without
007: modification, are permitted provided that the following conditions
008: are met:
009: 1. Redistributions of source code must retain the above copyright
010: notice, this list of conditions and the following disclaimer.
011: 2. Redistributions in binary form must reproduce the above copyright
012: notice, this list of conditions and the following disclaimer in the
013: documentation and/or other materials provided with the distribution.
014: 3. The name of the author may not be used to endorse or promote products
015: derived from this software without specific prior written permission.
016:
017: THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
018: IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
019: OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
020: IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
021: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
022: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
023: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
024: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
026: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: */
028: package org.antlr.tool;
029:
030: import antlr.BaseAST;
031: import antlr.Token;
032: import antlr.TokenWithIndex;
033: import antlr.collections.AST;
034: import org.antlr.analysis.DFA;
035: import org.antlr.analysis.NFAState;
036: import org.antlr.misc.IntSet;
037: import org.antlr.stringtemplate.StringTemplate;
038:
039: import java.util.*;
040:
041: /** Grammars are first converted to ASTs using this class and then are
042: * converted to NFAs via a tree walker.
043: *
044: * The reader may notice that I have made a very non-OO decision in this
045: * class to track variables for many different kinds of nodes. It wastes
046: * space for nodes that don't need the values and OO principles cry out
047: * for a new class type for each kind of node in my tree. I am doing this
048: * on purpose for a variety of reasons. I don't like using the type
049: * system for different node types; it yields too many damn class files
050: * which I hate. Perhaps if I put them all in one file. Most importantly
051: * though I hate all the type casting that would have to go on. I would
052: * have all sorts of extra work to do. Ick. Anyway, I'm doing all this
053: * on purpose, not out of ignorance. ;)
054: */
055: public class GrammarAST extends BaseAST {
056: static int count = 0;
057:
058: public int ID = ++count;
059:
060: /** This AST node was created from what token? */
061: public Token token = null;
062:
063: protected String enclosingRule = null;
064:
065: /** If this is a RULE node then track rule's start, stop tokens' index. */
066: public int ruleStartTokenIndex;
067: public int ruleStopTokenIndex;
068:
069: /** If this is a decision node, what is the lookahead DFA? */
070: public DFA lookaheadDFA = null;
071:
072: /** What NFA start state was built from this node? */
073: public NFAState NFAStartState = null;
074:
075: /** This is used for TREE_BEGIN nodes to point into
076: * the NFA. TREE_BEGINs point at left edge of DOWN for LOOK computation
077: * purposes (Nullable tree child list needs special code gen when matching).
078: */
079: public NFAState NFATreeDownState = null;
080:
081: /** Rule ref nodes, token refs, set, and NOT set refs need to track their
082: * location in the generated NFA so that local FOLLOW sets can be
083: * computed during code gen for automatic error recovery.
084: */
085: public NFAState followingNFAState = null;
086:
087: /** If this is a SET node, what are the elements? */
088: protected IntSet setValue = null;
089:
090: /** If this is a BLOCK node, track options here */
091: protected Map options;
092:
093: /** If this is a BLOCK node for a rewrite rule, track referenced
094: * elements here. Don't track elements in nested subrules.
095: */
096: public Set<GrammarAST> rewriteRefsShallow;
097:
098: /* If REWRITE node, track EVERY element and label ref to right of ->
099: * for this rewrite rule. There could be multiple of these per
100: * rule:
101: *
102: * a : ( ... -> ... | ... -> ... ) -> ... ;
103: *
104: * We may need a list of all refs to do definitions for whole rewrite
105: * later.
106: *
107: * If BLOCK then tracks every element at that level and below.
108: */
109: public Set<GrammarAST> rewriteRefsDeep;
110:
111: public static final Set legalBlockOptions = new HashSet() {
112: {
113: add("k");
114: add("greedy");
115: add("backtrack");
116: add("memoize");
117: }
118: };
119:
120: /** What are the default options for a subrule? */
121: public static final Map defaultBlockOptions = new HashMap() {
122: {
123: put("greedy", "true");
124: }
125: };
126:
127: /** if this is an ACTION node, this is the outermost enclosing
128: * alt num in rule. For actions, define.g sets these (used to
129: * be codegen.g). We need these set so we can examine actions
130: * early, before code gen, for refs to rule predefined properties
131: * and rule labels. For most part define.g sets outerAltNum, but
132: * codegen.g does the ones for %foo(a={$ID.text}) type refs as
133: * the {$ID...} is not seen as an action until code gen pulls apart.
134: */
135: public int outerAltNum;
136:
137: /** if this is a TOKEN_REF or RULE_REF node, this is the code StringTemplate
138: * generated for this node. We need to update it later to add
139: * a label if someone does $tokenref or $ruleref in an action.
140: */
141: public StringTemplate code;
142:
143: public GrammarAST() {
144: ;
145: }
146:
147: public GrammarAST(int t, String txt) {
148: initialize(t, txt);
149: }
150:
151: public void initialize(int i, String s) {
152: token = new TokenWithIndex(i, s);
153: }
154:
155: public void initialize(AST ast) {
156: this .token = ((GrammarAST) ast).token;
157: }
158:
159: public void initialize(Token token) {
160: this .token = token;
161: }
162:
163: public DFA getLookaheadDFA() {
164: return lookaheadDFA;
165: }
166:
167: public void setLookaheadDFA(DFA lookaheadDFA) {
168: this .lookaheadDFA = lookaheadDFA;
169: }
170:
171: public Token getToken() {
172: return token;
173: }
174:
175: public NFAState getNFAStartState() {
176: return NFAStartState;
177: }
178:
179: public void setNFAStartState(NFAState nfaStartState) {
180: this .NFAStartState = nfaStartState;
181: }
182:
183: /** Save the option key/value pair and process it; return the key
184: * or null if invalid option.
185: */
186: public String setOption(Grammar grammar, String key, Object value) {
187: if (!legalBlockOptions.contains(key)) {
188: ErrorManager.grammarError(ErrorManager.MSG_ILLEGAL_OPTION,
189: grammar, token, key);
190: return null;
191: }
192: if (value instanceof String) {
193: String vs = (String) value;
194: if (vs.charAt(0) == '"') {
195: value = vs.substring(1, vs.length() - 1); // strip quotes
196: }
197: }
198: if (options == null) {
199: options = new HashMap();
200: }
201: if (key.equals("k")) {
202: grammar.numberOfManualLookaheadOptions++;
203: }
204: options.put(key, value);
205: return key;
206: }
207:
208: public Object getOption(String key) {
209: Object value = null;
210: if (options != null) {
211: value = options.get(key);
212: }
213: if (value == null) {
214: value = defaultBlockOptions.get(key);
215: }
216: return value;
217: }
218:
219: public void setOptions(Grammar grammar, Map options) {
220: if (options == null) {
221: this .options = null;
222: return;
223: }
224: Set keys = options.keySet();
225: for (Iterator it = keys.iterator(); it.hasNext();) {
226: String optionName = (String) it.next();
227: String stored = setOption(grammar, optionName, options
228: .get(optionName));
229: if (stored == null) {
230: it.remove();
231: }
232: }
233: }
234:
235: public Map getOptions() {
236: return options;
237: }
238:
239: public String getText() {
240: if (token != null) {
241: return token.getText();
242: }
243: return "";
244: }
245:
246: public void setType(int type) {
247: token.setType(type);
248: }
249:
250: public void setText(String text) {
251: token.setText(text);
252: }
253:
254: public int getType() {
255: if (token != null) {
256: return token.getType();
257: }
258: return -1;
259: }
260:
261: public int getLine() {
262: int line = 0;
263: if (token != null) {
264: line = token.getLine();
265: }
266: if (line == 0) {
267: AST child = getFirstChild();
268: if (child != null) {
269: line = child.getLine();
270: }
271: }
272: return line;
273: }
274:
275: public int getColumn() {
276: int col = 0;
277: if (token != null) {
278: col = token.getColumn();
279: }
280: if (col == 0) {
281: AST child = getFirstChild();
282: if (child != null) {
283: col = child.getColumn();
284: }
285: }
286: return col;
287: }
288:
289: public void setLine(int line) {
290: token.setLine(line);
291: }
292:
293: public void setColumn(int col) {
294: token.setColumn(col);
295: }
296:
297: public void setEnclosingRule(String rule) {
298: this .enclosingRule = rule;
299: }
300:
301: public String getEnclosingRule() {
302: return enclosingRule;
303: }
304:
305: public IntSet getSetValue() {
306: return setValue;
307: }
308:
309: public void setSetValue(IntSet setValue) {
310: this .setValue = setValue;
311: }
312:
313: public GrammarAST getLastChild() {
314: return ((GrammarAST) getFirstChild()).getLastSibling();
315: }
316:
317: public GrammarAST getLastSibling() {
318: GrammarAST t = this ;
319: GrammarAST last = null;
320: while (t != null) {
321: last = t;
322: t = (GrammarAST) t.getNextSibling();
323: }
324: return last;
325: }
326:
327: /** Get the ith child from 0 */
328: public GrammarAST getChild(int i) {
329: int n = 0;
330: AST t = getFirstChild();
331: while (t != null) {
332: if (n == i) {
333: return (GrammarAST) t;
334: }
335: n++;
336: t = (GrammarAST) t.getNextSibling();
337: }
338: return null;
339: }
340:
341: public GrammarAST getFirstChildWithType(int ttype) {
342: AST t = getFirstChild();
343: while (t != null) {
344: if (t.getType() == ttype) {
345: return (GrammarAST) t;
346: }
347: t = (GrammarAST) t.getNextSibling();
348: }
349: return null;
350: }
351:
352: public GrammarAST[] getChildrenAsArray() {
353: AST t = getFirstChild();
354: GrammarAST[] array = new GrammarAST[getNumberOfChildren()];
355: int i = 0;
356: while (t != null) {
357: array[i] = (GrammarAST) t;
358: t = t.getNextSibling();
359: i++;
360: }
361: return array;
362: }
363:
364: /** Return a reference to the first node (depth-first) that has
365: * token type ttype. Assume 'this' is a root node; don't visit siblings
366: * of root. Return null if no node found with ttype.
367: */
368: public GrammarAST findFirstType(int ttype) {
369: // check this node (the root) first
370: if (this .getType() == ttype) {
371: return this ;
372: }
373: // else check children
374: GrammarAST child = (GrammarAST) this .getFirstChild();
375: while (child != null) {
376: GrammarAST result = child.findFirstType(ttype);
377: if (result != null) {
378: return result;
379: }
380: child = (GrammarAST) child.getNextSibling();
381: }
382: return null;
383: }
384:
385: /** Make nodes unique based upon Token so we can add them to a Set; if
386: * not a GrammarAST, check type.
387: */
388: public boolean equals(AST ast) {
389: if (!(ast instanceof GrammarAST)) {
390: return this .getType() == ast.getType();
391: }
392: GrammarAST t = (GrammarAST) ast;
393: return token.getLine() == t.getLine()
394: && token.getColumn() == t.getColumn();
395: }
396:
397: /** See if tree has exact token types and structure; no text */
398: public boolean hasSameTreeStructure(AST t) {
399: // check roots first.
400: if (this .getType() != t.getType())
401: return false;
402: // if roots match, do full list match test on children.
403: if (this .getFirstChild() != null) {
404: if (!(((GrammarAST) this .getFirstChild())
405: .hasSameListStructure(t.getFirstChild())))
406: return false;
407: }
408: // sibling has no kids, make sure t doesn't either
409: else if (t.getFirstChild() != null) {
410: return false;
411: }
412: return true;
413: }
414:
415: public boolean hasSameListStructure(AST t) {
416: AST sibling;
417:
418: // the empty tree is not a match of any non-null tree.
419: if (t == null) {
420: return false;
421: }
422:
423: // Otherwise, start walking sibling lists. First mismatch, return false.
424: for (sibling = this ; sibling != null && t != null; sibling = sibling
425: .getNextSibling(), t = t.getNextSibling()) {
426: // as a quick optimization, check roots first.
427: if (sibling.getType() != t.getType()) {
428: return false;
429: }
430: // if roots match, do full list match test on children.
431: if (sibling.getFirstChild() != null) {
432: if (!((GrammarAST) sibling.getFirstChild())
433: .hasSameListStructure(t.getFirstChild())) {
434: return false;
435: }
436: }
437: // sibling has no kids, make sure t doesn't either
438: else if (t.getFirstChild() != null) {
439: return false;
440: }
441: }
442: if (sibling == null && t == null) {
443: return true;
444: }
445: // one sibling list has more than the other
446: return false;
447: }
448:
449: public static GrammarAST dup(AST t) {
450: if (t == null) {
451: return null;
452: }
453: GrammarAST dup_t = new GrammarAST();
454: dup_t.initialize(t);
455: return dup_t;
456: }
457:
458: public static void main(String[] args) {
459: GrammarAST t = new GrammarAST();
460: }
461:
462: /** Duplicate tree including siblings of root. */
463: public static GrammarAST dupListNoActions(GrammarAST t,
464: GrammarAST parent) {
465: GrammarAST result = dupTreeNoActions(t, parent); // if t == null, then result==null
466: GrammarAST nt = result;
467: while (t != null) { // for each sibling of the root
468: t = (GrammarAST) t.getNextSibling();
469: if (t != null && t.getType() == ANTLRParser.ACTION) {
470: continue;
471: }
472: GrammarAST d = dupTreeNoActions(t, parent);
473: if (d != null) {
474: if (nt != null) {
475: nt.setNextSibling(d); // dup each subtree, building new tree
476: }
477: nt = d;
478: }
479: }
480: return result;
481: }
482:
483: /**Duplicate a tree, assuming this is a root node of a tree--
484: * duplicate that node and what's below; ignore siblings of root node.
485: */
486: public static GrammarAST dupTreeNoActions(GrammarAST t,
487: GrammarAST parent) {
488: if (t == null) {
489: return null;
490: }
491: int ttype = t.getType();
492: if (ttype == ANTLRParser.REWRITE) {
493: return null;
494: }
495: if (ttype == ANTLRParser.BANG || ttype == ANTLRParser.ROOT) {
496: return (GrammarAST) t.getFirstChild(); // return x from ^(ROOT x)
497: }
498: if ((ttype == ANTLRParser.ASSIGN || ttype == ANTLRParser.PLUS_ASSIGN)
499: && (parent == null || parent.getType() != ANTLRParser.OPTIONS)) {
500: return dupTreeNoActions(t.getChild(1), t); // return x from ^(ASSIGN label x)
501: }
502: GrammarAST result = dup(t); // make copy of root
503: // copy all children of root.
504: GrammarAST kids = dupListNoActions((GrammarAST) t
505: .getFirstChild(), t);
506: result.setFirstChild(kids);
507: return result;
508: }
509:
510: }
|