001: package org.antlr.tool;
002:
003: import org.antlr.analysis.NFAState;
004: import org.antlr.analysis.Transition;
005: import org.antlr.analysis.RuleClosureTransition;
006:
007: import java.util.List;
008: import java.util.HashSet;
009: import java.util.ArrayList;
010: import java.util.Set;
011:
012: /** Factor out routines that check sanity of rules, alts, grammars, etc.. */
013: public class GrammarSanity {
014: protected Grammar grammar;
015:
016: public GrammarSanity(Grammar grammar) {
017: this .grammar = grammar;
018: }
019:
020: /** Check all rules for infinite left recursion before analysis. Return list
021: * of troublesome rule cycles. This method has two side-effects: it notifies
022: * the error manager that we have problems and it sets the list of
023: * recursive rules that we should ignore during analysis.
024: *
025: * Return type: List<Set<String(rule-name)>>.
026: */
027: public List checkAllRulesForLeftRecursion() {
028: grammar.createNFAs(); // make sure we have NFAs
029: grammar.leftRecursiveRules = new HashSet();
030: List listOfRecursiveCycles = new ArrayList(); // List<Set<String(rule-name)>>
031: for (int i = 0; i < grammar.ruleIndexToRuleList.size(); i++) {
032: String ruleName = (String) grammar.ruleIndexToRuleList
033: .elementAt(i);
034: if (ruleName != null) {
035: NFAState s = grammar.getRuleStartState(ruleName);
036: grammar.visitedDuringRecursionCheck = new HashSet();
037: grammar.visitedDuringRecursionCheck.add(ruleName);
038: Set visitedStates = new HashSet();
039: traceStatesLookingForLeftRecursion(s, visitedStates,
040: listOfRecursiveCycles);
041: }
042: }
043: if (listOfRecursiveCycles.size() > 0) {
044: ErrorManager.leftRecursionCycles(listOfRecursiveCycles);
045: }
046: return listOfRecursiveCycles;
047: }
048:
049: /** From state s, look for any transition to a rule that is currently
050: * being traced. When tracing r, visitedDuringRecursionCheck has r
051: * initially. If you reach an accept state, return but notify the
052: * invoking rule that it is nullable, which implies that invoking
053: * rule must look at follow transition for that invoking state.
054: * The visitedStates tracks visited states within a single rule so
055: * we can avoid epsilon-loop-induced infinite recursion here. Keep
056: * filling the cycles in listOfRecursiveCycles and also, as a
057: * side-effect, set leftRecursiveRules.
058: */
059: protected boolean traceStatesLookingForLeftRecursion(NFAState s,
060: Set visitedStates, List listOfRecursiveCycles) {
061: if (s.isAcceptState()) {
062: // this rule must be nullable!
063: // At least one epsilon edge reached accept state
064: return true;
065: }
066: if (visitedStates.contains(s)) {
067: // within same rule, we've hit same state; quit looping
068: return false;
069: }
070: visitedStates.add(s);
071: boolean stateReachesAcceptState = false;
072: Transition t0 = s.transition(0);
073: if (t0 instanceof RuleClosureTransition) {
074: String targetRuleName = ((NFAState) t0.target)
075: .getEnclosingRule();
076: if (grammar.visitedDuringRecursionCheck
077: .contains(targetRuleName)) {
078: // record left-recursive rule, but don't go back in
079: grammar.leftRecursiveRules.add(targetRuleName);
080: /*
081: System.out.println("already visited "+targetRuleName+", calling from "+
082: s.getEnclosingRule());
083: */
084: addRulesToCycle(targetRuleName, s.getEnclosingRule(),
085: listOfRecursiveCycles);
086: } else {
087: // must visit if not already visited; send new visitedStates set
088: grammar.visitedDuringRecursionCheck.add(targetRuleName);
089: boolean callReachedAcceptState = traceStatesLookingForLeftRecursion(
090: (NFAState) t0.target, new HashSet(),
091: listOfRecursiveCycles);
092: // we're back from visiting that rule
093: grammar.visitedDuringRecursionCheck
094: .remove(targetRuleName);
095: // must keep going in this rule then
096: if (callReachedAcceptState) {
097: NFAState followingState = ((RuleClosureTransition) t0)
098: .getFollowState();
099: stateReachesAcceptState |= traceStatesLookingForLeftRecursion(
100: followingState, visitedStates,
101: listOfRecursiveCycles);
102: }
103: }
104: } else if (t0.label.isEpsilon()) {
105: stateReachesAcceptState |= traceStatesLookingForLeftRecursion(
106: (NFAState) t0.target, visitedStates,
107: listOfRecursiveCycles);
108: }
109: // else it has a labeled edge
110:
111: // now do the other transition if it exists
112: Transition t1 = s.transition(1);
113: if (t1 != null) {
114: stateReachesAcceptState |= traceStatesLookingForLeftRecursion(
115: (NFAState) t1.target, visitedStates,
116: listOfRecursiveCycles);
117: }
118: return stateReachesAcceptState;
119: }
120:
121: /** enclosingRuleName calls targetRuleName, find the cycle containing
122: * the target and add the caller. Find the cycle containing the caller
123: * and add the target. If no cycles contain either, then create a new
124: * cycle. listOfRecursiveCycles is List<Set<String>> that holds a list
125: * of cycles (sets of rule names).
126: */
127: protected void addRulesToCycle(String targetRuleName,
128: String enclosingRuleName, List listOfRecursiveCycles) {
129: boolean foundCycle = false;
130: for (int i = 0; i < listOfRecursiveCycles.size(); i++) {
131: Set rulesInCycle = (Set) listOfRecursiveCycles.get(i);
132: // ensure both rules are in same cycle
133: if (rulesInCycle.contains(targetRuleName)) {
134: rulesInCycle.add(enclosingRuleName);
135: foundCycle = true;
136: }
137: if (rulesInCycle.contains(enclosingRuleName)) {
138: rulesInCycle.add(targetRuleName);
139: foundCycle = true;
140: }
141: }
142: if (!foundCycle) {
143: Set cycle = new HashSet();
144: cycle.add(targetRuleName);
145: cycle.add(enclosingRuleName);
146: listOfRecursiveCycles.add(cycle);
147: }
148: }
149:
150: public void checkRuleReference(GrammarAST refAST,
151: GrammarAST argsAST, String currentRuleName) {
152: Rule r = grammar.getRule(refAST.getText());
153: if (refAST.getType() == ANTLRParser.RULE_REF) {
154: if (argsAST != null) {
155: // rule[args]; ref has args
156: if (r != null && r.argActionAST == null) {
157: // but rule def has no args
158: ErrorManager.grammarError(
159: ErrorManager.MSG_RULE_HAS_NO_ARGS, grammar,
160: argsAST.getToken(), r.name);
161: }
162: } else {
163: // rule ref has no args
164: if (r != null && r.argActionAST != null) {
165: // but rule def has args
166: ErrorManager.grammarError(
167: ErrorManager.MSG_MISSING_RULE_ARGS,
168: grammar, refAST.getToken(), r.name);
169: }
170: }
171: } else if (refAST.getType() == ANTLRParser.TOKEN_REF) {
172: if (grammar.type != Grammar.LEXER) {
173: if (argsAST != null) {
174: // args on a token ref not in a lexer rule
175: ErrorManager.grammarError(
176: ErrorManager.MSG_ARGS_ON_TOKEN_REF,
177: grammar, refAST.getToken(), refAST
178: .getText());
179: }
180: return; // ignore token refs in nonlexers
181: }
182: if (argsAST != null) {
183: // tokenRef[args]; ref has args
184: if (r != null && r.argActionAST == null) {
185: // but token rule def has no args
186: ErrorManager.grammarError(
187: ErrorManager.MSG_RULE_HAS_NO_ARGS, grammar,
188: argsAST.getToken(), r.name);
189: }
190: } else {
191: // token ref has no args
192: if (r != null && r.argActionAST != null) {
193: // but token rule def has args
194: ErrorManager.grammarError(
195: ErrorManager.MSG_MISSING_RULE_ARGS,
196: grammar, refAST.getToken(), r.name);
197: }
198: }
199: }
200: }
201:
202: /** Rules in tree grammar that use -> rewrites and are spitting out
203: * templates via output=template and then use rewrite=true must only
204: * use -> on alts that are simple nodes or trees or single rule refs
205: * that match either nodes or trees. The altAST is the ALT node
206: * for an ALT. Verify that its first child is simple. Must be either
207: * ( ALT ^( A B ) <end-of-alt> ) or ( ALT A <end-of-alt> ) or
208: * other element.
209: *
210: * Ignore predicates in front and labels.
211: */
212: public void ensureAltIsSimpleNodeOrTree(GrammarAST altAST,
213: GrammarAST elementAST, int outerAltNum) {
214: if (isValidSimpleElementNode(elementAST)) {
215: GrammarAST next = (GrammarAST) elementAST.getNextSibling();
216: if (!isNextNonActionElementEOA(next)) {
217: ErrorManager.grammarWarning(
218: ErrorManager.MSG_REWRITE_FOR_MULTI_ELEMENT_ALT,
219: grammar, next.token, new Integer(outerAltNum));
220: }
221: return;
222: }
223: switch (elementAST.getType()) {
224: case ANTLRParser.ASSIGN: // labels ok on non-rule refs
225: case ANTLRParser.PLUS_ASSIGN:
226: if (isValidSimpleElementNode(elementAST.getChild(1))) {
227: return;
228: }
229: break;
230: case ANTLRParser.ACTION: // skip past actions
231: case ANTLRParser.SEMPRED:
232: case ANTLRParser.SYN_SEMPRED:
233: case ANTLRParser.BACKTRACK_SEMPRED:
234: case ANTLRParser.GATED_SEMPRED:
235: ensureAltIsSimpleNodeOrTree(altAST, (GrammarAST) elementAST
236: .getNextSibling(), outerAltNum);
237: return;
238: }
239: ErrorManager.grammarWarning(
240: ErrorManager.MSG_REWRITE_FOR_MULTI_ELEMENT_ALT,
241: grammar, elementAST.token, new Integer(outerAltNum));
242: }
243:
244: protected boolean isValidSimpleElementNode(GrammarAST t) {
245: switch (t.getType()) {
246: case ANTLRParser.TREE_BEGIN:
247: case ANTLRParser.TOKEN_REF:
248: case ANTLRParser.CHAR_LITERAL:
249: case ANTLRParser.STRING_LITERAL:
250: case ANTLRParser.WILDCARD:
251: return true;
252: default:
253: return false;
254: }
255: }
256:
257: protected boolean isNextNonActionElementEOA(GrammarAST t) {
258: while (t.getType() == ANTLRParser.ACTION
259: || t.getType() == ANTLRParser.SEMPRED) {
260: t = (GrammarAST) t.getNextSibling();
261: }
262: if (t.getType() == ANTLRParser.EOA) {
263: return true;
264: }
265: return false;
266: }
267: }
|