001: /*
002: [The "BSD licence"]
003: Copyright (c) 2005-2006 Terence Parr
004: All rights reserved.
005:
006: Redistribution and use in source and binary forms, with or without
007: modification, are permitted provided that the following conditions
008: are met:
009: 1. Redistributions of source code must retain the above copyright
010: notice, this list of conditions and the following disclaimer.
011: 2. Redistributions in binary form must reproduce the above copyright
012: notice, this list of conditions and the following disclaimer in the
013: documentation and/or other materials provided with the distribution.
014: 3. The name of the author may not be used to endorse or promote products
015: derived from this software without specific prior written permission.
016:
017: THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
018: IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
019: OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
020: IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
021: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
022: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
023: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
024: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
026: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: */
028: package org.antlr.test;
029:
030: import org.antlr.analysis.DFA;
031: import org.antlr.analysis.DFAOptimizer;
032: import org.antlr.codegen.CodeGenerator;
033: import org.antlr.tool.*;
034:
035: import java.util.List;
036:
037: public class TestCharDFAConversion extends BaseTest {
038:
039: /** Public default constructor used by TestRig */
040: public TestCharDFAConversion() {
041: }
042:
043: // R A N G E S & S E T S
044:
045: public void testSimpleRangeVersusChar() throws Exception {
046: Grammar g = new Grammar("lexer grammar t;\n"
047: + "A : 'a'..'z' '@' | 'k' '$' ;");
048: g.createLookaheadDFAs();
049: String expecting = ".s0-'k'->.s1\n"
050: + ".s0-{'a'..'j', 'l'..'z'}->:s3=>1\n"
051: + ".s1-'$'->:s2=>2\n" + ".s1-'@'->:s3=>1\n";
052: checkDecision(g, 1, expecting, null);
053: }
054:
055: public void testRangeWithDisjointSet() throws Exception {
056: Grammar g = new Grammar("lexer grammar t;\n"
057: + "A : 'a'..'z' '@'\n" + " | ('k'|'9'|'p') '$'\n"
058: + " ;\n");
059: g.createLookaheadDFAs();
060: // must break up a..z into {'a'..'j', 'l'..'o', 'q'..'z'}
061: String expecting = ".s0-'9'->:s2=>2\n"
062: + ".s0-{'a'..'j', 'l'..'o', 'q'..'z'}->:s3=>1\n"
063: + ".s0-{'k', 'p'}->.s1\n" + ".s1-'$'->:s2=>2\n"
064: + ".s1-'@'->:s3=>1\n";
065: checkDecision(g, 1, expecting, null);
066: }
067:
068: public void testDisjointSetCollidingWithTwoRanges()
069: throws Exception {
070: Grammar g = new Grammar("lexer grammar t;\n"
071: + "A : ('a'..'z'|'0'..'9') '@'\n"
072: + " | ('k'|'9'|'p') '$'\n" + " ;\n");
073: g.createLookaheadDFAs();
074: // must break up a..z into {'a'..'j', 'l'..'o', 'q'..'z'} and 0..9
075: // into 0..8
076: String expecting = ".s0-{'0'..'8', 'a'..'j', 'l'..'o', 'q'..'z'}->:s3=>1\n"
077: + ".s0-{'9', 'k', 'p'}->.s1\n"
078: + ".s1-'$'->:s2=>2\n"
079: + ".s1-'@'->:s3=>1\n";
080: checkDecision(g, 1, expecting, null);
081: }
082:
083: public void testDisjointSetCollidingWithTwoRangesCharsFirst()
084: throws Exception {
085: Grammar g = new Grammar("lexer grammar t;\n"
086: + "A : ('k'|'9'|'p') '$'\n"
087: + " | ('a'..'z'|'0'..'9') '@'\n" + " ;\n");
088: g.createLookaheadDFAs();
089: // must break up a..z into {'a'..'j', 'l'..'o', 'q'..'z'} and 0..9
090: // into 0..8
091: String expecting = ".s0-{'0'..'8', 'a'..'j', 'l'..'o', 'q'..'z'}->:s2=>2\n"
092: + ".s0-{'9', 'k', 'p'}->.s1\n"
093: + ".s1-'$'->:s3=>1\n"
094: + ".s1-'@'->:s2=>2\n";
095: checkDecision(g, 1, expecting, null);
096: }
097:
098: public void testDisjointSetCollidingWithTwoRangesAsSeparateAlts()
099: throws Exception {
100: Grammar g = new Grammar("lexer grammar t;\n"
101: + "A : 'a'..'z' '@'\n" + " | 'k' '$'\n"
102: + " | '9' '$'\n" + " | 'p' '$'\n"
103: + " | '0'..'9' '@'\n" + " ;\n");
104: g.createLookaheadDFAs();
105: // must break up a..z into {'a'..'j', 'l'..'o', 'q'..'z'} and 0..9
106: // into 0..8
107: String expecting = ".s0-'0'..'8'->:s8=>5\n" + ".s0-'9'->.s6\n"
108: + ".s0-'k'->.s1\n" + ".s0-'p'->.s4\n"
109: + ".s0-{'a'..'j', 'l'..'o', 'q'..'z'}->:s3=>1\n"
110: + ".s1-'$'->:s2=>2\n" + ".s1-'@'->:s3=>1\n"
111: + ".s4-'$'->:s5=>4\n" + ".s4-'@'->:s3=>1\n"
112: + ".s6-'$'->:s7=>3\n" + ".s6-'@'->:s8=>5\n";
113: checkDecision(g, 1, expecting, null);
114: }
115:
116: public void testKeywordVersusID() throws Exception {
117: Grammar g = new Grammar("lexer grammar t;\n" + "IF : 'if' ;\n" + // choose this over ID
118: "ID : ('a'..'z')+ ;\n");
119: String expecting = ".s0-'a'..'z'->:s2=>1\n"
120: + ".s0-<EOT>->:s1=>2\n";
121: checkDecision(g, 1, expecting, null);
122: expecting = ".s0-'i'->.s1\n"
123: + ".s0-{'a'..'h', 'j'..'z'}->:s4=>2\n"
124: + ".s1-'f'->.s2\n" + ".s1-<EOT>->:s4=>2\n"
125: + ".s2-'a'..'z'->:s4=>2\n" + ".s2-<EOT>->:s3=>1\n";
126: checkDecision(g, 2, expecting, null);
127: }
128:
129: public void testIdenticalRules() throws Exception {
130: Grammar g = new Grammar("lexer grammar t;\n" + "A : 'a' ;\n"
131: + "B : 'a' ;\n"); // can't reach this
132: String expecting = ".s0-'a'->.s1\n" + ".s1-<EOT>->:s2=>1\n";
133:
134: ErrorQueue equeue = new ErrorQueue();
135: ErrorManager.setErrorListener(equeue);
136:
137: checkDecision(g, 1, expecting, new int[] { 2 });
138:
139: assertEquals("unexpected number of expected problems", 1,
140: equeue.size());
141: Message msg = (Message) equeue.warnings.get(0);
142: assertTrue("warning must be an unreachable alt",
143: msg instanceof GrammarUnreachableAltsMessage);
144: GrammarUnreachableAltsMessage u = (GrammarUnreachableAltsMessage) msg;
145: assertEquals("[2]", u.alts.toString());
146:
147: }
148:
149: public void testAdjacentNotCharLoops() throws Exception {
150: Grammar g = new Grammar("lexer grammar t;\n"
151: + "A : (~'r')+ ;\n" + "B : (~'s')+ ;\n");
152: String expecting = ".s0-'r'->:s3=>2\n" + ".s0-'s'->:s2=>1\n"
153: + ".s0-{'\\u0000'..'q', 't'..'\\uFFFE'}->.s1\n"
154: + ".s1-'r'->:s3=>2\n" + ".s1-<EOT>->:s2=>1\n"
155: + ".s1-{'\\u0000'..'q', 't'..'\\uFFFE'}->.s1\n";
156: checkDecision(g, 3, expecting, null);
157: }
158:
159: public void testNonAdjacentNotCharLoops() throws Exception {
160: Grammar g = new Grammar("lexer grammar t;\n"
161: + "A : (~'r')+ ;\n" + "B : (~'t')+ ;\n");
162: String expecting = ".s0-'r'->:s3=>2\n" + ".s0-'t'->:s2=>1\n"
163: + ".s0-{'\\u0000'..'q', 's', 'u'..'\\uFFFE'}->.s1\n"
164: + ".s1-'r'->:s3=>2\n" + ".s1-<EOT>->:s2=>1\n"
165: + ".s1-{'\\u0000'..'q', 's', 'u'..'\\uFFFE'}->.s1\n";
166: checkDecision(g, 3, expecting, null);
167: }
168:
169: public void testLoopsWithOptimizedOutExitBranches()
170: throws Exception {
171: Grammar g = new Grammar("lexer grammar t;\n"
172: + "A : 'x'* ~'x'+ ;\n");
173: String expecting = ".s0-'x'->:s2=>1\n"
174: + ".s0-{'\\u0000'..'w', 'y'..'\\uFFFE'}->:s1=>2\n";
175: checkDecision(g, 1, expecting, null);
176:
177: // The optimizer yanks out all exit branches from EBNF blocks
178: // This is ok because we've already verified there are no problems
179: // with the enter/exit decision
180: DFAOptimizer optimizer = new DFAOptimizer(g);
181: optimizer.optimize();
182: FASerializer serializer = new FASerializer(g);
183: DFA dfa = g.getLookaheadDFA(1);
184: String result = serializer.serialize(dfa.startState);
185: expecting = ".s0-'x'->:s1=>1\n";
186: assertEquals(expecting, result);
187: }
188:
189: // N O N G R E E D Y
190:
191: public void testNonGreedy() throws Exception {
192: Grammar g = new Grammar("lexer grammar t;\n"
193: + "CMT : '/*' ( options {greedy=false;} : . )* '*/' ;");
194: String expecting = ".s0-'*'->.s1\n"
195: + ".s0-{'\\u0000'..')', '+'..'\\uFFFE'}->:s3=>1\n"
196: + ".s1-'/'->:s2=>2\n"
197: + ".s1-{'\\u0000'..'.', '0'..'\\uFFFE'}->:s3=>1\n";
198: checkDecision(g, 1, expecting, null);
199: }
200:
201: public void testNonGreedyWildcardStar() throws Exception {
202: Grammar g = new Grammar(
203: "lexer grammar t;\n"
204: + "SLCMT : '//' ( options {greedy=false;} : . )* '\n' ;");
205: String expecting = ".s0-'\\n'->:s1=>2\n"
206: + ".s0-{'\\u0000'..'\\t', '\\u000B'..'\\uFFFE'}->:s2=>1\n";
207: checkDecision(g, 1, expecting, null);
208: }
209:
210: public void testNonGreedyByDefaultWildcardStar() throws Exception {
211: Grammar g = new Grammar("lexer grammar t;\n"
212: + "SLCMT : '//' .* '\n' ;");
213: String expecting = ".s0-'\\n'->:s1=>2\n"
214: + ".s0-{'\\u0000'..'\\t', '\\u000B'..'\\uFFFE'}->:s2=>1\n";
215: checkDecision(g, 1, expecting, null);
216: }
217:
218: public void testNonGreedyWildcardPlus() throws Exception {
219: // same DFA as nongreedy .* but code gen checks number of
220: // iterations at runtime
221: Grammar g = new Grammar(
222: "lexer grammar t;\n"
223: + "SLCMT : '//' ( options {greedy=false;} : . )+ '\n' ;");
224: String expecting = ".s0-'\\n'->:s1=>2\n"
225: + ".s0-{'\\u0000'..'\\t', '\\u000B'..'\\uFFFE'}->:s2=>1\n";
226: checkDecision(g, 1, expecting, null);
227: }
228:
229: public void testNonGreedyByDefaultWildcardPlus() throws Exception {
230: Grammar g = new Grammar("lexer grammar t;\n"
231: + "SLCMT : '//' .+ '\n' ;");
232: String expecting = ".s0-'\\n'->:s1=>2\n"
233: + ".s0-{'\\u0000'..'\\t', '\\u000B'..'\\uFFFE'}->:s2=>1\n";
234: checkDecision(g, 1, expecting, null);
235: }
236:
237: public void testNonGreedyByDefaultWildcardPlusWithParens()
238: throws Exception {
239: Grammar g = new Grammar("lexer grammar t;\n"
240: + "SLCMT : '//' (.)+ '\n' ;");
241: String expecting = ".s0-'\\n'->:s1=>2\n"
242: + ".s0-{'\\u0000'..'\\t', '\\u000B'..'\\uFFFE'}->:s2=>1\n";
243: checkDecision(g, 1, expecting, null);
244: }
245:
246: public void testNonWildcardNonGreedy() throws Exception {
247: Grammar g = new Grammar("lexer grammar t;\n"
248: + "DUH : (options {greedy=false;}:'x'|'y')* 'xy' ;");
249: String expecting = ".s0-'x'->.s1\n" + ".s0-'y'->:s4=>2\n"
250: + ".s1-'x'->:s3=>1\n" + ".s1-'y'->:s2=>3\n";
251: checkDecision(g, 1, expecting, null);
252: }
253:
254: public void testNonWildcardEOTMakesItWorkWithoutNonGreedyOption()
255: throws Exception {
256: Grammar g = new Grammar("lexer grammar t;\n"
257: + "DUH : ('x'|'y')* 'xy' ;");
258: String expecting = ".s0-'x'->.s1\n" + ".s0-'y'->:s3=>1\n"
259: + ".s1-'x'->:s3=>1\n" + ".s1-'y'->.s2\n"
260: + ".s2-'x'..'y'->:s3=>1\n" + ".s2-<EOT>->:s4=>2\n";
261: checkDecision(g, 1, expecting, null);
262: }
263:
264: public void testAltConflictsWithLoopThenExit() throws Exception {
265: // \" predicts alt 1, but wildcard then " can predict exit also
266: Grammar g = new Grammar(
267: "lexer grammar t;\n"
268: + "STRING : '\"' (options {greedy=false;}: '\\\\\"' | .)* '\"' ;\n");
269: String expecting = ".s0-'\"'->:s1=>3\n"
270: + ".s0-'\\\\'->.s2\n"
271: + ".s0-{'\\u0000'..'!', '#'..'[', ']'..'\\uFFFE'}->:s4=>2\n"
272: + ".s2-'\"'->:s3=>1\n"
273: + ".s2-{'\\u0000'..'!', '#'..'\\uFFFE'}->:s4=>2\n";
274: checkDecision(g, 1, expecting, null);
275: }
276:
277: public void testNonGreedyLoopThatNeverLoops() throws Exception {
278: Grammar g = new Grammar("lexer grammar t;\n"
279: + "DUH : (options {greedy=false;}:'x')+ ;"); // loop never matched
280: String expecting = ":s0=>2\n";
281:
282: ErrorQueue equeue = new ErrorQueue();
283: ErrorManager.setErrorListener(equeue);
284:
285: checkDecision(g, 1, expecting, new int[] { 1 });
286:
287: assertEquals("unexpected number of expected problems", 1,
288: equeue.size());
289: Message msg = (Message) equeue.warnings.get(0);
290: assertTrue("warning must be an unreachable alt",
291: msg instanceof GrammarUnreachableAltsMessage);
292: GrammarUnreachableAltsMessage u = (GrammarUnreachableAltsMessage) msg;
293: assertEquals("[1]", u.alts.toString());
294: }
295:
296: public void testRecursive() throws Exception {
297: // this is cool because the 3rd alt includes !(all other possibilities)
298: Grammar g = new Grammar("lexer grammar duh;\n"
299: + "SUBTEMPLATE\n" + " : '{'\n"
300: + " ( SUBTEMPLATE\n"
301: + " | ESC\n"
302: + " | ~('}'|'\\\\'|'{')\n"
303: + " )*\n" + " '}'\n"
304: + " ;\n" + "fragment\n"
305: + "ESC : '\\\\' . ;");
306: g.createLookaheadDFAs();
307: String expecting = ".s0-'\\\\'->:s3=>2\n"
308: + ".s0-'{'->:s2=>1\n"
309: + ".s0-'}'->:s1=>4\n"
310: + ".s0-{'\\u0000'..'[', ']'..'z', '|', '~'..'\\uFFFE'}->:s4=>3\n";
311: checkDecision(g, 1, expecting, null);
312: }
313:
314: public void testRecursive2() throws Exception {
315: // this is also cool because it resolves \\ to be ESC alt; it's just
316: // less efficient of a DFA
317: Grammar g = new Grammar("lexer grammar duh;\n"
318: + "SUBTEMPLATE\n" + " : '{'\n"
319: + " ( SUBTEMPLATE\n"
320: + " | ESC\n"
321: + " | ~('}'|'{')\n"
322: + " )*\n" + " '}'\n"
323: + " ;\n" + "fragment\n"
324: + "ESC : '\\\\' . ;");
325: g.createLookaheadDFAs();
326: String expecting = ".s0-'\\\\'->.s3\n"
327: + ".s0-'{'->:s2=>1\n"
328: + ".s0-'}'->:s1=>4\n"
329: + ".s0-{'\\u0000'..'[', ']'..'z', '|', '~'..'\\uFFFE'}->:s5=>3\n"
330: + ".s3-'\\\\'->:s8=>2\n"
331: + ".s3-'{'->:s7=>2\n"
332: + ".s3-'}'->.s4\n"
333: + ".s3-{'\\u0000'..'[', ']'..'z', '|', '~'..'\\uFFFE'}->:s6=>2\n"
334: + ".s4-'\\u0000'..'\\uFFFE'->:s6=>2\n"
335: + ".s4-<EOT>->:s5=>3\n";
336: checkDecision(g, 1, expecting, null);
337: }
338:
339: public void testNotFragmentInLexer() throws Exception {
340: Grammar g = new Grammar("lexer grammar T;\n"
341: + "A : 'a' | ~B {;} ;\n" + "fragment B : 'a' ;\n");
342: g.createLookaheadDFAs();
343: String expecting = ".s0-'a'->:s1=>1\n"
344: + ".s0-{'\\u0000'..'`', 'b'..'\\uFFFE'}->:s2=>2\n";
345: checkDecision(g, 1, expecting, null);
346: }
347:
348: public void testNotSetFragmentInLexer() throws Exception {
349: Grammar g = new Grammar("lexer grammar T;\n"
350: + "A : B | ~B {;} ;\n" + "fragment B : 'a'|'b' ;\n");
351: g.createLookaheadDFAs();
352: String expecting = ".s0-'a'..'b'->:s1=>1\n"
353: + ".s0-{'\\u0000'..'`', 'c'..'\\uFFFE'}->:s2=>2\n";
354: checkDecision(g, 1, expecting, null);
355: }
356:
357: public void testNotTokenInLexer() throws Exception {
358: Grammar g = new Grammar("lexer grammar T;\n"
359: + "A : 'x' ('a' | ~B {;}) ;\n" + "B : 'a' ;\n");
360: g.createLookaheadDFAs();
361: String expecting = ".s0-'a'->:s1=>1\n"
362: + ".s0-{'\\u0000'..'`', 'b'..'\\uFFFE'}->:s2=>2\n";
363: checkDecision(g, 1, expecting, null);
364: }
365:
366: public void testNotComplicatedSetRuleInLexer() throws Exception {
367: Grammar g = new Grammar("lexer grammar T;\n"
368: + "A : B | ~B {;} ;\n"
369: + "fragment B : 'a'|'b'|'c'..'e'|C ;\n"
370: + "fragment C : 'f' ;\n"); // has to seen from B to C
371: g.createLookaheadDFAs();
372: String expecting = ".s0-'a'..'f'->:s1=>1\n"
373: + ".s0-{'\\u0000'..'`', 'g'..'\\uFFFE'}->:s2=>2\n";
374: checkDecision(g, 1, expecting, null);
375: }
376:
377: public void testNotSetWithRuleInLexer() throws Exception {
378: Grammar g = new Grammar("lexer grammar T;\n"
379: + "T : ~('a' | B) | 'a';\n" + "fragment\n"
380: + "B : 'b' ;\n" + "C : ~'x'{;} ;"); // force Tokens to not collapse T|C
381: g.createLookaheadDFAs();
382: String expecting = ".s0-'b'->:s3=>2\n"
383: + ".s0-'x'->:s2=>1\n"
384: + ".s0-{'\\u0000'..'a', 'c'..'w', 'y'..'\\uFFFE'}->.s1\n"
385: + ".s1-<EOT>->:s2=>1\n";
386: checkDecision(g, 1, expecting, null);
387: }
388:
389: public void testSetCallsRuleWithNot() throws Exception {
390: Grammar g = new Grammar("lexer grammar A;\n" + "T : ~'x' ;\n"
391: + "S : 'x' (T | 'x') ;\n");
392: g.createLookaheadDFAs();
393: String expecting = ".s0-'x'->:s2=>2\n"
394: + ".s0-{'\\u0000'..'w', 'y'..'\\uFFFE'}->:s1=>1\n";
395: checkDecision(g, 1, expecting, null);
396: }
397:
398: public void testSynPredInLexer() throws Exception {
399: Grammar g = new Grammar("lexer grammar T;\n"
400: + "LT: '<' ' '*\n"
401: + " | ('<' IDENT) => '<' IDENT '>'\n" + // this was causing syntax error
402: " ;\n" + "IDENT: 'a'+;\n");
403: // basically, Tokens rule should not do set compression test
404: g.createLookaheadDFAs();
405: String expecting = ".s0-'<'->:s1=>1\n" + ".s0-'a'->:s2=>2\n";
406: checkDecision(g, 4, expecting, null); // 4 is Tokens rule
407: }
408:
409: // S U P P O R T
410:
411: public void _template() throws Exception {
412: Grammar g = new Grammar("grammar T;\n" + "a : A | B;");
413: g.createLookaheadDFAs();
414: String expecting = "\n";
415: checkDecision(g, 1, expecting, null);
416: }
417:
418: protected void checkDecision(Grammar g, int decision,
419: String expecting, int[] expectingUnreachableAlts)
420: throws Exception {
421:
422: // mimic actions of org.antlr.Tool first time for grammar g
423: if (g.getCodeGenerator() == null) {
424: CodeGenerator generator = new CodeGenerator(null, g, "Java");
425: g.setCodeGenerator(generator);
426: g.createNFAs();
427: g.createLookaheadDFAs();
428: }
429:
430: DFA dfa = g.getLookaheadDFA(decision);
431: assertNotNull("unknown decision #" + decision, dfa);
432: FASerializer serializer = new FASerializer(g);
433: String result = serializer.serialize(dfa.startState);
434: //System.out.print(result);
435: List nonDetAlts = dfa.getUnreachableAlts();
436: //System.out.println("alts w/o predict state="+nonDetAlts);
437:
438: // first make sure nondeterministic alts are as expected
439: if (expectingUnreachableAlts == null) {
440: if (nonDetAlts.size() != 0) {
441: System.err
442: .println("nondeterministic alts (should be empty): "
443: + nonDetAlts);
444: }
445: assertEquals("unreachable alts mismatch", 0, nonDetAlts
446: .size());
447: } else {
448: for (int i = 0; i < expectingUnreachableAlts.length; i++) {
449: assertTrue("unreachable alts mismatch", nonDetAlts
450: .contains(new Integer(
451: expectingUnreachableAlts[i])));
452: }
453: }
454: assertEquals(expecting, result);
455: }
456:
457: }
|