001: /*
002: (c) Copyright 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP, all rights reserved.
003: [See end of file]
004: $Id: TestPerlyParser.java,v 1.17 2008/01/02 12:11:05 andy_seaborne Exp $
005: */
006: package com.hp.hpl.jena.graph.query.regexptrees.test;
007:
008: import java.util.Arrays;
009:
010: import junit.framework.TestSuite;
011:
012: import com.hp.hpl.jena.graph.query.regexptrees.*;
013: import com.hp.hpl.jena.graph.test.GraphTestBase;
014:
015: /**
016: TestPerlyParser - tests for the parser of Perl REs into RegexpTrees.
017: @author kers
018: */
019: public class TestPerlyParser extends GraphTestBase {
020: public TestPerlyParser(String name) {
021: super (name);
022: }
023:
024: public static TestSuite suite() {
025: return new TestSuite(TestPerlyParser.class);
026: }
027:
028: protected static class FlagException extends RuntimeException {
029: }
030:
031: public void testAlternateGenerator() {
032: RegexpTreeGenerator g = new SimpleGenerator() {
033: public RegexpTree getAnySingle() {
034: throw new FlagException();
035: }
036: };
037: PerlPatternParser p = new PerlPatternParser(".", g);
038: try {
039: p.parseAtom();
040: fail("should be using supplied generator");
041: } catch (FlagException e) {
042: pass();
043: }
044: }
045:
046: public void testLit() {
047: assertEquals(Text.create("a"), Text.create("a"));
048: assertDiffer(Text.create("a"), Text.create("b"));
049: assertEquals(Text.create("aga").hashCode(), Text.create("aga")
050: .hashCode());
051: }
052:
053: public void testInitialParserState() {
054: assertEquals(0, new PerlPatternParser("hello").getPointer());
055: assertEquals("hello", new PerlPatternParser("hello")
056: .getString());
057: }
058:
059: public void testLetterAtoms() {
060: for (char ch = 0; ch < 256; ch += 1)
061: if (Character.isLetter(ch)) {
062: PerlPatternParser p = new PerlPatternParser("" + ch);
063: assertEquals(Text.create(ch), p.parseAtom());
064: assertEquals(1, p.getPointer());
065: }
066: }
067:
068: public void testEmptyExpression() {
069: assertEquals(new Nothing(), element(""));
070: }
071:
072: public void testDotAtom() {
073: testSimpleSpecialAtom(RegexpTree.ANY, ".");
074: }
075:
076: public void testHatAtom() {
077: testSimpleSpecialAtom(RegexpTree.SOL, "^");
078: }
079:
080: public void testDollarAtom() {
081: testSimpleSpecialAtom(RegexpTree.EOL, "$");
082: }
083:
084: public void testTerminatorsReturnNull() {
085: assertEquals(new Nothing(), element("|"));
086: }
087:
088: public void testSimpleBackslashEscapes() {
089: for (char ch = 0; ch < 256; ch += 1) {
090: if ("bBAZnrtfdDwWSsxc0123456789".indexOf(ch) < 0)
091: assertEquals(Text.create(ch), new PerlPatternParser(
092: "\\" + ch).parseAtom());
093: }
094: }
095:
096: public void testSpecialBackslashEscapes() {
097: String specials = "bBAZ";
098: for (int i = 0; i < specials.length(); i += 1)
099: try {
100: new PerlPatternParser("\\" + specials.charAt(i))
101: .parseAtom();
102: fail("backslash escape " + specials.charAt(i));
103: } catch (PerlPatternParser.SyntaxException e) {
104: pass();
105: }
106: }
107:
108: public void testWordEscapes() {
109: String letters = "abcdefghijklmnopqrstuvwxyz";
110: String wordChars = "0123456789" + letters + "_"
111: + letters.toUpperCase();
112: assertEquals(new AnyOf(wordChars), element("\\w"));
113: assertEquals(new NoneOf(wordChars), element("\\W"));
114: }
115:
116: public void testDigitEscapes() {
117: assertEquals(new AnyOf("0123456789"), element("\\d"));
118: assertEquals(new NoneOf("0123456789"), element("\\D"));
119: }
120:
121: public void testWhitespaceEscapes() {
122: assertEquals(Text.create("\n"), element("\\n"));
123: assertEquals(Text.create("\t"), element("\\t"));
124: assertEquals(Text.create("\f"), element("\\f"));
125: assertEquals(Text.create("\r"), element("\\r"));
126: assertEquals(new AnyOf(" \r\n\t\f"), element("\\s"));
127: assertEquals(new NoneOf(" \r\n\t\f"), element("\\S"));
128: }
129:
130: public void testHexEscapes() {
131: assertParse(Text.create("\u00ac"), "\\xac");
132: assertParse(Text.create("\u00ff"), "\\xff");
133: assertParse(Text.create("\u0012"), "\\x12");
134: assertParse(Text.create("\u00af"), "\\xAF");
135: }
136:
137: public void testControlEscapes() {
138: assertParse(Text.create("\u0001"), "\\cA");
139: assertParse(Text.create("\u001a"), "\\cZ");
140: }
141:
142: public void testNoQuantifier() {
143: RegexpTree d = RegexpTree.ANY;
144: assertSame(d, quantifier("", d));
145: assertSame(d, quantifier("x", d));
146: assertSame(d, quantifier("[", d));
147: assertSame(d, quantifier("(", d));
148: assertSame(d, quantifier(".", d));
149: assertSame(d, quantifier("\\", d));
150: }
151:
152: public void testStarQuantifier() {
153: RegexpTree d = RegexpTree.EOL;
154: assertEquals(new ZeroOrMore(d), quantifier("*", d));
155: }
156:
157: public void testPlusQuantifier() {
158: RegexpTree d = RegexpTree.SOL;
159: assertEquals(new OneOrMore(d), quantifier("+", d));
160: }
161:
162: public void testQueryQuantifier() {
163: RegexpTree d = RegexpTree.ANY;
164: assertEquals(new Optional(d), quantifier("?", d));
165: }
166:
167: public void testUnboundQuantifiers() {
168: testUnboundQuantifier("*");
169: testUnboundQuantifier("+");
170: testUnboundQuantifier("?");
171: testUnboundQuantifier("{");
172: }
173:
174: /**
175: check that the quantifier string <code>q</code>throws a syntax error if it's
176: not preceeded by an atom.
177: */
178: private void testUnboundQuantifier(String q) {
179: PerlPatternParser p = new PerlPatternParser(q);
180: try {
181: p.parseElement();
182: fail("must trap unbound quantifier " + q);
183: } catch (PerlPatternParser.SyntaxException e) {
184: pass();
185: }
186: }
187:
188: public void testUnitSeq() {
189: PerlPatternParser p = new PerlPatternParser("x");
190: assertEquals(Text.create("x"), p.parseSeq());
191: }
192:
193: public void testSeq() {
194: PerlPatternParser p = new PerlPatternParser("^.$");
195: assertEquals(seq3(new StartOfLine(), new AnySingle(),
196: new EndOfLine()), p.parseSeq());
197: }
198:
199: public void testBracketConstruction() {
200: assertParse(new Paren(Text.create("x")), "(x)");
201: }
202:
203: public void testBracketClosure() {
204: PerlPatternParser p = new PerlPatternParser("()y");
205: assertEquals(seq2(new Paren(new Nothing()), Text.create("y")),
206: p.parseAlts());
207: }
208:
209: public void testDetectsMissingClosingBracket() {
210: PerlPatternParser p = new PerlPatternParser("(x");
211: try {
212: p.parseAlts();
213: fail("should detect missing close bracket");
214: } catch (PerlPatternParser.SyntaxException e) {
215: pass();
216: }
217: }
218:
219: public void testAlt() {
220: PerlPatternParser L = new PerlPatternParser("abc");
221: PerlPatternParser R = new PerlPatternParser("def");
222: PerlPatternParser p = new PerlPatternParser("abc|def");
223: assertEquals(alt(L.parseSeq(), R.parseSeq()), p.parseAlts());
224: }
225:
226: public void testSimpleClass() {
227: assertParse(new AnyOf("x1B"), "[x1B]");
228: }
229:
230: public void testSimpleClassNegated() {
231: assertParse(new NoneOf("b0#"), "[^b0#]");
232: }
233:
234: public void testClassRangeAlphabet() {
235: assertParse(new AnyOf("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), "[A-Z]");
236: }
237:
238: public void testClassRangeSomeLetters() {
239: assertParse(new AnyOf("abcdef"), "[a-f]");
240: }
241:
242: public void testClassRangeDigits() {
243: assertParse(new AnyOf("abc0123456789rst"), "[a-c0-9r-t]");
244: }
245:
246: public void testClassHats() {
247: assertParse(new AnyOf("ab^cd"), "[ab^cd]");
248: }
249:
250: public void testClassRange() {
251: assertParse(new AnyOf("-R"), "[-R]");
252: }
253:
254: public void testClassBackslash() {
255: assertParse(new AnyOf("]"), "[\\]]");
256: }
257:
258: public void testBackReference() {
259: assertParse(seq2(new Paren(Text.create("x")),
260: new BackReference(1)), "(x)\\1");
261: }
262:
263: public void testOctalNonBackReference() {
264: assertParse(seq2(new Paren(Text.create("x")), Text
265: .create("\10")), "(x)\\10");
266: }
267:
268: protected RegexpTree seq2(RegexpTree a, RegexpTree b) {
269: return Sequence
270: .create(Arrays.asList(new RegexpTree[] { a, b }));
271: }
272:
273: protected RegexpTree seq3(RegexpTree a, RegexpTree b, RegexpTree c) {
274: return Sequence.create(Arrays
275: .asList(new RegexpTree[] { a, b, c }));
276: }
277:
278: protected RegexpTree alt(RegexpTree L, RegexpTree R) {
279: return Alternatives.create(Arrays.asList(new RegexpTree[] { L,
280: R }));
281: }
282:
283: public void testPerlParse() {
284: assertInstanceOf(Alternatives.class, PerlPatternParser
285: .parse("this is|a pattern"));
286: assertInstanceOf(Alternatives.class, PerlPatternParser.parse(
287: "this is|a pattern", new SimpleGenerator()));
288: }
289:
290: public void testOldSeq() {
291: PerlPatternParser p = new PerlPatternParser("hello");
292: assertEquals(Text.create("h"), p.parseAtom());
293: assertEquals(1, p.getPointer());
294: assertEquals(Text.create("e"), p.parseAtom());
295: assertEquals(2, p.getPointer());
296: assertEquals(Text.create("l"), p.parseAtom());
297: assertEquals(3, p.getPointer());
298: assertEquals(Text.create("l"), p.parseAtom());
299: assertEquals(4, p.getPointer());
300: assertEquals(Text.create("o"), p.parseAtom());
301: assertEquals(5, p.getPointer());
302: assertEquals(new Nothing(), p.parseAtom());
303: }
304:
305: public void assertParse(RegexpTree wanted, String toParse) {
306: assertEquals(wanted, new PerlPatternParser(toParse).parseAlts());
307: }
308:
309: public void testSimpleSpecialAtom(Object wanted, String toParse) {
310: PerlPatternParser p = new PerlPatternParser(toParse);
311: assertEquals(wanted, p.parseAtom());
312: assertEquals(1, p.getPointer());
313: }
314:
315: protected RegexpTree quantifier(String toParse, RegexpTree x) {
316: return new PerlPatternParser(toParse).parseQuantifier(x);
317: }
318:
319: protected RegexpTree element(String toParse) {
320: return new PerlPatternParser(toParse).parseElement();
321: }
322: }
323:
324: /*
325: (c) Copyright 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP
326: All rights reserved.
327:
328: Redistribution and use in source and binary forms, with or without
329: modification, are permitted provided that the following conditions
330: are met:
331:
332: 1. Redistributions of source code must retain the above copyright
333: notice, this list of conditions and the following disclaimer.
334:
335: 2. Redistributions in binary form must reproduce the above copyright
336: notice, this list of conditions and the following disclaimer in the
337: documentation and/or other materials provided with the distribution.
338:
339: 3. The name of the author may not be used to endorse or promote products
340: derived from this software without specific prior written permission.
341:
342: THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
343: IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
344: OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
345: IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
346: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
347: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
348: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
349: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
350: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
351: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
352: */
|