001: /*
002: * TestTokenizerFlags.java: JUnit test for TokenizerProperties implementations
003: *
004: * Copyright (C) 2004 Heiko Blau
005: *
006: * This file belongs to the JTopas test suite.
007: * The JTopas test suite is free software; you can redistribute it and/or modify it
008: * under the terms of the GNU Lesser General Public License as published by the
009: * Free Software Foundation; either version 2.1 of the License, or (at your option)
010: * any later version.
011: *
012: * This software is distributed in the hope that it will be useful, but WITHOUT
013: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
014: * FITNESS FOR A PARTICULAR PURPOSE.
015: * See the GNU Lesser General Public License for more details.
016: *
017: * You should have received a copy of the GNU Lesser General Public License along
018: * with the JTopas test suite. If not, write to the
019: *
020: * Free Software Foundation, Inc.
021: * 59 Temple Place, Suite 330,
022: * Boston, MA 02111-1307
023: * USA
024: *
025: * or check the Internet: http://www.fsf.org
026: *
027: * The JTopas test suite uses the test framework JUnit by Kent Beck and Erich Gamma.
028: * You should have received a copy of their JUnit licence agreement along with
029: * the JTopas test suite.
030: *
* We do NOT provide the JUnit archive junit.jar necessary to compile and run
* our tests, since we assume that you either have it already or would like
* to get the current release yourself.
034: * Please visit either:
035: * http://sourceforge.net/projects/junit
036: * or
037: * http://junit.org
038: * to obtain JUnit.
039: *
040: * Contact:
041: * email: heiko@susebox.de
042: */
043:
044: package de.susebox.jtopas;
045:
046: //-----------------------------------------------------------------------------
047: // Imports
048: //
049: import java.util.Iterator;
050:
051: import junit.framework.Test;
052: import junit.framework.TestCase;
053: import junit.framework.TestSuite;
054:
055: import de.susebox.TestUtilities;
056:
057: //-----------------------------------------------------------------------------
058: // Class TestTokenizerFlags
059: //
060:
061: /**<p>
062: * This class tests the behaviour of a {@link Tokenizer} regarding the flags set
063: * in the backing {@link TokenizerProperties} and the registered
064: * {@link TokenizerProperty} objects.
065: *</p>
066: *
067: * @see TokenizerProperties
068: * @see TokenizerProperty
069: * @see Tokenizer
070: * @author Heiko Blau
071: */
072: public class TestTokenizerFlags extends TestCase {
073:
074: //---------------------------------------------------------------------------
075: // main method
076: //
077:
078: /**
079: * call this method to invoke the tests
080: */
081: public static void main(String[] args) {
082: String[] tests = { TestTokenizerFlags.class.getName() };
083:
084: TestUtilities.run(tests, args);
085: }
086:
087: //---------------------------------------------------------------------------
088: // suite method
089: //
090:
091: /**
092: * Implementation of the JUnit method <code>suite</code>. For each set of test
093: * properties one or more tests are instantiated.
094: *
095: * @return a test suite
096: */
097: public static Test suite() {
098: TestSuite suite = new TestSuite(TestTokenizerFlags.class
099: .getName());
100:
101: suite.addTest(new TestTokenizerFlags("testCasePrecedence"));
102: suite.addTest(new TestTokenizerFlags("testTokenPosOnly"));
103: suite.addTest(new TestTokenizerFlags("testReturnImageParts"));
104: suite.addTest(new TestTokenizerFlags("testReturnWhitespaces"));
105: return suite;
106: }
107:
108: //---------------------------------------------------------------------------
109: // Constructor
110: //
111:
112: /**
113: * Default constructor. Standard input {@link java.lang.System#in} is used
114: * to construct the input stream reader.
115: */
116: public TestTokenizerFlags(String test) {
117: super (test);
118: }
119:
120: //---------------------------------------------------------------------------
121: // test cases
122: //
123:
124: /**
125: * Verifying the {@link TokenizerProperties#F_CASE} and TokenizerProperties#F_NO_CASE}
126: * flags.
127: */
128: public void testCasePrecedence() throws Throwable {
129: StandardTokenizer tokenizer;
130: StandardTokenizerProperties props;
131: Token token;
132:
133: // Precedence of the case flag
134: Object noCaseComp = new Object();
135: Object caseComp = new Object();
136: Object CASEComp = new Object();
137:
138: props = new StandardTokenizerProperties();
139: props.addKeyword("noCase", noCaseComp, Flags.F_NO_CASE);
140: props.addKeyword("case", caseComp);
141: props.addKeyword("CASE", CASEComp);
142:
143: tokenizer = new StandardTokenizer(props);
144: try {
145: tokenizer.setSource(new StringSource(
146: "noCase NOCASE NocASE nocase"));
147: for (int index = 0; index < 4; index++) {
148: assertTrue("No more token at index " + index + ".",
149: tokenizer.hasMoreToken());
150: token = tokenizer.nextToken();
151: assertTrue("Token is null at index " + index + ".",
152: token != null);
153: switch (index) {
154: case 4:
155: assertTrue("Token is not EOF at index " + index
156: + ".", token.getType() == Token.EOF);
157: break;
158: default:
159: assertTrue("Token is no keyword at index " + index
160: + ".", token.getType() == Token.KEYWORD);
161: assertTrue("Unexpected token companion at index "
162: + index + ".",
163: token.getCompanion() == noCaseComp);
164: }
165: }
166:
167: tokenizer
168: .setSource(new StringSource("case Case CASE cASE"));
169: for (int index = 0; index < 5; index++) {
170: assertTrue("No more token at index " + index + ".",
171: tokenizer.hasMoreToken());
172: token = tokenizer.nextToken();
173: assertTrue("Token is null at index " + index + ".",
174: token != null);
175: switch (index) {
176: case 0:
177: case 2:
178: assertTrue("Token is no keyword at index " + index
179: + ".", token.getType() == Token.KEYWORD);
180: if (index == 0) {
181: assertTrue(
182: "Unexpected token companion at index "
183: + index + ".", token
184: .getCompanion() == caseComp);
185: } else {
186: assertTrue(
187: "Unexpected token companion at index "
188: + index + ".", token
189: .getCompanion() == CASEComp);
190: }
191: break;
192: case 4:
193: assertTrue("Token is not EOF at index " + index
194: + ".", token.getType() == Token.EOF);
195: break;
196: default:
197: assertTrue("No normal token at index " + index
198: + ".", token.getType() == Token.NORMAL);
199: }
200: }
201: } finally {
202: tokenizer.close();
203: }
204: }
205:
206: /**
207: * Verifying the {@link TokenizerProperties#F_TOKEN_POS_ONLY} flag.
208: */
209: public void testTokenPosOnly() throws Throwable {
210: StandardTokenizer tokenizer;
211: StandardTokenizerProperties props;
212: Token token;
213:
214: // Precedence of the case flag
215: props = new StandardTokenizerProperties(Flags.F_TOKEN_POS_ONLY);
216: props.addProperty(new TokenizerProperty(Token.STRING,
217: new String[] { "\"", "\"", "\"" }, null, 0,
218: Flags.F_TOKEN_POS_ONLY));
219: props.addString("'", "'", "'");
220:
221: tokenizer = new StandardTokenizer(props);
222: try {
223: tokenizer.setSource(new StringSource(
224: "\"a string that is returned\""));
225: assertTrue("No more token.", tokenizer.hasMoreToken());
226: token = tokenizer.nextToken();
227: assertTrue("Token is no string.", token != null
228: && token.getType() == Token.STRING);
229: assertTrue("Token has no image.", token.getImage() != null);
230: assertTrue("No more token.", tokenizer.hasMoreToken());
231: token = tokenizer.nextToken();
232: assertTrue("Token is not EOF.", token != null
233: && token.getType() == Token.EOF);
234:
235: tokenizer.setSource(new StringSource(
236: "'a string that is not returned'"));
237: assertTrue("No more token.", tokenizer.hasMoreToken());
238: token = tokenizer.nextToken();
239: assertTrue("Token is no string.", token != null
240: && token.getType() == Token.STRING);
241: assertTrue("Token has image.", token.getImage() == null);
242: assertTrue("No more token.", tokenizer.hasMoreToken());
243: token = tokenizer.nextToken();
244: assertTrue("Token is not EOF.", token != null
245: && token.getType() == Token.EOF);
246: } finally {
247: tokenizer.close();
248: }
249: }
250:
251: /**
252: * Verifying the {@link TokenizerProperties#F_RETURN_IMAGE_PARTS} flag.
253: */
254: public void testReturnImageParts() throws Throwable {
255: StandardTokenizer tokenizer;
256: StandardTokenizerProperties props;
257: Token token;
258: String[] imageParts;
259:
260: // add properties
261: props = new StandardTokenizerProperties(Flags.F_TOKEN_POS_ONLY);
262: props.setSeparators(":");
263: props.addString("\"", "\"", "\\", null,
264: Flags.F_RETURN_IMAGE_PARTS);
265: props.addPattern("([\\+\\-]?)([0-9]+)", null,
266: Flags.F_RETURN_IMAGE_PARTS);
267: props.addPattern("([\\+\\-]?)([0-9]+)\\.([0-9]+)", null,
268: Flags.F_RETURN_IMAGE_PARTS);
269: props
270: .addString("'", "'", "'", null,
271: Flags.F_RETURN_IMAGE_PARTS);
272: props.addLineComment("--", null, Flags.F_RETURN_LINE_COMMENTS
273: | Flags.F_RETURN_IMAGE_PARTS);
274: props.addBlockComment("[[", "]]", null,
275: Flags.F_RETURN_IMAGE_PARTS
276: | Flags.F_RETURN_BLOCK_COMMENTS);
277:
278: // tokenize data
279: tokenizer = new StandardTokenizer(props);
280: try {
281: tokenizer
282: .setSource(new StringSource(
283: "[[\rblock comment with empty first line]] ImageParts [[with a block comment\r over 2 lines]]: +9745 1 -234 +0.09 14.1 \"a \\\"string\\\" with escapes\" 'a SQL string with ''escapes'' and\n with linefeeds\r\n' -- a line comment"));
284:
285: // first block comment
286: assertTrue("No more token.", tokenizer.hasMoreToken());
287: token = tokenizer.nextToken();
288: verifyImageParts(token, new String[] { "",
289: "block comment with empty first line" });
290:
291: // token "ImageParts"
292: assertTrue("No more token.", tokenizer.hasMoreToken());
293: token = tokenizer.nextToken();
294: assertTrue(token.getType() == Token.NORMAL);
295:
296: // block comment token
297: assertTrue("No more token.", tokenizer.hasMoreToken());
298: token = tokenizer.nextToken();
299: verifyImageParts(token, new String[] {
300: "with a block comment", " over 2 lines" });
301:
302: // token ":"
303: assertTrue("No more token.", tokenizer.hasMoreToken());
304: token = tokenizer.nextToken();
305: assertTrue(token.getType() == Token.SEPARATOR);
306:
307: // token "+9745"
308: assertTrue("No more token.", tokenizer.hasMoreToken());
309: token = tokenizer.nextToken();
310: verifyImageParts(token,
311: new String[] { "+9745", "+", "9745" });
312:
313: // token "1"
314: assertTrue("No more token.", tokenizer.hasMoreToken());
315: token = tokenizer.nextToken();
316: verifyImageParts(token, new String[] { "1", "", "1" });
317:
318: // token "-234"
319: assertTrue("No more token.", tokenizer.hasMoreToken());
320: token = tokenizer.nextToken();
321: verifyImageParts(token, new String[] { "-234", "-", "234" });
322:
323: // token "+0.09"
324: assertTrue("No more token.", tokenizer.hasMoreToken());
325: token = tokenizer.nextToken();
326: verifyImageParts(token, new String[] { "+0.09", "+", "0",
327: "09" });
328:
329: // token "14.1"
330: assertTrue("No more token.", tokenizer.hasMoreToken());
331: token = tokenizer.nextToken();
332: verifyImageParts(token, new String[] { "14.1", "", "14",
333: "1" });
334:
335: // string token #1
336: assertTrue("No more token.", tokenizer.hasMoreToken());
337: token = tokenizer.nextToken();
338: verifyImageParts(token,
339: new String[] { "a \"string\" with escapes" });
340:
341: // string token #2
342: assertTrue("No more token.", tokenizer.hasMoreToken());
343: token = tokenizer.nextToken();
344: verifyImageParts(token, new String[] {
345: "a SQL string with 'escapes' and",
346: " with linefeeds", "" });
347:
348: // line comment token
349: assertTrue("No more token.", tokenizer.hasMoreToken());
350: token = tokenizer.nextToken();
351: verifyImageParts(token, new String[] { " a line comment" });
352:
353: // EOF reached
354: assertTrue("No more token.", tokenizer.hasMoreToken());
355: token = tokenizer.nextToken();
356: assertTrue("Token is not EOF.", token != null
357: && token.getType() == Token.EOF);
358: } finally {
359: tokenizer.close();
360: }
361: }
362:
363: /**
364: * Test preceedence of {@link Tokenizer} flags over the {@link TokenizerProperties}
365: * flags
366: */
367: public void testReturnWhitespaces() throws Throwable {
368: String source = "// a text with several token types\n"
369: + "void main(int argc, char* argv[]) {\n"
370: + "printf(\"Hello, world!\");\n" + "}";
371: int[] flagMasks = { 0, Flags.F_RETURN_WHITESPACES,
372: Flags.F_RETURN_WHITESPACES | Flags.F_COUNT_LINES };
373: int[][] expected = {
374: { Token.KEYWORD, Token.NORMAL, Token.SEPARATOR,
375: Token.KEYWORD, Token.NORMAL, Token.SEPARATOR,
376: Token.KEYWORD, Token.SEPARATOR, Token.NORMAL,
377: Token.SEPARATOR, Token.SEPARATOR,
378: Token.SEPARATOR, Token.SEPARATOR, Token.NORMAL,
379: Token.SEPARATOR, Token.STRING, Token.SEPARATOR,
380: Token.SEPARATOR, Token.SEPARATOR, Token.EOF },
381: { Token.LINE_COMMENT, Token.KEYWORD, Token.WHITESPACE,
382: Token.NORMAL, Token.SEPARATOR, Token.KEYWORD,
383: Token.WHITESPACE, Token.NORMAL,
384: Token.SEPARATOR, Token.WHITESPACE,
385: Token.KEYWORD, Token.SEPARATOR,
386: Token.WHITESPACE, Token.NORMAL,
387: Token.SEPARATOR, Token.SEPARATOR,
388: Token.SEPARATOR, Token.WHITESPACE,
389: Token.SEPARATOR, Token.WHITESPACE,
390: Token.NORMAL, Token.SEPARATOR, Token.STRING,
391: Token.SEPARATOR, Token.SEPARATOR,
392: Token.WHITESPACE, Token.SEPARATOR, Token.EOF },
393: { Token.LINE_COMMENT, Token.KEYWORD, Token.WHITESPACE,
394: Token.NORMAL, Token.SEPARATOR, Token.KEYWORD,
395: Token.WHITESPACE, Token.NORMAL,
396: Token.SEPARATOR, Token.WHITESPACE,
397: Token.KEYWORD, Token.SEPARATOR,
398: Token.WHITESPACE, Token.NORMAL,
399: Token.SEPARATOR, Token.SEPARATOR,
400: Token.SEPARATOR, Token.WHITESPACE,
401: Token.SEPARATOR, Token.WHITESPACE,
402: Token.NORMAL, Token.SEPARATOR, Token.STRING,
403: Token.SEPARATOR, Token.SEPARATOR,
404: Token.WHITESPACE, Token.SEPARATOR, Token.EOF } };
405: int[][] starts = {
406: { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
407: -1, -1, -1, -1, -1, -1, -1 },
408: { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
409: -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
410: -1, -1, -1 },
411: { 0, 0, 4, 5, 9, 10, 13, 14, 18, 19, 20, 24, 25, 26,
412: 30, 31, 32, 33, 34, 35, 0, 6, 7, 22, 23, 24, 0,
413: 1 } };
414:
415: TokenizerProperties props = new StandardTokenizerProperties(/*Flags.F_TOKEN_POS_ONLY*/);
416:
417: // add properties
418: props.addLineComment("//");
419: props.addString("\"", "\"", "\\");
420: props.addKeyword("void");
421: props.addKeyword("int");
422: props.addKeyword("char");
423:
424: for (int index = 0; index < flagMasks.length; ++index) {
425: Token token;
426: int count = 0;
427: Tokenizer tokenizer = new StandardTokenizer(props);
428:
429: try {
430: tokenizer.changeParseFlags(flagMasks[index],
431: flagMasks[index]);
432: tokenizer.setSource(new StringSource(source));
433:
434: while (tokenizer.hasMoreToken()) {
435: token = tokenizer.nextToken();
436:
437: System.out.println(token);
438: assertTrue("Index " + index + ", token #" + count
439: + ": expected token type \""
440: + Token.getTypeName(expected[index][count])
441: + "\", got \""
442: + Token.getTypeName(token.getType())
443: + "\": " + token,
444: token.getType() == expected[index][count]);
445: assertTrue(
446: "Index " + index + ", token #" + count
447: + ": expected start column "
448: + starts[index][count] + ", got "
449: + token.getStartColumn() + ": "
450: + token,
451: token.getStartColumn() == starts[index][count]);
452: count++;
453: }
454: } finally {
455: tokenizer.close();
456: }
457: }
458: }
459:
460: //---------------------------------------------------------------------------
461: // Implementation
462: //
463:
464: /**
465: * Checks image parts
466: */
467: private void verifyImageParts(Token token, String[] expected)
468: throws Throwable {
469: String[] imageParts = token.getImageParts();
470:
471: if (expected != null) {
472: assertTrue("Token has no image parts: " + token,
473: imageParts != null);
474: assertTrue("Expected " + expected.length
475: + " image parts, got " + imageParts.length + ": "
476: + token, imageParts.length == expected.length);
477:
478: for (int index = 0; index < expected.length; ++index) {
479: if (expected[index] != null) {
480: assertTrue("Image part " + index + ": expected \""
481: + expected[index] + "\", got \""
482: + imageParts[index] + "\": " + token,
483: imageParts[index] != null
484: && imageParts[index]
485: .equals(expected[index]));
486: } else {
487: assertTrue("Image part " + index
488: + ": expected null, got \""
489: + imageParts[index] + "\": " + token,
490: imageParts[index] == null);
491: }
492: }
493: } else {
494: assertTrue("Expected no image parts, got " + imageParts
495: + ": " + token, imageParts == null
496: || imageParts.length == 0);
497: }
498: }
499:
500: //---------------------------------------------------------------------------
501: // Members
502: //
503: }
|