/*
 * TestPatternMatching.java: JUnit test for regular expression tokenizing
 *
 * Copyright (C) 2003 Heiko Blau
 *
 * This file belongs to the JTopas test suite.
 * The JTopas test suite is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by the
 * Free Software Foundation; either version 2.1 of the License, or (at your option)
 * any later version.
 *
 * This software is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License along
 * with the JTopas test suite. If not, write to the
 *
 *   Free Software Foundation, Inc.
 *   59 Temple Place, Suite 330,
 *   Boston, MA 02111-1307
 *   USA
 *
 * or check the Internet: http://www.fsf.org
 *
 * The JTopas test suite uses the test framework JUnit by Kent Beck and Erich Gamma.
 * You should have received a copy of their JUnit licence agreement along with
 * the JTopas test suite.
 *
 * We do NOT provide the JUnit archive junit.jar necessary to compile and run
 * our tests, since we assume that you either have it already or would like
 * to get the current release yourself.
 * Please visit either:
 *   http://sourceforge.net/projects/junit
 * or
 *   http://junit.org
 * to obtain JUnit.
 *
 * Contact:
 *   email: heiko@susebox.de
 */

package de.susebox.jtopas;

//-----------------------------------------------------------------------------
// Imports
//
import java.io.Reader;
import java.io.StringReader;

import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import junit.framework.Assert;

import de.susebox.TestUtilities;

//-----------------------------------------------------------------------------
// Class TestPatternMatching
//

/**<p>
 * The class contains a number of test cases related to the pattern matching
 * facility in a {@link Tokenizer}.
 *</p>
 *
 * @see     Tokenizer
 * @see     StandardTokenizer
 * @see     StandardTokenizerProperties
 * @author  Heiko Blau
 */
public class TestPatternMatching extends TestCase {

  //---------------------------------------------------------------------------
  // properties
  //

  //---------------------------------------------------------------------------
  // main method
  //

  /**
   * Call this method to invoke the tests.
   */
  public static void main(String[] args) {
    String[] tests = { TestPatternMatching.class.getName() };

    TestUtilities.run(tests, args);
  }

  //---------------------------------------------------------------------------
  // suite method
  //

  /**
   * Implementation of the JUnit method <code>suite</code>. For each set of test
   * properties one or more tests are instantiated.
   *
   * @return a test suite
   */
  public static Test suite() {
    TestSuite suite = new TestSuite(TestPatternMatching.class.getName());

    suite.addTest(new TestPatternMatching("compareJTopasAgainstPattern"));
    suite.addTest(new TestPatternMatching("testNumberPattern"));
    suite.addTest(new TestPatternMatching("testPatternExt"));
    suite.addTest(new TestPatternMatching("testPatternTokenizer"));
    suite.addTest(new TestPatternMatching("testFreePattern"));
    return suite;
  }

  //---------------------------------------------------------------------------
  // Constructor
  //

  /**
   * Constructs a test case instance that runs the test method with the given
   * name.
   *
   * @param test the name of the test method to run
   */
  public TestPatternMatching(String test) {
    super(test);
  }

  //---------------------------------------------------------------------------
  // Fixture setup and release
  //

  /**
   * Sets up the fixture, for example, open a network connection.
   * This method is called before a test is executed.
   */
  protected void setUp() throws Exception {
  }

  /**
   * Tears down the fixture, for example, close a network connection.
   * This method is called after a test is executed.
   */
  protected void tearDown() throws Exception {
  }

  //---------------------------------------------------------------------------
  // test cases
  //

  /**
   * Tests pattern matching of numbers. Two patterns are registered, one with
   * a dot and one with a comma as the decimal separator, both with an optional
   * leading sign. Every token in the input except the final EOF should
   * therefore be recognized as a pattern match.
   */
  public void testNumberPattern() throws Throwable {
    Reader reader = new StringReader("0.95 123 -1 -123123.92382 0,32 +4,001 -123,213 -0");
    TokenizerProperties props = new StandardTokenizerProperties();
    StandardTokenizer tokenizer = new StandardTokenizer(props);

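    // Let ".", "-", "+" and "," be consumed as part of pattern matches instead of
    // being returned as separator tokens, then register one number pattern with
    // "." and one with "," as the decimal separator (both accept an optional sign).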
    props.removeSeparators(".\\-+,");
    props.addPattern("[+\\-]?[0-9]+\\.?[0-9]*");
    props.addPattern("[+\\-]?[0-9]+,?[0-9]*");
    tokenizer.setSource(reader);

    while (tokenizer.hasMoreToken()) {
      Token token = tokenizer.nextToken();

      System.out.println(token);
      if (token.getType() != Token.EOF) {
        assertTrue("Wrong token type " + Token.getTypeName(token.getType())
                   + " for: " + token.getImage(),
                   token.getType() == Token.PATTERN);
      }
    }
  }

  /**
   * Tests a number pattern embedded in program-like text. The token types
   * produced by the tokenizer are compared against the expected sequence of
   * normal, separator and pattern tokens.
   */
  public void testPatternExt() throws Throwable {
    Reader reader = new StringReader(
        "// the main method\nvoid main100(100)\n{ int x1 = 0.95; int x2 = -1; int x3 = 3.0 -1.0; }");
    int[] expected = { Token.NORMAL, Token.NORMAL, Token.SEPARATOR,
                       Token.PATTERN, Token.SEPARATOR, Token.SEPARATOR,
                       Token.NORMAL, Token.NORMAL, Token.SEPARATOR,
                       Token.PATTERN, Token.SEPARATOR, Token.NORMAL,
                       Token.NORMAL, Token.SEPARATOR, Token.PATTERN,
                       Token.SEPARATOR, Token.NORMAL, Token.NORMAL,
                       Token.SEPARATOR, Token.PATTERN, Token.PATTERN,
                       Token.SEPARATOR, Token.SEPARATOR, Token.EOF };
    TokenizerProperties props = new StandardTokenizerProperties();
    StandardTokenizer tokenizer = new StandardTokenizer(props);

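    // Remove "." and "-" from the separator set, register "//" line comments and
    // a signed floating point number pattern.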
    props.removeSeparators(".\\-");
    props.addLineComment("//");
    props.addPattern("[+\\-]?[0-9]+\\.?[0-9]*");
    tokenizer.setSource(reader);

    int index = 0;
    while (tokenizer.hasMoreToken()) {
      Token token = tokenizer.nextToken();

      System.out.println(token);
      assertTrue("Wrong token type " + Token.getTypeName(token.getType())
                 + " for: " + token.getImage(),
                 token.getType() == expected[index]);
      index++;
    }
  }

  /**
   * A tokenizer that works almost entirely with patterns and whitespace. Each
   * registered pattern and special sequence carries a companion object that
   * identifies its token class.
   */
  public void testPatternTokenizer() throws Throwable {
    Reader reader = new StringReader("// the main method\n"
                                     + "void main(String[] argv)\n" + "{\n"
                                     + "int x1 = 0.95; // 1. variable\n"
                                     + "int x2 = -1; // 2. variable\n"
                                     + "int x3 = x1 - x2; // 3. variable\n"
                                     + "int x4 = +1.01; // 4. variable\n" + "}\n");

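    // Companion objects used to tag the different token classes. They are created
    // with "new String(...)" so that each one is a distinct object; the assertions
    // below compare companions by identity (==).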
    Object lineComment = new String("<line comment>");
    Object identifier = new String("<identifier>");
    Object number = new String("<number>");
    Object separator = new String("<separator>");

    Object[] expected = { lineComment, identifier, identifier,
                          separator, identifier, separator, separator,
                          identifier, separator, separator, identifier,
                          identifier, separator, number, separator, lineComment,
                          identifier, identifier, separator, number, separator,
                          lineComment, identifier, identifier, separator,
                          identifier, separator, identifier, separator,
                          lineComment, identifier, identifier, separator, number,
                          separator, lineComment, separator };

    TokenizerProperties props = new StandardTokenizerProperties();
    StandardTokenizer tokenizer = new StandardTokenizer(props);

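    // Register the token classes with their companion objects: a number pattern,
    // a case-insensitive identifier pattern (F_NO_CASE), a "//..." line comment
    // pattern registered with F_FREE_PATTERN so that the match may include
    // whitespace, and the single-character punctuation as special sequences.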
    props.addPattern("[+\\-]?[0-9]+\\.?[0-9]*", number);
    props.addPattern("[a-z][a-z0-9]*", identifier, Flags.F_NO_CASE);
    props.addPattern("//.*$", lineComment, Flags.F_FREE_PATTERN);
    props.addSpecialSequence("{", separator);
    props.addSpecialSequence("}", separator);
    props.addSpecialSequence("(", separator);
    props.addSpecialSequence(")", separator);
    props.addSpecialSequence("[", separator);
    props.addSpecialSequence("]", separator);
    props.addSpecialSequence("=", separator);
    props.addSpecialSequence(";", separator);
    props.addSpecialSequence("-", separator);
    tokenizer.setSource(reader);

    int index = 0;
    while (tokenizer.hasMoreToken()) {
      Token token = tokenizer.nextToken();

      if (token.getType() != Token.EOF) {
        System.out.println(token);
        assertTrue("Wrong token companion: " + token.getCompanion()
                   + " for: " + token.getImage(),
                   token.getCompanion() == expected[index]);
      }
      index++;
    }
  }

  /**
   * Like {@link #testPatternTokenizer}, but the patterns for numbers and
   * parameter lists are registered as free patterns, so a single match may
   * include whitespace (for example a sign separated from its digits).
   */
  public void testFreePattern() throws Throwable {
    Reader reader = new StringReader("// the main method\n"
                                     + "void main(String[] argv)\n" + "{\n"
                                     + "int x1 =+1.01;\n" + "int x2 =- 2.02;\n"
                                     + "int x3 = + 2.02; // line comment\n"
                                     + "int x4 = - 2.02+ 3.1232;\n"
                                     + "fct(x1, x2, x3)// a call\n" + "}\n");

    Object lineComment = new String("<line comment>");
    Object identifier = new String("<identifier>");
    Object number = new String("<number>");
    Object separator = new String("<separator>");
    Object paraList = new String("<parameter list>");

    Object[] expected = { lineComment, identifier, identifier,
                          paraList, separator, identifier, identifier, separator,
                          number, separator, identifier, identifier, separator,
                          number, separator, identifier, identifier, separator,
                          number, separator, lineComment, identifier, identifier,
                          separator, number, number, separator, identifier,
                          paraList, lineComment, separator };

    TokenizerProperties props = new StandardTokenizerProperties();
    StandardTokenizer tokenizer = new StandardTokenizer(props);

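    // The number pattern below is a free pattern: it tolerates blanks or tabs
    // between the sign and the digits. The parameter list pattern swallows an
    // entire "(...)" group in one match.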
    props.addPattern("[+\\-]?[ \t]*[0-9]+\\.?[0-9]*", number, Flags.F_FREE_PATTERN);
    props.addPattern("[a-z][a-z0-9]*", identifier, Flags.F_NO_CASE);
    props.addPattern("//.*$", lineComment, Flags.F_FREE_PATTERN);
    props.addPattern("\\(.*\\)", paraList, Flags.F_FREE_PATTERN);
    props.addSpecialSequence("{", separator);
    props.addSpecialSequence("}", separator);
    props.addSpecialSequence("(", separator);
    props.addSpecialSequence(")", separator);
    props.addSpecialSequence("[", separator);
    props.addSpecialSequence("]", separator);
    props.addSpecialSequence("=", separator);
    props.addSpecialSequence(";", separator);
    props.addSpecialSequence("-", separator);
    tokenizer.setSource(reader);

    int index = 0;
    while (tokenizer.hasMoreToken()) {
      Token token = tokenizer.nextToken();

      if (token.getType() != Token.EOF) {
        System.out.println(token);
        assertTrue("Wrong token companion: " + token.getCompanion()
                   + " for: " + token.getImage(),
                   token.getCompanion() == expected[index]);
      }
      index++;
    }
  }

  /**
   * Compares the built-in JTopas string and comment handling against an
   * equivalent configuration based on regular expression patterns.
   */
  public void compareJTopasAgainstPattern() throws Throwable {
    String data = "/* File: $FILENAME */\n"
        + "/**\n"
        + "* Starting with a Javadoc comment.\n"
        + "* This comment describes the class below.\n"
        + "*/\n"
        + "public class TestClass {\n"
        + " //-------------------------------------------------------------------\n"
        + " // Constants\n"
        + " //-------------------------------------------------------------------\n"
        + "\n"
        + " /**\n"
        + " * A constant with its own comment\n"
        + " */\n"
        + " public static final String URL = \"http://jtopas.sourceforge.net/jtopas/index.html\";\n"
        + "\n"
        + " /**\n"
        + " * Another constant with its own comment\n"
        + " */\n"
        + " public static final String HELP =\n"
        + " \"This is the help for JTopas.\"\n"
        + " + \"You can obtain the software from \" + URL + \".\"\n"
        + " + \"It is a realy easy to use library.\";\n"
        + "\n"
        + " /**\n"
        + " * The main method takes the usual array of arguments. It also accepts\n"
        + " * <code>null</code>.\n"
        + " *\n"
        + " * @param args the arguments to the main method.\n"
        + " */\n"
        + " public void main(String[] argv)\n"
        + " {\n"
        + " // a loop over all arguments\n"
        + " for (int ii = 0; ii < argv.length; ++ii) {\n"
        + " char cc1 = 'A';\n"
        + " char cc2 = 'B';\n"
        // + " char cc3 = '\\'';\n"
        + " System.out.println(\"String #\" + ii +\": \" + argv[ii] + \".\";\n"
        + " }\n"
        + " // ready message\n"
        + " System.out.println(\"Ready printing Strings.\";\n"
        + " /*\n"
        + " here we add future extensions:\n"
        + " for instance the exit call :-)\n"
        + " */\n"
        + " }\n"
        + "}\n";

    Object docComment = new String("<doc comment>");
    Object blockComment = new String("<block comment>");
    Object lineComment = new String("<line comment>");
    Object string = new String("<string>");
    Object character = new String("<character>");

    TokenizerProperties jtopasProps = new StandardTokenizerProperties(
        Flags.F_RETURN_WHITESPACES | Flags.F_TOKEN_POS_ONLY);
    TokenizerProperties patternProps = new StandardTokenizerProperties(
        Flags.F_RETURN_WHITESPACES | Flags.F_TOKEN_POS_ONLY);

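    // Pattern-based configuration: block comments, line comments, string literals
    // and character literals are described by free regular expression patterns.
    // The commented-out lines are alternative patterns.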
    // patternProps.addPattern("/\\*\\*.*\\*/", docComment, Flags.F_FREE_PATTERN);
    patternProps.addPattern("/\\*.*?\\*/", blockComment, Flags.F_FREE_PATTERN);
    patternProps.addPattern("//.*?$", lineComment, Flags.F_FREE_PATTERN);
    patternProps.addPattern("\"[^$\"]*?\"", string, Flags.F_FREE_PATTERN);
    // patternProps.addPattern("\"[[^$\"]|[\\\\&&\"]]*?\"", string, Flags.F_FREE_PATTERN);
    patternProps.addPattern("'.'", character, Flags.F_FREE_PATTERN);
    // patternProps.addPattern("'[[^']|[\\\\&&']]+?'", character, Flags.F_FREE_PATTERN);

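    // Equivalent configuration using the built-in JTopas registrations for
    // block comments, line comments, strings and character literals.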
    // jtopasProps.addBlockComment("/**", "*/", docComment);
    jtopasProps.addBlockComment("/*", "*/", blockComment);
    jtopasProps.addLineComment("//", lineComment);
    jtopasProps.addString("\"", "\"", "\\", string);
    jtopasProps.addString("'", "'", "\\", character);

    tokenize(jtopasProps, data);
    tokenize(patternProps, data);
  }

  /**
   * Tokenizes the given data with a tokenizer built from the given properties.
   * The data is parsed 100 times and the elapsed time is printed, so the
   * pattern-based and the built-in configurations can be compared.
   */
  private void tokenize(TokenizerProperties props, String data) throws Throwable {
    long startTime = System.currentTimeMillis();
    StandardTokenizer tokenizer = new StandardTokenizer(props);

    try {
      for (int ii = 0; ii < 100; ++ii) {
        Reader reader = new StringReader(data);

        try {
          tokenizer.setSource(reader);
          while (tokenizer.hasMoreToken()) {
            Token token = tokenizer.nextToken();
            // System.out.println( + ": " + tokenizer.currentImage());
          }
        } finally {
          reader.close();
        }
      }
    } finally {
      tokenizer.close();
    }

    // print elapsed time
    long diffTime = System.currentTimeMillis() - startTime;
    System.out.println(" Finished after " + diffTime + " milliseconds.");
  }
}