/*
 * TestEmbeddedTokenizer.java: JUnit test for the StandardTokenizer
 *
 * Copyright (C) 2001 Heiko Blau
 *
 * This file belongs to the Susebox Java core test suite.
 * The Susebox Java core test suite is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of the License,
 * or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License along
 * with the Susebox Java core test suite. If not, write to the
 *
 *   Free Software Foundation, Inc.
 *   59 Temple Place, Suite 330,
 *   Boston, MA 02111-1307
 *   USA
 *
 * or check the Internet: http://www.fsf.org
 *
 * The Susebox Java core test suite uses the test framework JUnit by Kent Beck
 * and Erich Gamma. You should have received a copy of their JUnit license
 * agreement along with the Susebox Java test suite.
 *
 * We do NOT provide the JUnit archive junit.jar necessary to compile and run
 * our tests, since we assume that you either have it already or would like
 * to get the current release yourself.
 * Please visit either:
 *   http://sourceforge.net/projects/junit
 * or
 *   http://junit.org
 * to obtain JUnit.
 *
 * Contact:
 *   email: heiko@susebox.de
 */

package de.susebox.jtopas;

//-----------------------------------------------------------------------------
// Imports
//
import java.io.InputStream;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.Properties;
import java.net.URL;

import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;

import de.susebox.java.lang.ExtRuntimeException;

import de.susebox.TestUtilities;

//-----------------------------------------------------------------------------
// Class TestEmbeddedTokenizer
//

/**<p>
 * This unit test checks the embedded-tokenizer feature of the class {@link StandardTokenizer}.
 * With this technique it is possible to parse multipart documents like HTML with
 * embedded CSS and script parts, Java sources with javadoc comments, etc.
 *</p><p>
 * This test suite works with a test configuration file. This file contains some
 * sets of properties, each set for one or more different test runs.
 *</p><p>
 * The properties are defined as class constants. In the configuration file, a
 * property consists of the property name and a number identifying the property
 * set.
 *</p>
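 *<p>
 * For illustration only, a configuration file for this test might contain
 * entries like the following sketch (the resource paths are placeholders, not
 * part of the real test data):
 *<pre>
 *   Path1     = /path/to/test1.html
 *   Path2     = /path/to/test2.html
 *   JavaPath1 = /path/to/Test1.java
 *</pre>
 * The numbered keys are formed from {@link #PROP_PATH} and {@link #PROP_JAVAPATH};
 * {@link #suite} reads them with increasing numbers until a key is missing.
 *</p>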
 *
 * @see StandardTokenizer
 * @author Heiko Blau
 */
public class TestEmbeddedTokenizer extends TestCase {

  //---------------------------------------------------------------------------
  // properties
  //

  /**
   * The name of the test configuration file. This file will be read by
   * {@link java.lang.Class#getResourceAsStream}.
   */
  public static final String CONFIG_FILE = "TestEmbeddedTokenizer.conf";

  /**
   * Property for the test {@link #testEmbeddedTokenizer}
   */
  public static final String PROP_PATH = "Path";

  /**
   * Property for the test {@link #testJavaTokenizer}
   */
  public static final String PROP_JAVAPATH = "JavaPath";

  //---------------------------------------------------------------------------
  // main method
  //

  /**
   * Call this method to invoke the tests.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    String[] tests = { TestEmbeddedTokenizer.class.getName() };

    TestUtilities.run(tests, args);
  }

  //---------------------------------------------------------------------------
  // suite method
  //

  /**
   * Implementation of the JUnit method <code>suite</code>. For each set of test
   * properties one or more tests are instantiated.
   *
   * @return a test suite
   */
  public static Test suite() {
    TestSuite suite = new TestSuite(TestEmbeddedTokenizer.class.getName());
    Properties props = new Properties();
    int count = 1;
    String path;
    URL url;

    try {
      props.load(TestEmbeddedTokenizer.class.getResourceAsStream(CONFIG_FILE));
    } catch (Exception ex) {
      throw new ExtRuntimeException(ex);
    }

    // test on HTML files
    while ((path = props.getProperty(PROP_PATH + count)) != null) {
      if ((url = TestEmbeddedTokenizer.class.getResource(path)) != null) {
        path = url.getFile();
      }
      suite.addTest(new TestEmbeddedTokenizer("testEmbeddedTokenizer", path));
      count++;
    }

    // tests on Java files
    count = 1;
    while ((path = props.getProperty(PROP_JAVAPATH + count)) != null) {
      if ((url = TestEmbeddedTokenizer.class.getResource(path)) != null) {
        path = url.getFile();
      }
      suite.addTest(new TestEmbeddedTokenizer("testJavaTokenizer", path));
      count++;
    }
    return suite;
  }

  //---------------------------------------------------------------------------
  // Constructor
  //

  /**
   * Initializes the instance with the path of the test file.
   *
   * @param test which test method should be invoked
   * @param path path of the file to tokenize in the test
   */
  public TestEmbeddedTokenizer(String test, String path) {
    super(test);
    _path = path;
  }

  //---------------------------------------------------------------------------
  // Fixture setup and release
  //

  /**
   * Sets up the fixture, for example, open a network connection.
   * This method is called before a test is executed.
   *
   * @throws Exception for anything that might go wrong
   */
  protected void setUp() throws Exception {
    InputStream stream = new FileInputStream(_path);

    _reader = new InputStreamReader(stream);
  }

  /**
   * Tears down the fixture, for example, close a network connection.
   * This method is called after a test is executed.
   *
   * @throws Exception for anything that might go wrong
   */
  protected void tearDown() throws Exception {
    _reader.close();
  }

  //---------------------------------------------------------------------------
  // test cases
  //

  /**
   * This method reads the given stream as a Java source. It extracts javadoc
   * comments and source code.
   * There should be a class or interface name in every Java source, and the
   * opening and closing braces should match, etc.
   *
   * @throws Throwable for anything that might go wrong
   * @see #testEmbeddedTokenizer
   */
  public void testJavaTokenizer() throws Throwable {
    long start = System.currentTimeMillis();
    StandardTokenizerProperties javaProps = new StandardTokenizerProperties();
    StandardTokenizerProperties docProps = new StandardTokenizerProperties();
    StandardTokenizer javaTokenizer = new StandardTokenizer(javaProps);
    StandardTokenizer docTokenizer = new StandardTokenizer(docProps);
    StandardTokenizer currTokenizer = javaTokenizer;
    Object openBlock = new Object();
    Object closeBlock = new Object();
    Object atSign = new Object();
    int blockBalance = 0;
    Token token;
    int lastStartLineNo = -1;
    int lastStartColNo = -1;

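    // Configure the tokenizers. F_COUNT_LINES is required for the line and
    // column checks performed in the token loop below; F_NO_CASE lets the
    // javadoc tokenizer match its keywords regardless of case.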
    javaProps.setParseFlags(Flags.F_TOKEN_POS_ONLY | Flags.F_KEEP_DATA | Flags.F_COUNT_LINES);
    docProps.setParseFlags(Flags.F_NO_CASE);

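    // The javadoc tokenizer is registered as the companion of "/**", and the
    // Java tokenizer as the companion of "*/". The token loop below uses these
    // companions to switch between the two tokenizers; the other companions
    // (openBlock, closeBlock, atSign) merely mark interesting sequences.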
    javaProps.addSpecialSequence("/**", docTokenizer);
    javaProps.addSpecialSequence("{", openBlock);
    javaProps.addSpecialSequence("}", closeBlock);
    javaProps.addBlockComment(TokenizerProperties.DEFAULT_BLOCK_COMMENT_START,
                              TokenizerProperties.DEFAULT_BLOCK_COMMENT_END);
    javaProps.addLineComment(TokenizerProperties.DEFAULT_LINE_COMMENT);
    javaProps.addString(TokenizerProperties.DEFAULT_STRING_START,
                        TokenizerProperties.DEFAULT_STRING_END,
                        TokenizerProperties.DEFAULT_STRING_ESCAPE);
    javaProps.addString("'", "'", "\\");
    docProps.addSpecialSequence("*/", javaTokenizer);
    docProps.addSpecialSequence("@", atSign);
    docProps.addKeyword("param");
    docProps.addKeyword("return");
    docProps.addKeyword("throws");
    docProps.addKeyword("author");
    docProps.addKeyword("version");
    docProps.addKeyword("link");
    docProps.addKeyword("see");
    docProps.addKeyword("deprecated");

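    // The Java tokenizer reads the test file; the javadoc tokenizer is added
    // as an embedded tokenizer so that switchTo() can hand the same input
    // back and forth between the two.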
    javaTokenizer.setSource(_reader);
    javaTokenizer.addTokenizer(docTokenizer);

    System.out.println("\nStart parsing \"" + _path + "\"");
    while (currTokenizer.hasMoreToken()) {
      token = currTokenizer.nextToken();

      // Line counting test
      assertTrue(token.getStartLine() >= lastStartLineNo);
      if (token.getStartLine() == lastStartLineNo) {
        assertTrue(token.getStartColumn() >= lastStartColNo);
        if (token.getEndLine() == lastStartLineNo) {
          assertTrue(token.getEndColumn() == token.getStartColumn() + token.getLength());
        }
      }
      lastStartLineNo = token.getStartLine();
      lastStartColNo = token.getStartColumn();

      // tokenizer switching
      switch (token.getType()) {
      case Token.SPECIAL_SEQUENCE:
        if (token.getCompanion() instanceof StandardTokenizer) {
          StandardTokenizer tokenizer = (StandardTokenizer) token.getCompanion();

          currTokenizer.switchTo(tokenizer);
          currTokenizer = tokenizer;
        } else if (token.getCompanion() == openBlock) {
          blockBalance++;
        } else if (token.getCompanion() == closeBlock) {
          blockBalance--;
        } else if (token.getCompanion() == atSign) {
          token = currTokenizer.nextToken();
          assertTrue("Expected keyword after @ sign in javadoc comment, but found \""
                     + currTokenizer.currentImage() + "\".",
                     token.getType() == Token.KEYWORD);
        }
        break;
      }
    }

    // some checks
    assertTrue("Braces should be balanced in Java file \"" + _path
               + "\", but detected an imbalance of " + blockBalance + ".",
               blockBalance == 0);

    // print elapsed time
    long diff = System.currentTimeMillis() - start;
    System.out.println("Finished after " + diff + " milliseconds");
  }

  /**
   * The method takes the HTML file given in the constructor and parses it with
   * the main HTML tokenizer and two embedded tokenizers for JavaScript and
   * CSS.
   *
   * @throws Throwable for anything that might go wrong
   * @see #testJavaTokenizer
   */
  public void testEmbeddedTokenizer() throws Throwable {
    long start = System.currentTimeMillis();
    StandardTokenizerProperties htmlProps = new StandardTokenizerProperties();
    StandardTokenizerProperties jsProps = new StandardTokenizerProperties();
    StandardTokenizerProperties cssProps = new StandardTokenizerProperties();
    StandardTokenizer htmlTokenizer = new StandardTokenizer(htmlProps);
    StandardTokenizer jsTokenizer = new StandardTokenizer(jsProps);
    StandardTokenizer cssTokenizer = new StandardTokenizer(cssProps);
    String keywordLang = new String("LANGUAGE");
    Object endOfEmbedded = new Object();
    Object startOfTag = new Object();
    Object endOfTag = new Object();
    Token token;
    int lastStartLineNo = -1;
    int lastStartColNo = -1;

    htmlProps.setParseFlags(Flags.F_TOKEN_POS_ONLY | Flags.F_KEEP_DATA | Flags.F_COUNT_LINES);
    cssProps.setParseFlags(Flags.F_TOKEN_POS_ONLY | Flags.F_NO_CASE);
    jsProps.setParseFlags(Flags.F_TOKEN_POS_ONLY);

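    // SCRIPT and STYLE carry the JavaScript and CSS tokenizers as keyword
    // companions; startOfTag/endOfTag mark "<" and ">", and "-->" carries the
    // endOfEmbedded companion that signals the switch back to HTML below.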
    htmlProps.addKeyword("SCRIPT", jsTokenizer);
    htmlProps.addKeyword("LANGUAGE", keywordLang);
    htmlProps.addKeyword("STYLE", cssTokenizer);
    htmlProps.addSpecialSequence("<", startOfTag);
    htmlProps.addSpecialSequence(">", endOfTag);
    htmlProps.addBlockComment("<!--", "-->");
    htmlProps.addString(TokenizerProperties.DEFAULT_STRING_START,
                        TokenizerProperties.DEFAULT_STRING_END,
                        TokenizerProperties.DEFAULT_STRING_ESCAPE);
    htmlProps.setSeparators(TokenizerProperties.DEFAULT_SEPARATORS);

    jsProps.addBlockComment(TokenizerProperties.DEFAULT_BLOCK_COMMENT_START,
                            TokenizerProperties.DEFAULT_BLOCK_COMMENT_END);
    jsProps.addSpecialSequence("<!--");
    jsProps.addSpecialSequence("-->", endOfEmbedded);
    jsProps.setSeparators(TokenizerProperties.DEFAULT_SEPARATORS);

    cssProps.addSpecialSequence("<!--");
    cssProps.addSpecialSequence("-->", endOfEmbedded);

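    // The HTML tokenizer owns the input; the JavaScript and CSS tokenizers are
    // added as embedded tokenizers and are activated via switchTo() when the
    // corresponding tags are encountered in the loop below.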
    htmlTokenizer.setSource(_reader);
    htmlTokenizer.addTokenizer(jsTokenizer);
    htmlTokenizer.addTokenizer(cssTokenizer);

    System.out.println("\nStart parsing \"" + _path + "\"");
    while (htmlTokenizer.hasMoreToken()) {
      token = htmlTokenizer.nextToken();

      // Line counting test
      assertTrue(token.getStartLine() >= lastStartLineNo);
      if (token.getStartLine() == lastStartLineNo) {
        assertTrue(token.getStartColumn() >= lastStartColNo);
        if (token.getEndLine() == lastStartLineNo) {
          assertTrue(token.getEndColumn() == token.getStartColumn() + token.getLength());
        }
      }
      lastStartLineNo = token.getStartLine();
      lastStartColNo = token.getStartColumn();

      // Tokenizer switching
      switch (token.getType()) {
      case Token.SPECIAL_SEQUENCE:

        // dealing with JavaScript
        if (token.getCompanion() == startOfTag) {
          token = htmlTokenizer.nextToken();
          if (token.getType() == Token.KEYWORD && token.getCompanion() == jsTokenizer) {
            token = htmlTokenizer.nextToken();
            assertTrue("Found token \"" + htmlTokenizer.currentImage()
                       + "\". Expected \"" + keywordLang + "\".",
                       token.getCompanion() == keywordLang);    // should be the LANGUAGE keyword
            token = htmlTokenizer.nextToken();
            assertTrue("Found token \"" + htmlTokenizer.currentImage()
                       + "\". Expected \"=\".",
                       htmlTokenizer.currentImage().equals("="));    // should be "="
            token = htmlTokenizer.nextToken();
            assertTrue("Found token \"" + htmlTokenizer.currentImage()
                       + "\". Expected a string.",
                       token.getType() == Token.STRING);    // should be "JavaScript"

            // exclude JavaScript includes
            token = htmlTokenizer.nextToken();
            if (token.getCompanion() == endOfTag) {
              htmlTokenizer.switchTo(jsTokenizer);

              // continuing with the JavaScript tokenizer
              while (jsTokenizer.hasMoreToken()) {
                token = jsTokenizer.nextToken();
                if (token.getType() == Token.SPECIAL_SEQUENCE
                    && token.getCompanion() == endOfEmbedded) {
                  jsTokenizer.switchTo(htmlTokenizer);
                  break;
                }
              }

              // now we should find the end-of-script tag
              token = htmlTokenizer.nextToken();
              assertTrue("Found token \"" + htmlTokenizer.currentImage()
                         + "\". Expected start of tag.",
                         token.getCompanion() == startOfTag);
              token = htmlTokenizer.nextToken();
              assertTrue("Found token \"" + htmlTokenizer.currentImage()
                         + "\". Expected \"/\".",
                         htmlTokenizer.currentImage().equals("/"));
              token = htmlTokenizer.nextToken();
              assertTrue("Found token \"" + htmlTokenizer.currentImage()
                         + "\". Expected SCRIPT.",
                         token.getCompanion() == jsTokenizer);
              token = htmlTokenizer.nextToken();
              assertTrue("Found token \"" + htmlTokenizer.currentImage()
                         + "\". Expected end of tag.",
                         token.getCompanion() == endOfTag);
            }

          } else if (token.getType() == Token.KEYWORD && token.getCompanion() == cssTokenizer) {
            // dealing with Cascading Style Sheets (CSS)
            token = htmlTokenizer.nextToken();
            assertTrue("Found token \"" + htmlTokenizer.currentImage()
                       + "\". Expected end of tag.",
                       token.getCompanion() == endOfTag);    // should be the end of the tag

            htmlTokenizer.switchTo(cssTokenizer);
            while (cssTokenizer.hasMoreToken()) {
              token = cssTokenizer.nextToken();
              if (token.getType() == Token.SPECIAL_SEQUENCE
                  && token.getCompanion() == endOfEmbedded) {
                cssTokenizer.switchTo(htmlTokenizer);
                break;
              }
            }

            // now we should find the end-of-style tag
            token = htmlTokenizer.nextToken();
            assertTrue("Found token \"" + htmlTokenizer.currentImage()
                       + "\". Expected start of tag.",
                       token.getCompanion() == startOfTag);
            token = htmlTokenizer.nextToken();
            assertTrue("Found token \"" + htmlTokenizer.currentImage()
                       + "\". Expected \"/\".",
                       htmlTokenizer.currentImage().equals("/"));
            token = htmlTokenizer.nextToken();
            assertTrue("Found token \"" + htmlTokenizer.currentImage()
                       + "\". Expected STYLE.",
                       token.getCompanion() == cssTokenizer);
            token = htmlTokenizer.nextToken();
            assertTrue("Found token \"" + htmlTokenizer.currentImage()
                       + "\". Expected end of tag.",
                       token.getCompanion() == endOfTag);
          }
        }
        break;
      }
    }

    long diff = System.currentTimeMillis() - start;
    System.out.println("Finished after " + diff + " milliseconds");
  }

  //---------------------------------------------------------------------------
  // Members
  //
  protected InputStreamReader _reader = null;
  protected String _path = null;
}