0001: /*
0002: * TestDifficultSituations.java: JUnit test for a Tokenizer
0003: *
0004: * Copyright (C) 2002 Heiko Blau
0005: *
0006: * This file belongs to the JTopas test suite.
0007: * The JTopas test suite is free software; you can redistribute it and/or modify it
0008: * under the terms of the GNU Lesser General Public License as published by the
0009: * Free Software Foundation; either version 2.1 of the License, or (at your option)
0010: * any later version.
0011: *
0012: * This software is distributed in the hope that it will be useful, but WITHOUT
0013: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0014: * FITNESS FOR A PARTICULAR PURPOSE.
0015: * See the GNU Lesser General Public License for more details.
0016: *
0017: * You should have received a copy of the GNU Lesser General Public License along
0018: * with the JTopas test suite. If not, write to the
0019: *
0020: * Free Software Foundation, Inc.
0021: * 59 Temple Place, Suite 330,
0022: * Boston, MA 02111-1307
0023: * USA
0024: *
0025: * or check the Internet: http://www.fsf.org
0026: *
0027: * The JTopas test suite uses the test framework JUnit by Kent Beck and Erich Gamma.
0028: * You should have received a copy of their JUnit licence agreement along with
0029: * the JTopas test suite.
0030: *
* We do NOT provide the JUnit archive junit.jar necessary to compile and run
* our tests, since we assume that you either have it already or would like
* to get the current release yourself.
0034: * Please visit either:
0035: * http://sourceforge.net/projects/junit
0036: * or
0037: * http://junit.org
0038: * to obtain JUnit.
0039: *
0040: * Contact:
0041: * email: heiko@susebox.de
0042: */
0043:
0044: package de.susebox.jtopas;
0045:
0046: //-----------------------------------------------------------------------------
0047: // Imports
0048: //
0049: import java.lang.reflect.Constructor;
0050: import java.io.Reader;
0051: import java.io.StringReader;
0052:
0053: import junit.framework.Test;
0054: import junit.framework.TestCase;
0055: import junit.framework.TestSuite;
0056: import junit.framework.Assert;
0057:
0058: import de.susebox.TestUtilities;
0059:
0060: //-----------------------------------------------------------------------------
0061: // Class TestDifficultSituations
0062: //
0063:
0064: /**<p>
0065: * The class contains a number of test cases that are supposed to be difficult
0066: * to handle for a {@link Tokenizer}, e.g. EOF conditions inside strings etc.
0067: *</p>
0068: *
0069: * @see Tokenizer
0070: * @see StandardTokenizer
0071: * @see StandardTokenizerProperties
0072: * @author Heiko Blau
0073: */
0074: public class TestDifficultSituations extends TestCase {
0075:
0076: //---------------------------------------------------------------------------
0077: // properties
0078: //
0079:
0080: //---------------------------------------------------------------------------
0081: // main method
0082: //
0083:
0084: /**
0085: * call this method to invoke the tests
0086: */
0087: public static void main(String[] args) {
0088: String[] tests = { TestDifficultSituations.class.getName() };
0089:
0090: TestUtilities.run(tests, args);
0091: }
0092:
0093: //---------------------------------------------------------------------------
0094: // suite method
0095: //
0096:
0097: /**
0098: * Implementation of the JUnit method <code>suite</code>. For each set of test
0099: * properties one or more tests are instantiated.
0100: *
0101: * @return a test suite
0102: */
0103: public static Test suite() {
0104: TestSuite suite = new TestSuite(TestDifficultSituations.class
0105: .getName());
0106: Class[] sourceClasses = { ReaderSource.class,
0107: StringSource.class };
0108:
0109: for (int sourceIndex = 0; sourceIndex < sourceClasses.length; ++sourceIndex) {
0110: suite.addTest(new TestDifficultSituations(
0111: "testSequencesAndSeparators",
0112: sourceClasses[sourceIndex]));
0113: suite.addTest(new TestDifficultSituations(
0114: "testSmallSource", sourceClasses[sourceIndex]));
0115: suite.addTest(new TestDifficultSituations(
0116: "testEmptySource", sourceClasses[sourceIndex]));
0117: suite.addTest(new TestDifficultSituations(
0118: "testSimilarSpecialSequences",
0119: sourceClasses[sourceIndex]));
0120: suite.addTest(new TestDifficultSituations(
0121: "testNonASCIICharacters",
0122: sourceClasses[sourceIndex]));
0123: suite
0124: .addTest(new TestDifficultSituations(
0125: "testEOFInLineComment",
0126: sourceClasses[sourceIndex]));
0127: suite
0128: .addTest(new TestDifficultSituations(
0129: "testEOFInBlockComment",
0130: sourceClasses[sourceIndex]));
0131: suite.addTest(new TestDifficultSituations(
0132: "testEOFInString", sourceClasses[sourceIndex]));
0133: suite.addTest(new TestDifficultSituations(
0134: "testStringEscapes1", sourceClasses[sourceIndex]));
0135: suite.addTest(new TestDifficultSituations(
0136: "testStringEscapes2", sourceClasses[sourceIndex]));
0137: suite.addTest(new TestDifficultSituations(
0138: "testNestedComments", sourceClasses[sourceIndex]));
0139: suite.addTest(new TestDifficultSituations(
0140: "testReaderSwitching", sourceClasses[sourceIndex]));
0141: suite.addTest(new TestDifficultSituations("testDOSEOL",
0142: sourceClasses[sourceIndex]));
0143: suite.addTest(new TestDifficultSituations("testMACEOL",
0144: sourceClasses[sourceIndex]));
0145: suite.addTest(new TestDifficultSituations(
0146: "testSpecialCalls", sourceClasses[sourceIndex]));
0147: suite.addTest(new TestDifficultSituations(
0148: "testLineCounting", sourceClasses[sourceIndex]));
0149: suite.addTest(new TestDifficultSituations(
0150: "testUncommonWhitespaces",
0151: sourceClasses[sourceIndex]));
0152: suite.addTest(new TestDifficultSituations(
0153: "testWhitespaceHandling",
0154: sourceClasses[sourceIndex]));
0155: }
0156: return suite;
0157: }
0158:
0159: //---------------------------------------------------------------------------
0160: // Constructor
0161: //
0162:
0163: /**
0164: * Default constructor. Standard input {@link java.lang.System#in} is used
0165: * to construct the input stream reader.
0166: */
0167: public TestDifficultSituations(String test, Class sourceClass) {
0168: super (test);
0169: _sourceClass = sourceClass;
0170: }
0171:
0172: //---------------------------------------------------------------------------
0173: // Fixture setup and release
0174: //
0175:
0176: /**
0177: * Sets up the fixture, for example, open a network connection.
0178: * This method is called before a test is executed.
0179: */
0180: protected void setUp() throws Exception {
0181: }
0182:
0183: /**
0184: * Tears down the fixture, for example, close a network connection.
0185: * This method is called after a test is executed.
0186: */
0187: protected void tearDown() throws Exception {
0188: }
0189:
0190: //---------------------------------------------------------------------------
0191: // test cases
0192: //
0193:
// various constants
//
// Token images used both for registering special sequences and as expected
// values in the test cases below. Many deliberately overlap (e.g. "+", "++",
// "+++") to exercise longest-match resolution in the tokenizer.

// "+" operator family
private static final String PLUS = "+";
private static final String DOUBLE_PLUS = "++";
private static final String TRIPLE_PLUS = "+++";
private static final String PLUS_EQUAL = "+=";
private static final String PLUS_MINUS = "+-";
// HTML-like tags and comment delimiters (registered case-insensitively)
private static final String HTML_OPEN = "<";
private static final String HTML_COMMENT1 = "<!";
private static final String HTML_COMMENT2 = "<!--";
private static final String HTML_HEAD = "<head>";
private static final String HTML_HEADER = "<h>";
private static final String HTML_HT = "<ht>";
private static final String HTML_CLOSE = ">";
// "-" operator family and closing tags
private static final String MINUS = "-";
private static final String DOUBLE_MINUS = "--";
private static final String HTML_COMMENT_END = "-->";
private static final String HTML_HEAD_END = "</head>";
private static final String HTML_HEADER_END = "</h>";
private static final String SHIFT_LEFT = "<<";
private static final String SHIFT_RIGHT = ">>";
// NOTE(review): named COLON but actually holds a dot "." — confirm intent
private static final String COLON = ".";
// non-ASCII sequences for testNonASCIICharacters
private static final String EURO = "€";
private static final String DOUBLE_EURO = "€€";
private static final String EUROURO = "€uro";
private static final String AE = "æ";
private static final String OERE = "ø";
private static final String BUG = "ð";
private static final String DOUBLE_BUG = "ðð";
0222:
0223: /**
0224: * Test similar special sequences.
0225: */
0226: public void testSimilarSpecialSequences() throws Throwable {
0227: TokenizerSource source = getSource("lots+of++special+=sequences+in+++a+-row\n"
0228: + "with <HEAD>HTML-tags-in-between</head>\n"
0229: + "like <h>headings</h><open and close> tags\n"
0230: + "and <!even--comments-->+<!--in<ht>many+=forms-->>\n"
0231: + "some<<as>>operators.\n" + "+++++<<<>>>.\n");
0232: String[] expectedToken = { PLUS, DOUBLE_PLUS,
0233: PLUS_EQUAL,
0234: PLUS,
0235: TRIPLE_PLUS,
0236: PLUS_MINUS, // "lots+of++special+=sequences+in+++a+-row\n"
0237: HTML_HEAD, MINUS,
0238: MINUS,
0239: MINUS,
0240: HTML_HEAD_END, // "with <HEAD>HTML-tags-in-between</head>\n"
0241: HTML_HEADER,
0242: HTML_HEADER_END,
0243: HTML_OPEN,
0244: HTML_CLOSE, // "like <h>headings</h><open and close> tags\n"
0245: HTML_COMMENT1, DOUBLE_MINUS, HTML_COMMENT_END, PLUS,
0246: HTML_COMMENT2, HTML_HT, PLUS_EQUAL,
0247: HTML_COMMENT_END,
0248: HTML_CLOSE, // "and <!even--comments-->+<!--in<ht>many+=forms-->>\n"
0249: SHIFT_LEFT, SHIFT_RIGHT,
0250: COLON, // "some<<as>>operators."
0251: TRIPLE_PLUS, DOUBLE_PLUS, SHIFT_LEFT, HTML_OPEN,
0252: SHIFT_RIGHT, HTML_CLOSE, COLON // "+++++<<<>>>.\n"
0253: };
0254:
0255: TokenizerProperties props = new StandardTokenizerProperties();
0256: Tokenizer tokenizer = getTokenizer(props);
0257:
0258: try {
0259: props.addSpecialSequence(COLON, COLON);
0260: props.addSpecialSequence(PLUS, PLUS);
0261: props.addSpecialSequence(DOUBLE_PLUS, DOUBLE_PLUS);
0262: props.addSpecialSequence(TRIPLE_PLUS, TRIPLE_PLUS);
0263: props.addSpecialSequence(PLUS_EQUAL, PLUS_EQUAL);
0264: props.addSpecialSequence(PLUS_MINUS, PLUS_MINUS);
0265: props.addSpecialSequence(SHIFT_LEFT, SHIFT_LEFT);
0266: props.addSpecialSequence(HTML_OPEN, HTML_OPEN,
0267: Flags.F_NO_CASE);
0268: props.addSpecialSequence(HTML_COMMENT1, HTML_COMMENT1,
0269: Flags.F_NO_CASE);
0270: props.addSpecialSequence(HTML_COMMENT2, HTML_COMMENT2,
0271: Flags.F_NO_CASE);
0272: props.addSpecialSequence(HTML_HEAD, HTML_HEAD,
0273: Flags.F_NO_CASE);
0274: props.addSpecialSequence(HTML_HEADER, HTML_HEADER,
0275: Flags.F_NO_CASE);
0276: props.addSpecialSequence(HTML_HT, HTML_HT, Flags.F_NO_CASE);
0277: props.addSpecialSequence(HTML_CLOSE, HTML_CLOSE,
0278: Flags.F_NO_CASE);
0279: props.addSpecialSequence(SHIFT_RIGHT, SHIFT_RIGHT);
0280: props.addSpecialSequence(MINUS, MINUS);
0281: props.addSpecialSequence(DOUBLE_MINUS, DOUBLE_MINUS);
0282: props.addSpecialSequence(HTML_COMMENT_END,
0283: HTML_COMMENT_END, Flags.F_NO_CASE);
0284: props.addSpecialSequence(HTML_HEAD_END, HTML_HEAD_END,
0285: Flags.F_NO_CASE);
0286: props.addSpecialSequence(HTML_HEADER_END, HTML_HEADER_END,
0287: Flags.F_NO_CASE);
0288: tokenizer.setSource(source);
0289:
0290: // start tokenizing
0291: int index = 0;
0292:
0293: while (tokenizer.hasMoreToken()) {
0294: Token token = tokenizer.nextToken();
0295: boolean isOK;
0296:
0297: switch (token.getType()) {
0298: case Token.NORMAL:
0299: System.out.println(token.getImage());
0300: break;
0301: case Token.SPECIAL_SEQUENCE:
0302: if (props.isFlagSet(props.getSpecialSequence(token
0303: .getImage()), Flags.F_NO_CASE)) {
0304: isOK = expectedToken[index]
0305: .equalsIgnoreCase(token.getImage());
0306: } else {
0307: isOK = expectedToken[index].equals(token
0308: .getImage());
0309: }
0310: assertTrue("Index " + index + ": expected \""
0311: + expectedToken[index] + "\", got \""
0312: + token.getImage() + "\".", isOK);
0313: index++;
0314: break;
0315: }
0316: }
0317: } finally {
0318: tokenizer.close();
0319: }
0320: }
0321:
0322: /**
0323: * Test similar special sequences.
0324: */
0325: public void testNonASCIICharacters() throws Throwable {
0326: TokenizerSource source = getSource("1€ is an æ to much. Orøtakeðthis: €€ or €uro and ðð.");
0327:
0328: String[] expectedToken = { EURO, AE, OERE, BUG, DOUBLE_EURO,
0329: EUROURO, DOUBLE_BUG };
0330:
0331: TokenizerProperties props = new StandardTokenizerProperties();
0332: Tokenizer tokenizer = getTokenizer(props);
0333:
0334: try {
0335: props.addSpecialSequence(EURO, EURO);
0336: props.addSpecialSequence(DOUBLE_EURO, DOUBLE_EURO);
0337: props.addSpecialSequence(EUROURO, EUROURO);
0338: props.addSpecialSequence(AE, AE);
0339: props.addSpecialSequence(OERE, OERE);
0340: props.addSpecialSequence(BUG, BUG);
0341: props.addSpecialSequence(DOUBLE_BUG, DOUBLE_BUG);
0342: tokenizer.setSource(source);
0343:
0344: // start tokenizing
0345: int index = 0;
0346:
0347: while (tokenizer.hasMoreToken()) {
0348: Token token = tokenizer.nextToken();
0349: boolean isOK;
0350:
0351: switch (token.getType()) {
0352: case Token.NORMAL:
0353: System.out.println(token.getImage());
0354: break;
0355: case Token.SPECIAL_SEQUENCE:
0356: assertTrue("Index " + index + ": expected \""
0357: + expectedToken[index] + "\", got \""
0358: + token.getImage() + "\".",
0359: expectedToken[index].equals(token
0360: .getImage()));
0361: index++;
0362: break;
0363: }
0364: }
0365: } finally {
0366: tokenizer.close();
0367: }
0368: }
0369:
0370: /**
0371: * Test the case of an completely empty data source. This is always a good
0372: * candidate for failures :-)
0373: */
0374: public void testEmptySource() throws Throwable {
0375: TokenizerSource source = getSource("");
0376: TokenizerProperties props = new StandardTokenizerProperties();
0377: Tokenizer tokenizer = getTokenizer(props);
0378: Token token;
0379:
0380: try {
0381: props.setParseFlags(Flags.F_RETURN_WHITESPACES);
0382: props.addLineComment("//");
0383: tokenizer.setSource(source);
0384:
0385: assertTrue(tokenizer.hasMoreToken());
0386: token = tokenizer.nextToken();
0387: assertTrue(token.getType() == Token.EOF);
0388: assertTrue(!tokenizer.hasMoreToken());
0389: } finally {
0390: tokenizer.close();
0391: }
0392: }
0393:
0394: /**
0395: * Test small sources.
0396: */
0397: public void testSmallSource() throws Throwable {
0398: TokenizerProperties props = new StandardTokenizerProperties();
0399: Tokenizer tokenizer = getTokenizer(props);
0400: Token token;
0401:
0402: try {
0403: props.setParseFlags(Flags.F_RETURN_WHITESPACES);
0404: props.addLineComment("//");
0405: props.addSpecialSequence(PLUS, PLUS);
0406: props.addSpecialSequence(DOUBLE_PLUS, DOUBLE_PLUS);
0407: props.addSpecialSequence(MINUS, MINUS);
0408: props.addSpecialSequence(DOUBLE_MINUS, DOUBLE_MINUS);
0409:
0410: // a single character
0411: char[] contents = new char[8192];
0412: int bytes;
0413:
0414: tokenizer.setSource(getSource("A"));
0415:
0416: assertTrue(tokenizer.hasMoreToken());
0417: token = tokenizer.nextToken();
0418: assertTrue(token.getType() == Token.NORMAL);
0419: assertTrue(token.getImage().equals("A"));
0420: assertTrue(tokenizer.hasMoreToken());
0421: token = tokenizer.nextToken();
0422: assertTrue(token.getType() == Token.EOF);
0423: assertTrue(!tokenizer.hasMoreToken());
0424:
0425: // a single special sequence
0426: tokenizer.setSource(getSource("++"));
0427:
0428: assertTrue(tokenizer.hasMoreToken());
0429: token = tokenizer.nextToken();
0430: assertTrue(token.getType() == Token.SPECIAL_SEQUENCE);
0431: assertTrue(token.getCompanion() == DOUBLE_PLUS);
0432: assertTrue(tokenizer.hasMoreToken());
0433: token = tokenizer.nextToken();
0434: assertTrue(token.getType() == Token.EOF);
0435: assertTrue(!tokenizer.hasMoreToken());
0436:
0437: // an empty line comment
0438: tokenizer.setSource(getSource("//"));
0439:
0440: assertTrue(tokenizer.hasMoreToken());
0441: token = tokenizer.nextToken();
0442: assertTrue(token.getType() == Token.LINE_COMMENT);
0443: assertTrue(token.getImage().equals("//"));
0444: assertTrue(tokenizer.hasMoreToken());
0445: token = tokenizer.nextToken();
0446: assertTrue(token.getType() == Token.EOF);
0447: assertTrue(!tokenizer.hasMoreToken());
0448:
0449: } finally {
0450: // Cleanup
0451: tokenizer.close();
0452: }
0453: }
0454:
0455: /**
0456: * Test the case, when a line comment is not terminated by a newline character.
0457: * This happens when the last line of a file is a line comment without a
0458: * newline on its end.
0459: * This is a rather common situation.
0460: */
0461: public void testEOFInLineComment() throws Throwable {
0462: TokenizerSource source = getSource("// end of file occurs in line comment.");
0463: TokenizerProperties props = new StandardTokenizerProperties();
0464: Tokenizer tokenizer = getTokenizer(props);
0465: Token token;
0466:
0467: try {
0468: props.setParseFlags(Flags.F_RETURN_WHITESPACES);
0469: props.addLineComment("//");
0470: tokenizer.setSource(source);
0471:
0472: assertTrue(tokenizer.hasMoreToken());
0473: token = tokenizer.nextToken();
0474: assertTrue(token.getType() == Token.LINE_COMMENT);
0475: assertTrue(tokenizer.hasMoreToken());
0476: token = tokenizer.nextToken();
0477: assertTrue(token.getType() == Token.EOF);
0478: } finally {
0479: // Cleanup
0480: tokenizer.close();
0481: }
0482: }
0483:
0484: /**
0485: * Test the case, when a block comment is not terminated. That means EOF
0486: * occurs unexpectedly in a block comment.
0487: */
0488: public void testEOFInBlockComment() throws Throwable {
0489: TokenizerSource source = getSource("/* end of file occurs\nin a block comment.");
0490: TokenizerProperties props = new StandardTokenizerProperties();
0491: Tokenizer tokenizer = getTokenizer(props);
0492: Token token;
0493:
0494: try {
0495: props.setParseFlags(Flags.F_RETURN_WHITESPACES);
0496: props.addBlockComment("/*", "*/");
0497: tokenizer.setSource(source);
0498:
0499: assertTrue(tokenizer.hasMoreToken());
0500: token = tokenizer.nextToken();
0501: assertTrue(token.getType() == Token.BLOCK_COMMENT);
0502: assertTrue(tokenizer.hasMoreToken());
0503: token = tokenizer.nextToken();
0504: assertTrue(token.getType() == Token.EOF);
0505: } finally {
0506: // Cleanup
0507: tokenizer.close();
0508: }
0509: }
0510:
0511: /**
0512: * Test the case, when a block comment is not terminated. That means EOF
0513: * occurs unexpectedly in a block comment.
0514: */
0515: public void testEOFInString() throws Throwable {
0516: TokenizerSource source = getSource("-- end of file in String\n\"Thats the string, but rather unterminated |-(");
0517: TokenizerProperties props = new StandardTokenizerProperties();
0518: Tokenizer tokenizer = getTokenizer(props);
0519: Token token;
0520:
0521: try {
0522: props.addLineComment("--");
0523: props.addString("\"", "\"", "\"");
0524: tokenizer.setSource(source);
0525:
0526: assertTrue(tokenizer.hasMoreToken());
0527: token = tokenizer.nextToken();
0528: assertTrue(token.getType() == Token.STRING);
0529: assertTrue(tokenizer.hasMoreToken());
0530: token = tokenizer.nextToken();
0531: assertTrue(token.getType() == Token.EOF);
0532: } finally {
0533: // Cleanup
0534: tokenizer.close();
0535: }
0536: }
0537:
0538: /**
0539: * Test various calls to methods with a special contract.
0540: */
0541: public void testSpecialCalls() throws Throwable {
0542: TokenizerSource source = getSource("A simple text");
0543: TokenizerProperties props = new StandardTokenizerProperties();
0544: Tokenizer tokenizer = getTokenizer(props);
0545: Token token = null;
0546:
0547: try {
0548: tokenizer.setSource(source);
0549:
0550: try {
0551: tokenizer.currentToken();
0552: assertTrue(
0553: "Tokenizer should have thrown an exception here.",
0554: false);
0555: } catch (TokenizerException ex) {
0556: }
0557: ;
0558: try {
0559: tokenizer.currentImage();
0560: assertTrue(
0561: "Tokenizer should have thrown an exception here.",
0562: false);
0563: } catch (TokenizerException ex) {
0564: }
0565: ;
0566:
0567: while (tokenizer.hasMoreToken()) {
0568: Token newToken = tokenizer.nextToken();
0569: assertTrue(!tokenizer.currentToken().equals(token));
0570: assertTrue(tokenizer.currentToken() != null);
0571: assertTrue(tokenizer.currentToken().equals(newToken));
0572: assertTrue(tokenizer.currentToken().equals(
0573: tokenizer.currentToken()));
0574: if (newToken.getType() != Token.EOF) {
0575: assertTrue(tokenizer.currentImage() != null);
0576: assertTrue(tokenizer.currentImage().equals(
0577: tokenizer.currentImage()));
0578: } else {
0579: assertTrue(!tokenizer.hasMoreToken());
0580: }
0581: token = newToken;
0582: }
0583: } finally {
0584: // Cleanup
0585: tokenizer.close();
0586: }
0587: }
0588:
0589: /**
0590: * Test various situations of string escapes, if the escape character is the
0591: * backslash (not equal to the string character).
0592: * This test takes a number of lines each with a string including escapes in
0593: * it. It passes if the right number of strings is returned and also the line
0594: * counting is ok.
0595: */
0596: public void testStringEscapes1() throws Throwable {
0597: TokenizerSource source = getSource("\"String escape \\\" in the middle\"\n"
0598: + "\"String escape on end \\\"\"\n"
0599: + "\"\\\" String escape on begin\"\n"
0600: + "\"Two string escapes \\\"\\\" after each other\"\n"
0601: + "\"Two string escapes on end \\\"\\\"\"\n");
0602:
0603: int lines = 5;
0604: TokenizerProperties props = new StandardTokenizerProperties();
0605: Tokenizer tokenizer = getTokenizer(props);
0606: Token token;
0607:
0608: try {
0609: props.setParseFlags(Flags.F_RETURN_WHITESPACES
0610: | Flags.F_COUNT_LINES);
0611: props.addString("\"", "\"", "\\");
0612: tokenizer.setSource(source);
0613:
0614: for (int line = 0; line < lines; ++line) {
0615: assertTrue("(1) No more token at line " + line,
0616: tokenizer.hasMoreToken());
0617: token = tokenizer.nextToken();
0618: assertTrue("String not recognized at line " + line,
0619: token.getType() == Token.STRING);
0620: assertTrue("(2) No more token at line " + line,
0621: tokenizer.hasMoreToken());
0622: token = tokenizer.nextToken();
0623: assertTrue(
0624: "Newline not recognized as whitespace at line "
0625: + line,
0626: token.getType() == Token.WHITESPACE);
0627: }
0628: assertTrue(tokenizer.hasMoreToken());
0629: token = tokenizer.nextToken();
0630: assertTrue(token.getType() == Token.EOF);
0631: } finally {
0632: // Cleanup
0633: tokenizer.close();
0634: }
0635: }
0636:
0637: /**
0638: * Test various situations of string escapes, if the escape character is equal
0639: * to the string character).
0640: * This test takes a number of lines each with a string including escapes in
0641: * it. It passes if the right number of strings is returned and also the line
0642: * counting is ok.
0643: */
0644: public void testStringEscapes2() throws Throwable {
0645: TokenizerSource source = getSource("'String escape '' in the middle'\n"
0646: + "'String escape on end '''\n"
0647: + "''' String escape on begin'\n"
0648: + "'Two string escapes '''' after each other'\n"
0649: + "'Two string escapes on end '''''\n");
0650:
0651: int lines = 5;
0652: TokenizerProperties props = new StandardTokenizerProperties();
0653: Tokenizer tokenizer = getTokenizer(props);
0654: Token token;
0655:
0656: try {
0657: props.setParseFlags(Flags.F_RETURN_WHITESPACES
0658: | Flags.F_COUNT_LINES);
0659: props.addString("'", "'", "'");
0660: tokenizer.setSource(source);
0661:
0662: for (int line = 0; line < lines; ++line) {
0663: assertTrue("(1) No more token at line " + line,
0664: tokenizer.hasMoreToken());
0665: token = tokenizer.nextToken();
0666: assertTrue("String not recognized at line " + line,
0667: token.getType() == Token.STRING);
0668: assertTrue("(2) No more token at line " + line,
0669: tokenizer.hasMoreToken());
0670: token = tokenizer.nextToken();
0671: assertTrue(
0672: "Newline not recognized as whitespace at line "
0673: + line,
0674: token.getType() == Token.WHITESPACE);
0675: }
0676: assertTrue(tokenizer.hasMoreToken());
0677: token = tokenizer.nextToken();
0678: assertTrue(token.getType() == Token.EOF);
0679: } finally {
0680: // Cleanup
0681: tokenizer.close();
0682: }
0683: }
0684:
0685: /**
0686: * Test nested comments.
0687: */
0688: public void testNestedComments() throws Throwable {
0689: TokenizerSource source = getSource("// line comment including // line comment sequence\n"
0690: + "/* block comment with\n"
0691: + " /* a nested block\n"
0692: + " comment\n"
0693: + " */\n"
0694: + " normal token or not ?\n"
0695: + "*/\n"
0696: + "// line comment with /* block comment */\n"
0697: + "'a string with // line comment'\n"
0698: + "'a string with /* block comment */'\n");
0699:
0700: int lines = 10;
0701: TokenizerProperties props = new StandardTokenizerProperties();
0702: Tokenizer tokenizer = getTokenizer(props);
0703: Token token;
0704:
0705: try {
0706: props.setParseFlags(Flags.F_RETURN_WHITESPACES
0707: | Flags.F_COUNT_LINES
0708: | Flags.F_ALLOW_NESTED_COMMENTS);
0709: props
0710: .addLineComment(TokenizerProperties.DEFAULT_LINE_COMMENT);
0711: props.addBlockComment(
0712: TokenizerProperties.DEFAULT_BLOCK_COMMENT_START,
0713: TokenizerProperties.DEFAULT_BLOCK_COMMENT_END);
0714: props.addString("'", "'", "'");
0715: tokenizer.setSource(source);
0716:
0717: // first line comment
0718: assertTrue(tokenizer.hasMoreToken());
0719: token = tokenizer.nextToken();
0720: assertTrue("(1) line comment not recognized", token
0721: .getType() == Token.LINE_COMMENT);
0722: assertTrue("(2) wrong start position "
0723: + token.getStartPosition(), token
0724: .getStartPosition() == 0);
0725: assertTrue("(3) wrong start line " + token.getStartLine(),
0726: token.getStartLine() == 0);
0727: assertTrue("(4) wrong start column"
0728: + token.getStartColumn(),
0729: token.getStartColumn() == 0);
0730: assertTrue("(5) wrong end line " + token.getEndLine(),
0731: token.getEndLine() == token.getStartLine() + 1);
0732: assertTrue("(6) wrong end column" + token.getEndColumn(),
0733: token.getEndColumn() == 0);
0734:
0735: // block comment
0736: assertTrue(tokenizer.hasMoreToken());
0737: token = tokenizer.nextToken();
0738: assertTrue("(10) block comment not recognized", token
0739: .getType() == Token.BLOCK_COMMENT);
0740: assertTrue("(11) wrong start line " + token.getStartLine(),
0741: token.getStartLine() == 1);
0742: assertTrue("(12) wrong start column"
0743: + token.getStartColumn(),
0744: token.getStartColumn() == 0);
0745: assertTrue("(13) wrong end line " + token.getEndLine(),
0746: token.getEndLine() == token.getStartLine() + 5);
0747: assertTrue("(14) wrong end column" + token.getEndColumn(),
0748: token.getEndColumn() == 2);
0749: assertTrue(tokenizer.hasMoreToken());
0750: token = tokenizer.nextToken();
0751: assertTrue(
0752: "(15) newline behind block comment not recognized as whitespace",
0753: token.getType() == Token.WHITESPACE);
0754: assertTrue(
0755: "(16) newline behind block comment not recognized as literal",
0756: tokenizer.currentImage().equals("\n"));
0757:
0758: // second line comment
0759: assertTrue(tokenizer.hasMoreToken());
0760: token = tokenizer.nextToken();
0761: assertTrue("(21) line comment not recognized", token
0762: .getType() == Token.LINE_COMMENT);
0763: assertTrue("(22) wrong start line " + token.getStartLine(),
0764: token.getStartLine() == 7);
0765: assertTrue("(23) wrong end line " + token.getEndLine(),
0766: token.getEndLine() == token.getStartLine() + 1);
0767:
0768: // string 1
0769: assertTrue(tokenizer.hasMoreToken());
0770: token = tokenizer.nextToken();
0771: assertTrue("(31) string not recognized",
0772: token.getType() == Token.STRING);
0773: assertTrue("(32) wrong start line " + token.getStartLine(),
0774: token.getStartLine() == 8);
0775: assertTrue("(33) wrong start column"
0776: + token.getStartColumn(),
0777: token.getStartColumn() == 0);
0778: assertTrue("(34) wrong end line " + token.getEndLine(),
0779: token.getEndLine() == token.getStartLine());
0780: assertTrue(tokenizer.hasMoreToken());
0781: token = tokenizer.nextToken();
0782: assertTrue(
0783: "(35) newline behind string not recognized as whitespace",
0784: token.getType() == Token.WHITESPACE);
0785: assertTrue(
0786: "(36) newline behind string not recognized as literal",
0787: tokenizer.currentImage().equals("\n"));
0788:
0789: // string 2
0790: assertTrue(tokenizer.hasMoreToken());
0791: token = tokenizer.nextToken();
0792: assertTrue("(41) string not recognized",
0793: token.getType() == Token.STRING);
0794: assertTrue("(42) wrong start line " + token.getStartLine(),
0795: token.getStartLine() == 9);
0796: assertTrue("(43) wrong start column"
0797: + token.getStartColumn(),
0798: token.getStartColumn() == 0);
0799: assertTrue("(44) wrong end line " + token.getEndLine(),
0800: token.getEndLine() == token.getStartLine());
0801: assertTrue(tokenizer.hasMoreToken());
0802: token = tokenizer.nextToken();
0803: assertTrue(
0804: "(45) newline behind string not recognized as whitespace",
0805: token.getType() == Token.WHITESPACE);
0806: assertTrue(
0807: "(46) newline behind string not recognized as literal",
0808: tokenizer.currentImage().equals("\n"));
0809:
0810: // EOF should be reached here
0811: token = tokenizer.nextToken();
0812: assertTrue(token.getType() == Token.EOF);
0813:
0814: } finally {
0815: // Cleanup
0816: tokenizer.close();
0817: }
0818: }
0819:
0820: /**
0821: * Test reader switching
0822: */
0823: public void testReaderSwitching() throws Throwable {
0824: TokenizerSource source1 = getSource("0/2 4/6 8/10");
0825: TokenizerSource source2 = getSource("0/2 4/6 8/10");
0826: TokenizerSource source3 = getSource("0/2 4/6 8/10");
0827: TokenizerSource[] sources = { source1, source2, source3 };
0828:
0829: TokenizerProperties props = new StandardTokenizerProperties();
0830: Tokenizer tokenizer = getTokenizer(props);
0831: Token token;
0832:
0833: try {
0834: for (int sourceIndex = 0; sourceIndex < sources.length; ++sourceIndex) {
0835: tokenizer.setSource(sources[sourceIndex]);
0836: for (int ii = 0; ii <= 8; ii += 4) {
0837: assertTrue(tokenizer.hasMoreToken());
0838: token = tokenizer.nextToken();
0839: assertTrue("Wrong start position "
0840: + token.getStartPosition(), token
0841: .getStartPosition() == ii);
0842: assertTrue("Wrong type " + token.getType(), token
0843: .getType() == Token.NORMAL);
0844: assertTrue("Token not recognized as literal",
0845: tokenizer.currentImage().equals(
0846: Integer.toString(ii)));
0847: assertTrue(tokenizer.hasMoreToken());
0848: token = tokenizer.nextToken();
0849: assertTrue("Wrong start position "
0850: + token.getStartPosition(), token
0851: .getStartPosition() == ii + 1);
0852: assertTrue("Wrong type " + token.getType(), token
0853: .getType() == Token.SEPARATOR);
0854: assertTrue("Separator not recognized as literal",
0855: tokenizer.currentImage().equals("/"));
0856: assertTrue(tokenizer.hasMoreToken());
0857: token = tokenizer.nextToken();
0858: assertTrue("Wrong start position "
0859: + token.getStartPosition(), token
0860: .getStartPosition() == ii + 2);
0861: assertTrue("Wrong type " + token.getType(), token
0862: .getType() == Token.NORMAL);
0863: assertTrue("Token not recognized as literal",
0864: tokenizer.currentImage().equals(
0865: Integer.toString(ii + 2)));
0866: }
0867: }
0868: } finally {
0869: // Cleanup
0870: tokenizer.close();
0871: }
0872: }
0873:
0874: /**
0875: * Line counting and line comments in DOS files
0876: */
0877: public void testDOSEOL() throws Throwable {
0878: TokenizerSource source = getSource("// line comment with DOS line ending\r\n"
0879: + "void main(int argc)\r\n"
0880: + "{\r\n"
0881: + " // another line comment\r\n"
0882: + " /* a block comment\r\n"
0883: + " with more than one line\r\n"
0884: + " */\r\n"
0885: + "}\r\n");
0886:
0887: int lines = 8;
0888: TokenizerProperties props = new StandardTokenizerProperties();
0889: Tokenizer tokenizer = getTokenizer(props);
0890: Token token;
0891:
0892: try {
0893: props.setParseFlags(Flags.F_RETURN_WHITESPACES
0894: | Flags.F_COUNT_LINES);
0895: props
0896: .addLineComment(TokenizerProperties.DEFAULT_LINE_COMMENT);
0897: props.addBlockComment(
0898: TokenizerProperties.DEFAULT_BLOCK_COMMENT_START,
0899: TokenizerProperties.DEFAULT_BLOCK_COMMENT_END);
0900: props.addString("\"", "\"", "\\");
0901: tokenizer.setSource(source);
0902:
0903: // zero line comment
0904: assertTrue(tokenizer.hasMoreToken());
0905: token = tokenizer.nextToken();
0906: assertTrue("(1) line comment not recognized", token
0907: .getType() == Token.LINE_COMMENT);
0908: assertTrue("(2) start line wrong",
0909: token.getStartLine() == 0);
0910: assertTrue("(3) start column wrong",
0911: token.getStartColumn() == 0);
0912: assertTrue("(4) end line wrong", token.getEndLine() == 1);
0913: assertTrue("(5) end column wrong",
0914: token.getEndColumn() == 0);
0915:
0916: // first line: void
0917: assertTrue(tokenizer.hasMoreToken());
0918: token = tokenizer.nextToken();
0919: assertTrue("(10) token \"void\" not recognized.", token
0920: .getType() == Token.NORMAL
0921: && token.getImage().equals("void"));
0922: assertTrue("(11) start line wrong",
0923: token.getStartLine() == 1);
0924: assertTrue("(12) start column wrong", token
0925: .getStartColumn() == 0);
0926: assertTrue("(13) end line wrong", token.getEndLine() == 1);
0927: assertTrue("(14) end column wrong",
0928: token.getEndColumn() == 4);
0929:
0930: assertTrue(tokenizer.hasMoreToken());
0931: token = tokenizer.nextToken();
0932: assertTrue("(15) whitespace not recognized", token
0933: .getType() == Token.WHITESPACE);
0934:
0935: // first line: main
0936: assertTrue(tokenizer.hasMoreToken());
0937: token = tokenizer.nextToken();
0938: assertTrue("(20) token \"main\" not recognized.", token
0939: .getType() == Token.NORMAL
0940: && token.getImage().equals("main"));
0941: assertTrue("(21) start line wrong",
0942: token.getStartLine() == 1);
0943: assertTrue("(22) start column wrong", token
0944: .getStartColumn() == 5);
0945: assertTrue("(23) end line wrong", token.getEndLine() == 1);
0946: assertTrue("(24) end column wrong",
0947: token.getEndColumn() == 9);
0948:
0949: // first line: (
0950: assertTrue(tokenizer.hasMoreToken());
0951: token = tokenizer.nextToken();
0952: assertTrue("(30) token \"(\" not recognized.", token
0953: .getType() == Token.SEPARATOR
0954: && token.getImage().equals("("));
0955: assertTrue("(31) start line wrong",
0956: token.getStartLine() == 1);
0957: assertTrue("(32) start column wrong", token
0958: .getStartColumn() == 9);
0959: assertTrue("(33) end line wrong", token.getEndLine() == 1);
0960: assertTrue("(34) end column wrong",
0961: token.getEndColumn() == 10);
0962:
0963: // first line: int
0964: assertTrue(tokenizer.hasMoreToken());
0965: token = tokenizer.nextToken();
0966: assertTrue("(40) token \"int\" not recognized.", token
0967: .getType() == Token.NORMAL
0968: && token.getImage().equals("int"));
0969: assertTrue("(41) start line wrong",
0970: token.getStartLine() == 1);
0971: assertTrue("(42) start column wrong", token
0972: .getStartColumn() == 10);
0973: assertTrue("(43) end line wrong", token.getEndLine() == 1);
0974: assertTrue("(44) end column wrong",
0975: token.getEndColumn() == 13);
0976:
0977: assertTrue(tokenizer.hasMoreToken());
0978: token = tokenizer.nextToken();
0979: assertTrue("(45) whitespace not recognized", token
0980: .getType() == Token.WHITESPACE);
0981:
0982: // first line: argc
0983: assertTrue(tokenizer.hasMoreToken());
0984: token = tokenizer.nextToken();
0985: assertTrue("(50) token \"argc\" not recognized.", token
0986: .getType() == Token.NORMAL
0987: && token.getImage().equals("argc"));
0988: assertTrue("(51) start line wrong",
0989: token.getStartLine() == 1);
0990: assertTrue("(52) start column wrong", token
0991: .getStartColumn() == 14);
0992: assertTrue("(53) end line wrong", token.getEndLine() == 1);
0993: assertTrue("(54) end column wrong",
0994: token.getEndColumn() == 18);
0995:
0996: // first line: )
0997: assertTrue(tokenizer.hasMoreToken());
0998: token = tokenizer.nextToken();
0999: assertTrue("(60) token \")\" not recognized.", token
1000: .getType() == Token.SEPARATOR
1001: && token.getImage().equals(")"));
1002: assertTrue("(61) start line wrong",
1003: token.getStartLine() == 1);
1004: assertTrue("(62) start column wrong", token
1005: .getStartColumn() == 18);
1006: assertTrue("(63) end line wrong", token.getEndLine() == 1);
1007: assertTrue("(64) end column wrong",
1008: token.getEndColumn() == 19);
1009:
1010: // first line: EOL
1011: assertTrue(tokenizer.hasMoreToken());
1012: token = tokenizer.nextToken();
1013: assertTrue("(60) token \"\\r\\n\" not recognized.", token
1014: .getType() == Token.WHITESPACE
1015: && token.getImage().equals("\r\n"));
1016: assertTrue("(61) start line wrong",
1017: token.getStartLine() == 1);
1018: assertTrue("(62) start column wrong", token
1019: .getStartColumn() == 19);
1020: assertTrue("(63) end line wrong", token.getEndLine() == 2);
1021: assertTrue("(64) end column wrong",
1022: token.getEndColumn() == 0);
1023: assertTrue("(65) wrong length", token.getLength() == 2);
1024:
1025: // second line: {
1026: assertTrue(tokenizer.hasMoreToken());
1027: token = tokenizer.nextToken();
1028: assertTrue("(70) token \"{\" not recognized.", token
1029: .getType() == Token.SEPARATOR
1030: && token.getImage().equals("{"));
1031: assertTrue("(71) start line wrong",
1032: token.getStartLine() == 2);
1033: assertTrue("(72) start column wrong", token
1034: .getStartColumn() == 0);
1035: assertTrue("(73) end line wrong", token.getEndLine() == 2);
1036: assertTrue("(74) end column wrong",
1037: token.getEndColumn() == 1);
1038:
1039: // second/third line: EOL + whitespaces
1040: assertTrue(tokenizer.hasMoreToken());
1041: token = tokenizer.nextToken();
1042: assertTrue("(80) token \"\\r\\n \" not recognized.", token
1043: .getType() == Token.WHITESPACE
1044: && token.getImage().equals("\r\n "));
1045: assertTrue("(81) start line wrong",
1046: token.getStartLine() == 2);
1047: assertTrue("(82) start column wrong", token
1048: .getStartColumn() == 1);
1049: assertTrue("(83) end line wrong", token.getEndLine() == 3);
1050: assertTrue("(84) end column wrong",
1051: token.getEndColumn() == 2);
1052: assertTrue("(85) wrong length", token.getLength() == 4);
1053:
1054: // third line: line comment
1055: assertTrue(tokenizer.hasMoreToken());
1056: token = tokenizer.nextToken();
1057: assertTrue("(91) line comment not recognized", token
1058: .getType() == Token.LINE_COMMENT);
1059: assertTrue("(92) start line wrong",
1060: token.getStartLine() == 3);
1061: assertTrue("(93) start column wrong", token
1062: .getStartColumn() == 2);
1063: assertTrue("(94) end line wrong", token.getEndLine() == 4);
1064: assertTrue("(95) end column wrong",
1065: token.getEndColumn() == 0);
1066:
1067: assertTrue(tokenizer.hasMoreToken());
1068: token = tokenizer.nextToken();
1069: assertTrue("(96) whitespace not recognized", token
1070: .getType() == Token.WHITESPACE);
1071:
1072: // forth line: block comment
1073: assertTrue(tokenizer.hasMoreToken());
1074: token = tokenizer.nextToken();
1075: assertTrue("(101) block comment not recognized", token
1076: .getType() == Token.BLOCK_COMMENT);
1077: assertTrue("(102) start line wrong",
1078: token.getStartLine() == 4);
1079: assertTrue("(103) start column wrong", token
1080: .getStartColumn() == 2);
1081: assertTrue("(104) end line wrong", token.getEndLine() == 6);
1082: assertTrue("(105) end column wrong",
1083: token.getEndColumn() == 4);
1084:
1085: // 6th line: EOL
1086: assertTrue(tokenizer.hasMoreToken());
1087: token = tokenizer.nextToken();
1088: assertTrue("(110) token \"\\r\\n\" not recognized.", token
1089: .getType() == Token.WHITESPACE
1090: && token.getImage().equals("\r\n"));
1091: assertTrue("(111) start line wrong",
1092: token.getStartLine() == 6);
1093: assertTrue("(112) start column wrong", token
1094: .getStartColumn() == 4);
1095: assertTrue("(113) end line wrong", token.getEndLine() == 7);
1096: assertTrue("(114) end column wrong",
1097: token.getEndColumn() == 0);
1098: assertTrue("(115) wrong length", token.getLength() == 2);
1099:
1100: // 7th line: }
1101: assertTrue(tokenizer.hasMoreToken());
1102: token = tokenizer.nextToken();
1103: assertTrue("(120) token \"}\" not recognized.", token
1104: .getType() == Token.SEPARATOR
1105: && token.getImage().equals("}"));
1106: assertTrue("(121) start line wrong",
1107: token.getStartLine() == 7);
1108: assertTrue("(122) start column wrong", token
1109: .getStartColumn() == 0);
1110: assertTrue("(123) end line wrong", token.getEndLine() == 7);
1111: assertTrue("(124) end column wrong",
1112: token.getEndColumn() == 1);
1113:
1114: // 7th line: EOL
1115: assertTrue(tokenizer.hasMoreToken());
1116: token = tokenizer.nextToken();
1117: assertTrue("(130) token \"\\r\\n\" not recognized.", token
1118: .getType() == Token.WHITESPACE
1119: && token.getImage().equals("\r\n"));
1120: assertTrue("(131) start line wrong",
1121: token.getStartLine() == 7);
1122: assertTrue("(132) start column wrong", token
1123: .getStartColumn() == 1);
1124: assertTrue("(133) end line wrong", token.getEndLine() == 8);
1125: assertTrue("(134) end column wrong",
1126: token.getEndColumn() == 0);
1127: assertTrue("(135) wrong length", token.getLength() == 2);
1128:
1129: } finally {
1130: // Cleanup
1131: tokenizer.close();
1132: }
1133: }
1134:
1135: /**
1136: * Line counting and line comments in MAC files
1137: */
1138: public void testMACEOL() throws Throwable {
1139: TokenizerSource source = getSource("// line comment with DOS line ending\r"
1140: + "void main(int argc)\r"
1141: + "{\r"
1142: + " // another line comment\r"
1143: + " /* a block comment\r"
1144: + " with more than one line\r" + " */\r" + "}\r");
1145:
1146: int lines = 8;
1147: TokenizerProperties props = new StandardTokenizerProperties();
1148: Tokenizer tokenizer = getTokenizer(props);
1149: Token token;
1150:
1151: try {
1152: props.setParseFlags(Flags.F_RETURN_WHITESPACES
1153: | Flags.F_COUNT_LINES);
1154: props
1155: .addLineComment(TokenizerProperties.DEFAULT_LINE_COMMENT);
1156: props.addBlockComment(
1157: TokenizerProperties.DEFAULT_BLOCK_COMMENT_START,
1158: TokenizerProperties.DEFAULT_BLOCK_COMMENT_END);
1159: props.addString("\"", "\"", "\\");
1160: tokenizer.setSource(source);
1161:
1162: // zero line comment
1163: assertTrue(tokenizer.hasMoreToken());
1164: token = tokenizer.nextToken();
1165: assertTrue("(1) line comment not recognized", token
1166: .getType() == Token.LINE_COMMENT);
1167: assertTrue("(2) start line wrong",
1168: token.getStartLine() == 0);
1169: assertTrue("(3) start column wrong",
1170: token.getStartColumn() == 0);
1171: assertTrue("(4) end line wrong", token.getEndLine() == 1);
1172: assertTrue("(5) end column wrong",
1173: token.getEndColumn() == 0);
1174:
1175: // first line: void
1176: assertTrue(tokenizer.hasMoreToken());
1177: token = tokenizer.nextToken();
1178: assertTrue("(10) token \"void\" not recognized.", token
1179: .getType() == Token.NORMAL
1180: && token.getImage().equals("void"));
1181: assertTrue("(11) start line wrong",
1182: token.getStartLine() == 1);
1183: assertTrue("(12) start column wrong", token
1184: .getStartColumn() == 0);
1185: assertTrue("(13) end line wrong", token.getEndLine() == 1);
1186: assertTrue("(14) end column wrong",
1187: token.getEndColumn() == 4);
1188:
1189: assertTrue(tokenizer.hasMoreToken());
1190: token = tokenizer.nextToken();
1191: assertTrue("(15) whitespace not recognized", token
1192: .getType() == Token.WHITESPACE);
1193:
1194: // first line: main
1195: assertTrue(tokenizer.hasMoreToken());
1196: token = tokenizer.nextToken();
1197: assertTrue("(20) token \"main\" not recognized.", token
1198: .getType() == Token.NORMAL
1199: && token.getImage().equals("main"));
1200: assertTrue("(21) start line wrong",
1201: token.getStartLine() == 1);
1202: assertTrue("(22) start column wrong", token
1203: .getStartColumn() == 5);
1204: assertTrue("(23) end line wrong", token.getEndLine() == 1);
1205: assertTrue("(24) end column wrong",
1206: token.getEndColumn() == 9);
1207:
1208: // first line: (
1209: assertTrue(tokenizer.hasMoreToken());
1210: token = tokenizer.nextToken();
1211: assertTrue("(30) token \"(\" not recognized.", token
1212: .getType() == Token.SEPARATOR
1213: && token.getImage().equals("("));
1214: assertTrue("(31) start line wrong",
1215: token.getStartLine() == 1);
1216: assertTrue("(32) start column wrong", token
1217: .getStartColumn() == 9);
1218: assertTrue("(33) end line wrong", token.getEndLine() == 1);
1219: assertTrue("(34) end column wrong",
1220: token.getEndColumn() == 10);
1221:
1222: // first line: int
1223: assertTrue(tokenizer.hasMoreToken());
1224: token = tokenizer.nextToken();
1225: assertTrue("(40) token \"int\" not recognized.", token
1226: .getType() == Token.NORMAL
1227: && token.getImage().equals("int"));
1228: assertTrue("(41) start line wrong",
1229: token.getStartLine() == 1);
1230: assertTrue("(42) start column wrong", token
1231: .getStartColumn() == 10);
1232: assertTrue("(43) end line wrong", token.getEndLine() == 1);
1233: assertTrue("(44) end column wrong",
1234: token.getEndColumn() == 13);
1235:
1236: assertTrue(tokenizer.hasMoreToken());
1237: token = tokenizer.nextToken();
1238: assertTrue("(45) whitespace not recognized", token
1239: .getType() == Token.WHITESPACE);
1240:
1241: // first line: argc
1242: assertTrue(tokenizer.hasMoreToken());
1243: token = tokenizer.nextToken();
1244: assertTrue("(50) token \"argc\" not recognized.", token
1245: .getType() == Token.NORMAL
1246: && token.getImage().equals("argc"));
1247: assertTrue("(51) start line wrong",
1248: token.getStartLine() == 1);
1249: assertTrue("(52) start column wrong", token
1250: .getStartColumn() == 14);
1251: assertTrue("(53) end line wrong", token.getEndLine() == 1);
1252: assertTrue("(54) end column wrong",
1253: token.getEndColumn() == 18);
1254:
1255: // first line: )
1256: assertTrue(tokenizer.hasMoreToken());
1257: token = tokenizer.nextToken();
1258: assertTrue("(60) token \")\" not recognized.", token
1259: .getType() == Token.SEPARATOR
1260: && token.getImage().equals(")"));
1261: assertTrue("(61) start line wrong",
1262: token.getStartLine() == 1);
1263: assertTrue("(62) start column wrong", token
1264: .getStartColumn() == 18);
1265: assertTrue("(63) end line wrong", token.getEndLine() == 1);
1266: assertTrue("(64) end column wrong",
1267: token.getEndColumn() == 19);
1268:
1269: // first line: EOL
1270: assertTrue(tokenizer.hasMoreToken());
1271: token = tokenizer.nextToken();
1272: assertTrue("(60) token \"\\r\" not recognized.", token
1273: .getType() == Token.WHITESPACE
1274: && token.getImage().equals("\r"));
1275: assertTrue("(61) start line wrong",
1276: token.getStartLine() == 1);
1277: assertTrue("(62) start column wrong", token
1278: .getStartColumn() == 19);
1279: assertTrue("(63) end line wrong", token.getEndLine() == 2);
1280: assertTrue("(64) end column wrong",
1281: token.getEndColumn() == 0);
1282: assertTrue("(65) wrong length", token.getLength() == 1);
1283:
1284: // second line: {
1285: assertTrue(tokenizer.hasMoreToken());
1286: token = tokenizer.nextToken();
1287: assertTrue("(70) token \"{\" not recognized.", token
1288: .getType() == Token.SEPARATOR
1289: && token.getImage().equals("{"));
1290: assertTrue("(71) start line wrong",
1291: token.getStartLine() == 2);
1292: assertTrue("(72) start column wrong", token
1293: .getStartColumn() == 0);
1294: assertTrue("(73) end line wrong", token.getEndLine() == 2);
1295: assertTrue("(74) end column wrong",
1296: token.getEndColumn() == 1);
1297:
1298: // second/third line: EOL + whitespaces
1299: assertTrue(tokenizer.hasMoreToken());
1300: token = tokenizer.nextToken();
1301: assertTrue("(80) token \"\\r \" not recognized.", token
1302: .getType() == Token.WHITESPACE
1303: && token.getImage().equals("\r "));
1304: assertTrue("(81) start line wrong",
1305: token.getStartLine() == 2);
1306: assertTrue("(82) start column wrong", token
1307: .getStartColumn() == 1);
1308: assertTrue("(83) end line wrong", token.getEndLine() == 3);
1309: assertTrue("(84) end column wrong",
1310: token.getEndColumn() == 2);
1311: assertTrue("(85) wrong length", token.getLength() == 3);
1312:
1313: // third line: line comment
1314: assertTrue(tokenizer.hasMoreToken());
1315: token = tokenizer.nextToken();
1316: assertTrue("(91) line comment not recognized", token
1317: .getType() == Token.LINE_COMMENT);
1318: assertTrue("(92) start line wrong",
1319: token.getStartLine() == 3);
1320: assertTrue("(93) start column wrong", token
1321: .getStartColumn() == 2);
1322: assertTrue("(94) end line wrong", token.getEndLine() == 4);
1323: assertTrue("(95) end column wrong",
1324: token.getEndColumn() == 0);
1325:
1326: assertTrue(tokenizer.hasMoreToken());
1327: token = tokenizer.nextToken();
1328: assertTrue("(96) whitespace not recognized", token
1329: .getType() == Token.WHITESPACE);
1330:
1331: // forth line: block comment
1332: assertTrue(tokenizer.hasMoreToken());
1333: token = tokenizer.nextToken();
1334: assertTrue("(101) block comment not recognized", token
1335: .getType() == Token.BLOCK_COMMENT);
1336: assertTrue("(102) start line wrong",
1337: token.getStartLine() == 4);
1338: assertTrue("(103) start column wrong", token
1339: .getStartColumn() == 2);
1340: assertTrue("(104) end line wrong", token.getEndLine() == 6);
1341: assertTrue("(105) end column wrong",
1342: token.getEndColumn() == 4);
1343:
1344: // 6th line: EOL
1345: assertTrue(tokenizer.hasMoreToken());
1346: token = tokenizer.nextToken();
1347: assertTrue("(110) token \"\\r\" not recognized.", token
1348: .getType() == Token.WHITESPACE
1349: && token.getImage().equals("\r"));
1350: assertTrue("(111) start line wrong",
1351: token.getStartLine() == 6);
1352: assertTrue("(112) start column wrong", token
1353: .getStartColumn() == 4);
1354: assertTrue("(113) end line wrong", token.getEndLine() == 7);
1355: assertTrue("(114) end column wrong",
1356: token.getEndColumn() == 0);
1357: assertTrue("(115) wrong length", token.getLength() == 1);
1358:
1359: // 7th line: }
1360: assertTrue(tokenizer.hasMoreToken());
1361: token = tokenizer.nextToken();
1362: assertTrue("(120) token \"}\" not recognized.", token
1363: .getType() == Token.SEPARATOR
1364: && token.getImage().equals("}"));
1365: assertTrue("(121) start line wrong",
1366: token.getStartLine() == 7);
1367: assertTrue("(122) start column wrong", token
1368: .getStartColumn() == 0);
1369: assertTrue("(123) end line wrong", token.getEndLine() == 7);
1370: assertTrue("(124) end column wrong",
1371: token.getEndColumn() == 1);
1372:
1373: // 7th line: EOL
1374: assertTrue(tokenizer.hasMoreToken());
1375: token = tokenizer.nextToken();
1376: assertTrue("(130) token \"\\r\" not recognized.", token
1377: .getType() == Token.WHITESPACE
1378: && token.getImage().equals("\r"));
1379: assertTrue("(131) start line wrong",
1380: token.getStartLine() == 7);
1381: assertTrue("(132) start column wrong", token
1382: .getStartColumn() == 1);
1383: assertTrue("(133) end line wrong", token.getEndLine() == 8);
1384: assertTrue("(134) end column wrong",
1385: token.getEndColumn() == 0);
1386: assertTrue("(135) wrong length", token.getLength() == 1);
1387:
1388: } finally {
1389: // Cleanup
1390: tokenizer.close();
1391: }
1392: }
1393:
1394: /**
1395: * Line counting with setReadPosition
1396: */
1397: public void testLineCounting() throws Throwable {
1398: TokenizerSource source = getSource("01234 67 9\r\n"
1399: + "0 2 4 6 8\r" + " 1 3 5 7 9\n" + "01 34 67 9\n"
1400: + "/* block comment\n" + " in three lines\r\n"
1401: + "*/\n" + "// line comment 1\r"
1402: + "// line comment 2\r\n" + "// line comment 3\n"
1403: + "abc // line comment 1\r"
1404: + "01 34 67 // line comment 2\r\n"
1405: + "/* block comment */ // line comment 3\n");
1406:
1407: int[] expectedLines = { 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
1408: 3, 3, 3, 3, 4, 7, 8, 9, 10, 10, 11, 11, 11, 11, 12, 12 };
1409: int[] expectedColumns = { 0, 6, 9, 0, 2, 4, 6, 8, 1, 3, 5, 7,
1410: 9, 0, 3, 6, 9, 0, 0, 0, 0, 0, 4, 0, 3, 6, 9, 0, 20 };
1411:
1412: TokenizerProperties props = new StandardTokenizerProperties();
1413: Tokenizer tokenizer = getTokenizer(props);
1414: Token token1;
1415: Token token2;
1416: int line = 0;
1417: int column = 0;
1418: int index = 0;
1419:
1420: try {
1421: props.setParseFlags(Flags.F_RETURN_WHITESPACES
1422: | Flags.F_COUNT_LINES);
1423: props
1424: .addLineComment(TokenizerProperties.DEFAULT_LINE_COMMENT);
1425: props.addBlockComment(
1426: TokenizerProperties.DEFAULT_BLOCK_COMMENT_START,
1427: TokenizerProperties.DEFAULT_BLOCK_COMMENT_END);
1428: tokenizer.setSource(source);
1429:
1430: while (tokenizer.hasMoreToken()) {
1431: token1 = tokenizer.nextToken();
1432: assertTrue("Wrong line/column " + token1.getStartLine()
1433: + "/" + token1.getStartColumn(), token1
1434: .getStartLine() == line
1435: && token1.getStartColumn() == column);
1436:
1437: tokenizer.setReadPositionRelative(-token1.getLength());
1438: token2 = tokenizer.nextToken();
1439: assertTrue("Wrong line/column " + token2.getStartLine()
1440: + "/" + token2.getStartColumn(), token2
1441: .getStartLine() == line
1442: && token2.getStartColumn() == column);
1443:
1444: assertTrue("Token mismatch:\n " + token1 + "\n "
1445: + token2, token1.equals(token2));
1446:
1447: line = token1.getEndLine();
1448: column = token1.getEndColumn();
1449:
1450: // cross check the line and columns
1451: if (token1.getType() != Token.WHITESPACE
1452: && token1.getType() != Token.EOF) {
1453: assertTrue(
1454: "Expected line " + expectedLines[index]
1455: + ", found "
1456: + token1.getStartLine(),
1457: token1.getStartLine() == expectedLines[index]);
1458: assertTrue("Expected column "
1459: + expectedColumns[index] + ", found "
1460: + token1.getStartColumn(), token1
1461: .getStartColumn() == expectedColumns[index]);
1462: index++;
1463: }
1464: }
1465: } finally {
1466: // Cleanup
1467: tokenizer.close();
1468: }
1469: }
1470:
1471: /**
1472: * Test the uncommon whitespaces. Note that the \r\n-combination is only treated
1473: * as one newline only, if both characters fall into one token.
1474: */
1475: public void testUncommonWhitespaces() throws Throwable {
1476: String data = "This text has spaces\r"
1477: + "and newlines. Depending on the flags\n"
1478: + "the spaces are considered as special sequences\r\n"
1479: + "or real\twhitespaces.\n\n" + "/** also included\r"
1480: + "* are line and block comments\r" + "*/\n"
1481: + "here comes // the line comment\n"
1482: + "// and another\n";
1483:
1484: TokenizerProperties props = new StandardTokenizerProperties();
1485: Tokenizer tokenizer = getTokenizer(props);
1486: String[] ws = { "\r\n", " \t", " \t\n", " \t\r", " \n", " \r",
1487: "\t\r", "\t\n" };
1488: int[] wsCount = { 5, 18, 22, 20, 21, 19, 3, 5 };
1489: int[] seqCount = { 21, 7, 2, 5, 3, 6, 25, 22 };
1490: int[] lineCount = { 10, 11, 11, 11, 11, 11, 11, 11 };
1491: TokenizerProperty spaceProp = new TokenizerProperty(
1492: Token.SPECIAL_SEQUENCE, new String[] { " " });
1493: TokenizerProperty tabProp = new TokenizerProperty(
1494: Token.SPECIAL_SEQUENCE, new String[] { "\t" });
1495: TokenizerProperty lfProp = new TokenizerProperty(
1496: Token.SPECIAL_SEQUENCE, new String[] { "\n" });
1497: TokenizerProperty crProp = new TokenizerProperty(
1498: Token.SPECIAL_SEQUENCE, new String[] { "\r" });
1499:
1500: try {
1501: props.setParseFlags(Flags.F_RETURN_WHITESPACES
1502: | Flags.F_COUNT_LINES);
1503: props
1504: .addLineComment(TokenizerProperties.DEFAULT_LINE_COMMENT);
1505: props.addBlockComment(
1506: TokenizerProperties.DEFAULT_BLOCK_COMMENT_START,
1507: TokenizerProperties.DEFAULT_BLOCK_COMMENT_END);
1508: props.addProperty(spaceProp);
1509: props.addProperty(tabProp);
1510: props.addProperty(lfProp);
1511: props.addProperty(crProp);
1512:
1513: for (int ii = 0; ii < ws.length; ++ii) {
1514: int seqCounter = 0;
1515: int wsCounter = 0;
1516:
1517: props.setWhitespaces(ws[ii]);
1518: tokenizer.setSource(getSource(data));
1519:
1520: System.out.println("Loop " + ii);
1521: while (tokenizer.hasMoreToken()) {
1522: Token token = tokenizer.nextToken();
1523:
1524: System.out.println(token.toString());
1525: switch (token.getType()) {
1526: case Token.SPECIAL_SEQUENCE:
1527: seqCounter++;
1528: break;
1529: case Token.WHITESPACE:
1530: wsCounter++;
1531: break;
1532: case Token.EOF:
1533: assertTrue("Loop " + ii + ": Expected "
1534: + lineCount[ii] + " lines, got "
1535: + token.getEndLine(),
1536: lineCount[ii] == token.getEndLine());
1537: break;
1538: }
1539: }
1540: assertTrue("Loop " + ii + ": Expected " + wsCount[ii]
1541: + " whitespaces, got " + wsCounter,
1542: wsCount[ii] == wsCounter);
1543: assertTrue("Loop " + ii + ": Expected " + seqCount[ii]
1544: + " special sequences, got " + seqCounter,
1545: seqCount[ii] == seqCounter);
1546: }
1547: } finally {
1548: // Cleanup
1549: tokenizer.close();
1550: }
1551: }
1552:
1553: /**
1554: * Test the various whitespace flags
1555: */
1556: public void testWhitespaceHandling() throws Throwable {
1557: String data = "/* this is a block comment "
1558: + " followed by a newline (whitespace) sequence */\r\n"
1559: + "// a line comment\r\n"
1560: + "// another line comment\r\n"
1561: + " /* whitespaces with a block comment in between */ \n"
1562: + "// a EOF-terminated line comment";
1563:
1564: TokenizerProperties props = new StandardTokenizerProperties();
1565: Tokenizer tokenizer = getTokenizer(props);
1566: int[] flags = {
1567: Flags.F_RETURN_BLOCK_COMMENTS,
1568: Flags.F_RETURN_LINE_COMMENTS,
1569: Flags.F_RETURN_BLOCK_COMMENTS
1570: + Flags.F_RETURN_LINE_COMMENTS,
1571: Flags.F_RETURN_WHITESPACES,
1572: Flags.F_RETURN_LINE_COMMENTS
1573: + Flags.F_RETURN_SIMPLE_WHITESPACES,
1574: Flags.F_RETURN_BLOCK_COMMENTS
1575: + Flags.F_RETURN_SIMPLE_WHITESPACES,
1576: Flags.F_RETURN_SIMPLE_WHITESPACES, 0 };
1577: boolean[] propsFlag = { true, false };
1578:
1579: try {
1580: props
1581: .addLineComment(TokenizerProperties.DEFAULT_LINE_COMMENT);
1582: props.addBlockComment(
1583: TokenizerProperties.DEFAULT_BLOCK_COMMENT_START,
1584: TokenizerProperties.DEFAULT_BLOCK_COMMENT_END);
1585:
1586: for (int ii = 0; ii < propsFlag.length; ++ii) {
1587: for (int kk = 0; kk < flags.length; ++kk) {
1588: if (propsFlag[ii]) {
1589: props.setParseFlags(flags[kk]);
1590: } else {
1591: tokenizer.changeParseFlags(flags[kk],
1592: Flags.F_RETURN_WHITESPACES);
1593: }
1594:
1595: tokenizer.setSource(getSource(data));
1596:
1597: System.out.println("Loop " + ii + "/" + kk);
1598: while (tokenizer.hasMoreToken()) {
1599: Token token = tokenizer.nextToken();
1600:
1601: System.out.println(token.toString());
1602: switch (token.getType()) {
1603: case Token.BLOCK_COMMENT:
1604: assertTrue(
1605: "Tokenizer returned a block comment without the flag set: "
1606: + tokenizer.currentImage(),
1607: (flags[kk] & Flags.F_RETURN_BLOCK_COMMENTS) != 0);
1608: break;
1609: case Token.LINE_COMMENT:
1610: assertTrue(
1611: "Tokenizer returned a line comment without the flag set: "
1612: + tokenizer.currentImage(),
1613: (flags[kk] & Flags.F_RETURN_LINE_COMMENTS) != 0);
1614: break;
1615: case Token.WHITESPACE:
1616: assertTrue(
1617: "Tokenizer returned a simple whitespace sequence without the flag set: "
1618: + tokenizer.currentImage(),
1619: (flags[kk] & Flags.F_RETURN_SIMPLE_WHITESPACES) != 0);
1620: break;
1621: }
1622: }
1623: }
1624: }
1625: } finally {
1626: // Cleanup
1627: tokenizer.close();
1628: }
1629: }
1630:
1631: /**
1632: * Check mixed special sequences and separators
1633: */
1634: public void testSequencesAndSeparators() throws Throwable {
1635: String data = "(...::==:=: =====>==<=..()>>>>> >> >>>>)";
1636: int[] expected = { Token.SEPARATOR, // (
1637: Token.SPECIAL_SEQUENCE, // ..
1638: Token.SEPARATOR, // .
1639: Token.SEPARATOR, // :
1640: Token.SPECIAL_SEQUENCE, // :=
1641: Token.SEPARATOR, // =
1642: Token.SPECIAL_SEQUENCE, // :=
1643: Token.SEPARATOR, // :
1644: Token.SPECIAL_SEQUENCE, // ==
1645: Token.SPECIAL_SEQUENCE, // ==
1646: Token.SEPARATOR, // =
1647: Token.SPECIAL_SEQUENCE, // >=
1648: Token.SEPARATOR, // =
1649: Token.SPECIAL_SEQUENCE, // <=
1650: Token.SPECIAL_SEQUENCE, // ..
1651: Token.SPECIAL_SEQUENCE, // ()
1652: Token.SPECIAL_SEQUENCE, // >>>
1653: Token.SPECIAL_SEQUENCE, // >>
1654: Token.SPECIAL_SEQUENCE, // >>
1655: Token.SPECIAL_SEQUENCE, // >>>
1656: Token.SEPARATOR, // >
1657: Token.SEPARATOR, // )
1658: Token.EOF };
1659:
1660: TokenizerProperties props = new StandardTokenizerProperties();
1661: Tokenizer tokenizer = getTokenizer(props);
1662: int count = 0;
1663:
1664: try {
1665: props.addSpecialSequence(":=");
1666: props.addSpecialSequence(">=");
1667: props.addSpecialSequence("<=");
1668: props.addSpecialSequence("==");
1669: props.addSpecialSequence("..");
1670: props.addSpecialSequence("()");
1671: props.addSpecialSequence("..");
1672: props.addSpecialSequence(">>>");
1673: props.addSpecialSequence(">>");
1674:
1675: tokenizer.setSource(getSource(data));
1676:
1677: while (tokenizer.hasMoreToken()) {
1678: Token token = tokenizer.nextToken();
1679:
1680: System.out.println(token.getImage());
1681: assertTrue("Token #" + (count + 1) + ": expected type "
1682: + Token.getTypeName(expected[count]) + ", got "
1683: + Token.getTypeName(token.getType()), token
1684: .getType() == expected[count]);
1685: count++;
1686: }
1687: } finally {
1688: tokenizer.close();
1689: }
1690: }
1691:
1692: //---------------------------------------------------------------------------
1693: // Implementation
1694: //
1695:
1696: /**
1697: * Get the {@link TokenizerSource}.
1698: */
1699: private TokenizerSource getSource(String data) {
1700: try {
1701: return (TokenizerSource) _sourceClass.getConstructor(
1702: new Class[] { String.class }).newInstance(
1703: new Object[] { data });
1704: } catch (Throwable ex) {
1705: return new ReaderSource(new StringReader(data));
1706: }
1707: }
1708:
1709: /**
1710: * Get the {@link Tokenizer} instance according to the class passed to the
1711: * constructor.
1712: */
1713: private Tokenizer getTokenizer(TokenizerProperties props)
1714: throws Throwable {
1715: return new StandardTokenizer(props);
1716: }
1717:
1718: //---------------------------------------------------------------------------
1719: // Members
1720: //
1721: private Class _sourceClass;
1722: }
|