001: /*
002: * TestTokenizer.java
003: *
004: * This work is free software; you can redistribute it and/or modify
005: * it under the terms of the GNU General Public License as published
006: * by the Free Software Foundation; either version 2 of the License,
007: * or (at your option) any later version.
008: *
009: * This work is distributed in the hope that it will be useful, but
010: * WITHOUT ANY WARRANTY; without even the implied warranty of
011: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
012: * General Public License for more details.
013: *
014: * You should have received a copy of the GNU General Public License
015: * along with this program; if not, write to the Free Software
016: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
017: * USA
018: *
019: * As a special exception, the copyright holders of this library give
020: * you permission to link this library with independent modules to
021: * produce an executable, regardless of the license terms of these
022: * independent modules, and to copy and distribute the resulting
023: * executable under terms of your choice, provided that you also meet,
024: * for each linked independent module, the terms and conditions of the
025: * license of that module. An independent module is a module which is
026: * not derived from or based on this library. If you modify this
027: * library, you may extend this exception to your version of the
028: * library, but you are not obligated to do so. If you do not wish to
029: * do so, delete this exception statement from your version.
030: *
031: * Copyright (c) 2003 Per Cederberg. All rights reserved.
032: */
033:
034: package net.percederberg.grammatica.parser;
035:
036: import java.io.StringReader;
037:
038: import junit.framework.TestCase;
039:
040: /**
041: * A test case for the Tokenizer class.
042: *
043: * @author Per Cederberg, <per at percederberg dot net>
044: * @version 1.4
045: */
046: public class TestTokenizer extends TestCase {
047:
048: /**
049: * The end of file token identifier.
050: */
051: private static final int EOF = 0;
052:
053: /**
054: * The keyword token identifier.
055: */
056: private static final int KEYWORD = 1;
057:
058: /**
059: * The number token identifier.
060: */
061: private static final int NUMBER = 2;
062:
063: /**
064: * The whitespace token identifier.
065: */
066: private static final int WHITESPACE = 3;
067:
068: /**
069: * The error token identifier.
070: */
071: private static final int ERROR = 4;
072:
073: /**
074: * Test various invalid patterns.
075: */
076: public void testInvalidPattern() {
077: Tokenizer tokenizer = createTokenizer("");
078: TokenPattern pattern;
079:
080: pattern = new TokenPattern(NUMBER, "NUMBER",
081: TokenPattern.REGEXP_TYPE + 13, "13");
082: failAddPattern(tokenizer, pattern);
083: pattern = new TokenPattern(NUMBER, "NUMBER",
084: TokenPattern.REGEXP_TYPE, "1(3");
085: failAddPattern(tokenizer, pattern);
086: }
087:
088: /**
089: * Tests the tokenizer with empty input.
090: */
091: public void testEmptyInput() {
092: Tokenizer tokenizer = createDefaultTokenizer("");
093:
094: readToken(tokenizer, EOF);
095: }
096:
097: /**
098: * Tests the ignored tokens.
099: */
100: public void testIgnoreTokens() {
101: Tokenizer tokenizer = createDefaultTokenizer(" 12 keyword 0 ");
102:
103: readToken(tokenizer, NUMBER);
104: readToken(tokenizer, KEYWORD);
105: readToken(tokenizer, NUMBER);
106: readToken(tokenizer, EOF);
107: }
108:
109: /**
110: * Tests the ignored tokens.
111: */
112: public void testErrorTokens() {
113: Tokenizer tokenizer = createDefaultTokenizer("12 error1 ");
114:
115: readToken(tokenizer, NUMBER);
116: failReadToken(tokenizer);
117: readToken(tokenizer, NUMBER);
118: readToken(tokenizer, EOF);
119: }
120:
121: /**
122: * Test the parse error recovery.
123: */
124: public void testParseError() {
125: Tokenizer tokenizer = createDefaultTokenizer("12 (keyword)");
126:
127: readToken(tokenizer, NUMBER);
128: failReadToken(tokenizer);
129: readToken(tokenizer, KEYWORD);
130: failReadToken(tokenizer);
131: readToken(tokenizer, EOF);
132: }
133:
134: /**
135: * Tests the token list functions.
136: */
137: public void testTokenList() {
138: Tokenizer tokenizer = createDefaultTokenizer("12 keyword 0");
139: Token token;
140:
141: assertEquals("default token list setting", false, tokenizer
142: .getUseTokenList());
143: tokenizer.setUseTokenList(true);
144: token = readToken(tokenizer, NUMBER);
145: readToken(tokenizer, KEYWORD);
146: readToken(tokenizer, NUMBER);
147: readToken(tokenizer, EOF);
148: assertEquals("previous token", null, token.getPreviousToken());
149: token = token.getNextToken();
150: assertEquals("token id", WHITESPACE, token.getId());
151: token = token.getNextToken();
152: assertEquals("token id", KEYWORD, token.getId());
153: token = token.getNextToken();
154: assertEquals("token id", WHITESPACE, token.getId());
155: token = token.getNextToken();
156: assertEquals("token id", NUMBER, token.getId());
157: assertEquals("next token", null, token.getNextToken());
158: token = token.getPreviousToken();
159: assertEquals("token id", WHITESPACE, token.getId());
160: token = token.getPreviousToken();
161: assertEquals("token id", KEYWORD, token.getId());
162: token = token.getPreviousToken();
163: assertEquals("token id", WHITESPACE, token.getId());
164: token = token.getPreviousToken();
165: assertEquals("token id", NUMBER, token.getId());
166: }
167:
168: /**
169: * Creates a new tokenizer.
170: *
171: * @param input the input string
172: *
173: * @return a new tokenizer
174: */
175: private Tokenizer createTokenizer(String input) {
176: return new Tokenizer(new StringReader(input));
177: }
178:
179: /**
180: * Creates a new default tokenizer that recognizes a trivial
181: * language.
182: *
183: * @param input the input string
184: *
185: * @return a new tokenizer
186: */
187: private Tokenizer createDefaultTokenizer(String input) {
188: Tokenizer tokenizer = createTokenizer(input);
189: TokenPattern pattern;
190:
191: pattern = new TokenPattern(KEYWORD, "KEYWORD",
192: TokenPattern.STRING_TYPE, "keyword");
193: addPattern(tokenizer, pattern);
194: pattern = new TokenPattern(NUMBER, "NUMBER",
195: TokenPattern.REGEXP_TYPE, "[0-9]+");
196: addPattern(tokenizer, pattern);
197: pattern = new TokenPattern(WHITESPACE, "WHITESPACE",
198: TokenPattern.REGEXP_TYPE, "[ \t\n]+");
199: pattern.setIgnore();
200: addPattern(tokenizer, pattern);
201: pattern = new TokenPattern(ERROR, "ERROR",
202: TokenPattern.STRING_TYPE, "error");
203: pattern.setError();
204: addPattern(tokenizer, pattern);
205:
206: return tokenizer;
207: }
208:
209: /**
210: * Adds a pattern to the tokenizer and reports a test failure if
211: * it failed.
212: *
213: * @param tokenizer the tokenizer
214: * @param pattern the pattern to add
215: */
216: private void addPattern(Tokenizer tokenizer, TokenPattern pattern) {
217: try {
218: tokenizer.addPattern(pattern);
219: } catch (ParserCreationException e) {
220: fail("couldn't add pattern " + pattern.getName() + ": "
221: + e.getMessage());
222: }
223: }
224:
225: /**
226: * Adds a pattern to the tokenizer and reports a test failure if
227: * it failed.
228: *
229: * @param tokenizer the tokenizer
230: * @param pattern the pattern to add
231: */
232: private void failAddPattern(Tokenizer tokenizer,
233: TokenPattern pattern) {
234: try {
235: tokenizer.addPattern(pattern);
236: fail("could add pattern " + pattern.getName());
237: } catch (ParserCreationException e) {
238: // Failure was expected
239: }
240: }
241:
242: /**
243: * Reads the next token. This method reports a test failure if a
244: * token couldn't be read.
245: *
246: * @param tokenizer the tokenizer to use
247: *
248: * @return the token read
249: */
250: private Token readToken(Tokenizer tokenizer) {
251: try {
252: return tokenizer.next();
253: } catch (ParseException e) {
254: fail("couldn't read next token: " + e.getMessage());
255: return null; // Unreachable
256: }
257: }
258:
259: /**
260: * Reads the next token and checks it's id. This method reports a
261: * test failure if the right token couldn't be read.
262: *
263: * @param tokenizer the tokenizer to use
264: * @param id the expected token id
265: *
266: * @return the token read
267: */
268: private Token readToken(Tokenizer tokenizer, int id) {
269: Token token = readToken(tokenizer);
270:
271: if (id == EOF) {
272: if (token != null) {
273: fail("expected end of file, found " + token);
274: }
275: } else {
276: if (token != null) {
277: assertEquals("token id", id, token.getId());
278: } else {
279: fail("expected " + id + ", found EOF");
280: }
281: }
282: return token;
283: }
284:
285: /**
286: * Fails to read the next token. This method reports a test
287: * failure if a token could be read.
288: *
289: * @param tokenizer the tokenizer to use
290: */
291: private void failReadToken(Tokenizer tokenizer) {
292: Token token;
293:
294: try {
295: token = tokenizer.next();
296: fail("could read token " + token.toString());
297: } catch (ParseException e) {
298: // Failure was expected
299: }
300: }
301: }
|