001: /*
002: * Copyright 2004-2008 H2 Group. Licensed under the H2 License, Version 1.0
003: * (license2)
004: * Initial Developer: H2 Group
005: */
006: package org.h2.test.coverage;
007:
008: import java.io.EOFException;
009: import java.io.IOException;
010: import java.io.Reader;
011:
/**
 * Helper class for the java file parser. Splits the characters supplied by a
 * {@link Reader} into words, quoted literals and single-character tokens,
 * skipping whitespace, block comments and line comments, and counts line
 * breaks while doing so.
 * <p>
 * Besides the decoded value of a token ({@link #getString()}), every
 * character read from the reader is recorded in an internal buffer. The
 * buffer is reset by {@link #initToken()} (which {@link #nextToken()} also
 * calls itself at the start of each word and quoted literal) and can be
 * retrieved with {@link #getToken()}.
 */
public class Tokenizer {

    /**
     * Token type returned by {@link #nextToken()} when no more characters
     * are available.
     */
    public static final int TYPE_EOF = -1;

    /**
     * Token type returned by {@link #nextToken()} for a word.
     */
    public static final int TYPE_WORD = -2;

    // initial value of 'type': no character has been read yet
    private static final int TYPE_NOTHING = -3;

    // bit flags stored in charTypes
    private static final byte WHITESPACE = 1;
    private static final byte ALPHA = 4;
    private static final byte QUOTE = 8;

    // raw characters read since the last initToken(); created eagerly so
    // that nextToken() can be called without a prior initToken()
    private StringBuffer buffer = new StringBuffer();

    private final Reader reader;

    // scratch space for the decoded value of the current token; grows on demand
    private char[] chars = new char[20];

    // one character of lookahead; a negative value means "nothing buffered"
    private int peekChar;

    private int line = 1;

    // character classes for the characters 0..255; characters >= 256 are ALPHA
    private final byte[] charTypes = new byte[256];

    private int type = TYPE_NOTHING;
    private String value;

    /**
     * Creates a tokenizer that reads from the given reader.
     *
     * @param r the reader that supplies the characters
     */
    public Tokenizer(Reader r) {
        wordChars('a', 'z');
        wordChars('A', 'Z');
        wordChars('0', '9');
        wordChars('.', '.');
        wordChars('+', '+');
        wordChars('-', '-');
        wordChars('_', '_');
        wordChars(128 + 32, 255);
        whitespaceChars(0, ' ');
        charTypes['"'] = QUOTE;
        charTypes['\''] = QUOTE;
        reader = r;
    }

    /**
     * Returns the decoded value of the current token.
     *
     * @return the word, or the contents of the quoted literal with escape
     *         sequences resolved, or null if the last token was neither
     */
    public String getString() {
        return value;
    }

    /**
     * Marks all characters in the given range (inclusive) as word characters.
     */
    private void wordChars(int low, int hi) {
        while (low <= hi) {
            charTypes[low++] |= ALPHA;
        }
    }

    /**
     * Marks all characters in the given range (inclusive) as whitespace,
     * replacing any previous classification.
     */
    private void whitespaceChars(int low, int hi) {
        while (low <= hi) {
            charTypes[low++] = WHITESPACE;
        }
    }

    /**
     * Reads one character from the reader and records it in the raw buffer.
     *
     * @return the character, or -1 at the end of the input
     */
    private int read() throws IOException {
        int i = reader.read();
        if (i != -1) {
            append(i);
        }
        return i;
    }

    /**
     * Resets the buffer that records the raw characters read.
     */
    public void initToken() {
        buffer = new StringBuffer();
    }

    /**
     * Returns the raw characters recorded since the buffer was last reset,
     * without the most recently recorded character (after a token this is
     * normally the lookahead character). The recorded text is shortened by
     * this call.
     *
     * @return the recorded text, or an empty string if nothing was recorded
     */
    public String getToken() {
        int length = buffer.length();
        if (length > 0) {
            buffer.setLength(length - 1);
        }
        return buffer.toString();
    }

    private void append(int i) {
        buffer.append((char) i);
    }

    /**
     * Reads the next token.
     *
     * @return {@link #TYPE_EOF} at the end of the input, {@link #TYPE_WORD}
     *         for a word, the quote character for a quoted literal, or the
     *         character itself for any other single-character token
     * @throws IOException if reading from the underlying reader fails
     */
    public int nextToken() throws IOException {
        int c;
        value = null;

        if (type == TYPE_NOTHING) {
            c = read();
            if (c >= 0) {
                type = c;
            }
        } else {
            c = peekChar;
            if (c < 0) {
                try {
                    c = read();
                    if (c >= 0) {
                        type = c;
                    }
                } catch (EOFException e) {
                    c = -1;
                }
            }
        }
        if (c < 0) {
            return endOfFile();
        }

        int charType = charTypeOf(c);
        while ((charType & WHITESPACE) != 0) {
            if (c == '\r') {
                line++;
                c = read();
                // "\r\n" counts as a single line break
                if (c == '\n') {
                    c = read();
                }
            } else {
                if (c == '\n') {
                    line++;
                }
                c = read();
            }
            if (c < 0) {
                return endOfFile();
            }
            charType = charTypeOf(c);
        }

        if ((charType & ALPHA) != 0) {
            return readWord(c);
        }
        if ((charType & QUOTE) != 0) {
            return readQuoted(c);
        }
        if (c == '/') {
            return readSlash();
        }
        peekChar = read();
        return type = c;
    }

    /**
     * Returns the current line number. Counting starts at 1; the counter is
     * incremented for each "\r", "\n" or "\r\n" that is consumed as
     * whitespace or inside a block comment.
     *
     * @return the line number
     */
    public int getLine() {
        return line;
    }

    /**
     * Returns the character class flags of a non-negative character.
     */
    private int charTypeOf(int c) {
        return c < 256 ? charTypes[c] : ALPHA;
    }

    /**
     * Records the end of the input. The lookahead is cleared so that every
     * following call of nextToken() reports the end of the input as well.
     */
    private int endOfFile() {
        peekChar = -1;
        return type = TYPE_EOF;
    }

    /**
     * Stores a character of the current token value, growing the scratch
     * array if required.
     */
    private void store(int index, int c) {
        if (index >= chars.length) {
            char[] grown = new char[chars.length * 2];
            System.arraycopy(chars, 0, grown, 0, chars.length);
            chars = grown;
        }
        chars[index] = (char) c;
    }

    /**
     * Reads a word that starts with the given (already consumed) character.
     */
    private int readWord(int first) throws IOException {
        initToken();
        append(first);
        int length = 0;
        int c = first;
        int charType;
        do {
            store(length++, c);
            c = read();
            // the end of the input terminates the word like whitespace does
            charType = c < 0 ? WHITESPACE : charTypeOf(c);
        } while ((charType & ALPHA) != 0);
        peekChar = c;
        value = String.copyValueOf(chars, 0, length);
        return type = TYPE_WORD;
    }

    /**
     * Reads a quoted literal whose opening quote has already been consumed.
     * The literal ends at the matching quote, at a line break, or at the end
     * of the input.
     */
    private int readQuoted(int quote) throws IOException {
        initToken();
        append(quote);
        type = quote;
        int length = 0;
        // \octal needs a lookahead
        peekChar = read();
        while (peekChar >= 0 && peekChar != quote
                && peekChar != '\n' && peekChar != '\r') {
            int c;
            if (peekChar == '\\') {
                int first = read();
                if (first < 0) {
                    // input ends right after the backslash: there is no
                    // escaped character to add to the value
                    peekChar = first;
                    break;
                }
                c = readEscape(first);
            } else {
                c = peekChar;
                peekChar = read();
            }
            store(length++, c);
        }
        if (peekChar == quote) {
            // consume the closing quote; keep \n or \r intact in peekChar
            peekChar = read();
        }
        value = String.copyValueOf(chars, 0, length);
        return type;
    }

    /**
     * Decodes the escape sequence that starts with the given character (the
     * one following the backslash) and leaves the character after the
     * sequence in peekChar.
     */
    private int readEscape(int first) throws IOException {
        if (first >= '0' && first <= '7') {
            int code = first - '0';
            int next = read();
            if ('0' <= next && next <= '7') {
                code = (code << 3) + (next - '0');
                next = read();
                // a third digit is allowed for \377, but not for \477
                if ('0' <= next && next <= '7' && first <= '3') {
                    code = (code << 3) + (next - '0');
                    next = read();
                }
            }
            peekChar = next;
            return code;
        }
        peekChar = read();
        switch (first) {
        case 'b':
            return '\b';
        case 'f':
            return '\f';
        case 'n':
            return '\n';
        case 'r':
            return '\r';
        case 't':
            return '\t';
        default:
            // any other character stands for itself
            return first;
        }
    }

    /**
     * Handles a '/' that has already been consumed: skips a block comment or
     * a line comment and returns the token that follows it, or returns the
     * '/' itself as a single-character token.
     */
    private int readSlash() throws IOException {
        int c = read();
        if (c == '*') {
            return skipBlockComment();
        }
        if (c == '/') {
            do {
                c = read();
            } while (c != '\n' && c != '\r' && c >= 0);
            // the line break is left in peekChar so that it is counted by
            // the whitespace handling of the next token
            peekChar = c;
            return nextToken();
        }
        peekChar = c;
        return type = '/';
    }

    /**
     * Skips the remainder of a block comment whose opening characters have
     * already been consumed, counting every line break in it, and returns
     * the token that follows the comment.
     */
    private int skipBlockComment() throws IOException {
        int prevChar = 0;
        int c = read();
        while (c >= 0 && !(c == '/' && prevChar == '*')) {
            if (c == '\r') {
                line++;
                prevChar = c;
                c = read();
                // "\r\n" counts as a single line break
                if (c == '\n') {
                    prevChar = c;
                    c = read();
                }
            } else {
                if (c == '\n') {
                    line++;
                }
                prevChar = c;
                c = read();
            }
        }
        if (c < 0) {
            // unterminated comment
            return endOfFile();
        }
        peekChar = read();
        return nextToken();
    }
}
|