001: // Copyright (c) 2003-2004 Brian Wellington (bwelling@xbill.org)
002: //
003: // Copyright (C) 2003-2004 Nominum, Inc.
004: //
005: // Permission to use, copy, modify, and distribute this software for any
006: // purpose with or without fee is hereby granted, provided that the above
007: // copyright notice and this permission notice appear in all copies.
008: //
009: // THE SOFTWARE IS PROVIDED "AS IS" AND NOMINUM DISCLAIMS ALL WARRANTIES
010: // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
011: // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NOMINUM BE LIABLE FOR ANY
012: // SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
013: // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
014: // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
015: // OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
016: //
017:
018: package org.xbill.DNS;
019:
020: import java.io.*;
021: import java.net.*;
022:
023: import org.xbill.DNS.utils.*;
024:
025: /**
026: * Tokenizer is used to parse DNS records and zones from text format.
027: *
028: * @author Brian Wellington
029: * @author Bob Halley
030: */
031:
032: public class Tokenizer {
033:
034: private static String delim = " \t\n;()\"";
035: private static String quotes = "\"";
036:
037: /** End of file */
038: public static final int EOF = 0;
039:
040: /** End of line */
041: public static final int EOL = 1;
042:
043: /** Whitespace; only returned when wantWhitespace is set */
044: public static final int WHITESPACE = 2;
045:
046: /** An identifier (unquoted string) */
047: public static final int IDENTIFIER = 3;
048:
049: /** A quoted string */
050: public static final int QUOTED_STRING = 4;
051:
052: /** A comment; only returned when wantComment is set */
053: public static final int COMMENT = 5;
054:
055: private PushbackInputStream is;
056: private boolean ungottenToken;
057: private int multiline;
058: private boolean quoting;
059: private String delimiters;
060: private Token current;
061: private StringBuffer sb;
062: private boolean wantClose;
063:
064: private String filename;
065: private int line;
066:
067: public static class Token {
068: /** The type of token. */
069: public int type;
070:
071: /** The value of the token, or null for tokens without values. */
072: public String value;
073:
074: private Token() {
075: type = -1;
076: value = null;
077: }
078:
079: private Token set(int type, StringBuffer value) {
080: if (type < 0)
081: throw new IllegalArgumentException();
082: this .type = type;
083: this .value = value == null ? null : value.toString();
084: return this ;
085: }
086:
087: /**
088: * Converts the token to a string containing a representation useful
089: * for debugging.
090: */
091: public String toString() {
092: switch (type) {
093: case EOF:
094: return "<eof>";
095: case EOL:
096: return "<eol>";
097: case WHITESPACE:
098: return "<whitespace>";
099: case IDENTIFIER:
100: return "<identifier: " + value + ">";
101: case QUOTED_STRING:
102: return "<quoted_string: " + value + ">";
103: case COMMENT:
104: return "<comment: " + value + ">";
105: default:
106: return "<unknown>";
107: }
108: }
109:
110: /** Indicates whether this token contains a string. */
111: public boolean isString() {
112: return (type == IDENTIFIER || type == QUOTED_STRING);
113: }
114:
115: /** Indicates whether this token contains an EOL or EOF. */
116: public boolean isEOL() {
117: return (type == EOL || type == EOF);
118: }
119: }
120:
121: class TokenizerException extends TextParseException {
122: String message;
123:
124: public TokenizerException(String filename, int line,
125: String message) {
126: super (filename + ":" + line + ": " + message);
127: this .message = message;
128: }
129:
130: public String getBaseMessage() {
131: return message;
132: }
133: }
134:
135: /**
136: * Creates a Tokenizer from an arbitrary input stream.
137: * @param is The InputStream to tokenize.
138: */
139: public Tokenizer(InputStream is) {
140: if (!(is instanceof BufferedInputStream))
141: is = new BufferedInputStream(is);
142: this .is = new PushbackInputStream(is, 2);
143: ungottenToken = false;
144: multiline = 0;
145: quoting = false;
146: delimiters = delim;
147: current = new Token();
148: sb = new StringBuffer();
149: filename = "<none>";
150: line = 1;
151: }
152:
153: /**
154: * Creates a Tokenizer from a string.
155: * @param s The String to tokenize.
156: */
157: public Tokenizer(String s) {
158: this (new ByteArrayInputStream(s.getBytes()));
159: }
160:
161: /**
162: * Creates a Tokenizer from a file.
163: * @param f The File to tokenize.
164: */
165: public Tokenizer(File f) throws FileNotFoundException {
166: this (new FileInputStream(f));
167: wantClose = true;
168: filename = f.getName();
169: }
170:
171: private int getChar() throws IOException {
172: int c = is.read();
173: if (c == '\r') {
174: int next = is.read();
175: if (next != '\n')
176: is.unread(next);
177: c = '\n';
178: }
179: if (c == '\n')
180: line++;
181: return c;
182: }
183:
184: private void ungetChar(int c) throws IOException {
185: if (c == -1)
186: return;
187: is.unread(c);
188: if (c == '\n')
189: line--;
190: }
191:
192: private int skipWhitespace() throws IOException {
193: int skipped = 0;
194: while (true) {
195: int c = getChar();
196: if (c != ' ' && c != '\t') {
197: if (!(c == '\n' && multiline > 0)) {
198: ungetChar(c);
199: return skipped;
200: }
201: }
202: skipped++;
203: }
204: }
205:
206: private void checkUnbalancedParens() throws TextParseException {
207: if (multiline > 0)
208: throw exception("unbalanced parentheses");
209: }
210:
211: /**
212: * Gets the next token from a tokenizer.
213: * @param wantWhitespace If true, leading whitespace will be returned as a
214: * token.
215: * @param wantComment If true, comments are returned as tokens.
216: * @return The next token in the stream.
217: * @throws TextParseException The input was invalid.
218: * @throws IOException An I/O error occurred.
219: */
220: public Token get(boolean wantWhitespace, boolean wantComment)
221: throws IOException {
222: int type;
223: int c;
224:
225: if (ungottenToken) {
226: ungottenToken = false;
227: if (current.type == WHITESPACE) {
228: if (wantWhitespace)
229: return current;
230: } else if (current.type == COMMENT) {
231: if (wantComment)
232: return current;
233: } else {
234: if (current.type == EOL)
235: line++;
236: return current;
237: }
238: }
239: int skipped = skipWhitespace();
240: if (skipped > 0 && wantWhitespace)
241: return current.set(WHITESPACE, null);
242: type = IDENTIFIER;
243: sb.setLength(0);
244: while (true) {
245: c = getChar();
246: if (c == -1 || delimiters.indexOf(c) != -1) {
247: if (c == -1) {
248: if (quoting)
249: throw exception("EOF in " + "quoted string");
250: else if (sb.length() == 0)
251: return current.set(EOF, null);
252: else
253: return current.set(type, sb);
254: }
255: if (sb.length() == 0 && type != QUOTED_STRING) {
256: if (c == '(') {
257: multiline++;
258: skipWhitespace();
259: continue;
260: } else if (c == ')') {
261: if (multiline <= 0)
262: throw exception("invalid " + "close "
263: + "parenthesis");
264: multiline--;
265: skipWhitespace();
266: continue;
267: } else if (c == '"') {
268: if (!quoting) {
269: quoting = true;
270: delimiters = quotes;
271: type = QUOTED_STRING;
272: } else {
273: quoting = false;
274: delimiters = delim;
275: skipWhitespace();
276: }
277: continue;
278: } else if (c == '\n') {
279: return current.set(EOL, null);
280: } else if (c == ';') {
281: while (true) {
282: c = getChar();
283: if (c == '\n' || c == -1)
284: break;
285: sb.append((char) c);
286: }
287: if (wantComment) {
288: ungetChar(c);
289: return current.set(COMMENT, sb);
290: } else if (c == -1 && type != QUOTED_STRING) {
291: checkUnbalancedParens();
292: return current.set(EOF, null);
293: } else if (multiline > 0) {
294: skipWhitespace();
295: sb.setLength(0);
296: continue;
297: } else
298: return current.set(EOL, null);
299: } else
300: throw new IllegalStateException();
301: } else
302: ungetChar(c);
303: break;
304: } else if (c == '\\') {
305: c = getChar();
306: if (c == -1)
307: throw exception("unterminated escape sequence");
308: sb.append('\\');
309: } else if (quoting && c == '\n') {
310: throw exception("newline in quoted string");
311: }
312: sb.append((char) c);
313: }
314: if (sb.length() == 0 && type != QUOTED_STRING) {
315: checkUnbalancedParens();
316: return current.set(EOF, null);
317: }
318: return current.set(type, sb);
319: }
320:
321: /**
322: * Gets the next token from a tokenizer, ignoring whitespace and comments.
323: * @return The next token in the stream.
324: * @throws TextParseException The input was invalid.
325: * @throws IOException An I/O error occurred.
326: */
327: public Token get() throws IOException {
328: return get(false, false);
329: }
330:
331: /**
332: * Returns a token to the stream, so that it will be returned by the next call
333: * to get().
334: * @throws IllegalStateException There are already ungotten tokens.
335: */
336: public void unget() {
337: if (ungottenToken)
338: throw new IllegalStateException(
339: "Cannot unget multiple tokens");
340: if (current.type == EOL)
341: line--;
342: ungottenToken = true;
343: }
344:
345: /**
346: * Gets the next token from a tokenizer and converts it to a string.
347: * @return The next token in the stream, as a string.
348: * @throws TextParseException The input was invalid or not a string.
349: * @throws IOException An I/O error occurred.
350: */
351: public String getString() throws IOException {
352: Token next = get();
353: if (!next.isString()) {
354: throw exception("expected a string");
355: }
356: return next.value;
357: }
358:
359: private String _getIdentifier(String expected) throws IOException {
360: Token next = get();
361: if (next.type != IDENTIFIER)
362: throw exception("expected " + expected);
363: return next.value;
364: }
365:
366: /**
367: * Gets the next token from a tokenizer, ensures it is an unquoted string,
368: * and converts it to a string.
369: * @return The next token in the stream, as a string.
370: * @throws TextParseException The input was invalid or not an unquoted string.
371: * @throws IOException An I/O error occurred.
372: */
373: public String getIdentifier() throws IOException {
374: return _getIdentifier("an identifier");
375: }
376:
377: /**
378: * Gets the next token from a tokenizer and converts it to a long.
379: * @return The next token in the stream, as a long.
380: * @throws TextParseException The input was invalid or not a long.
381: * @throws IOException An I/O error occurred.
382: */
383: public long getLong() throws IOException {
384: String next = _getIdentifier("an integer");
385: if (!Character.isDigit(next.charAt(0)))
386: throw exception("expected an integer");
387: try {
388: return Long.parseLong(next);
389: } catch (NumberFormatException e) {
390: throw exception("expected an integer");
391: }
392: }
393:
394: /**
395: * Gets the next token from a tokenizer and converts it to an unsigned 32 bit
396: * integer.
397: * @return The next token in the stream, as an unsigned 32 bit integer.
398: * @throws TextParseException The input was invalid or not an unsigned 32
399: * bit integer.
400: * @throws IOException An I/O error occurred.
401: */
402: public long getUInt32() throws IOException {
403: long l = getLong();
404: if (l < 0 || l > 0xFFFFFFFFL)
405: throw exception("expected an 32 bit unsigned integer");
406: return l;
407: }
408:
409: /**
410: * Gets the next token from a tokenizer and converts it to an unsigned 16 bit
411: * integer.
412: * @return The next token in the stream, as an unsigned 16 bit integer.
413: * @throws TextParseException The input was invalid or not an unsigned 16
414: * bit integer.
415: * @throws IOException An I/O error occurred.
416: */
417: public int getUInt16() throws IOException {
418: long l = getLong();
419: if (l < 0 || l > 0xFFFFL)
420: throw exception("expected an 16 bit unsigned integer");
421: return (int) l;
422: }
423:
424: /**
425: * Gets the next token from a tokenizer and converts it to an unsigned 8 bit
426: * integer.
427: * @return The next token in the stream, as an unsigned 8 bit integer.
428: * @throws TextParseException The input was invalid or not an unsigned 8
429: * bit integer.
430: * @throws IOException An I/O error occurred.
431: */
432: public int getUInt8() throws IOException {
433: long l = getLong();
434: if (l < 0 || l > 0xFFL)
435: throw exception("expected an 8 bit unsigned integer");
436: return (int) l;
437: }
438:
439: /**
440: * Gets the next token from a tokenizer and parses it as a TTL.
441: * @return The next token in the stream, as an unsigned 32 bit integer.
442: * @throws TextParseException The input was not valid.
443: * @throws IOException An I/O error occurred.
444: * @see TTL
445: */
446: public long getTTL() throws IOException {
447: String next = _getIdentifier("a TTL value");
448: try {
449: return TTL.parseTTL(next);
450: } catch (NumberFormatException e) {
451: throw exception("expected a TTL value");
452: }
453: }
454:
455: /**
456: * Gets the next token from a tokenizer and parses it as if it were a TTL.
457: * @return The next token in the stream, as an unsigned 32 bit integer.
458: * @throws TextParseException The input was not valid.
459: * @throws IOException An I/O error occurred.
460: * @see TTL
461: */
462: public long getTTLLike() throws IOException {
463: String next = _getIdentifier("a TTL-like value");
464: try {
465: return TTL.parse(next, false);
466: } catch (NumberFormatException e) {
467: throw exception("expected a TTL-like value");
468: }
469: }
470:
471: /**
472: * Gets the next token from a tokenizer and converts it to a name.
473: * @param origin The origin to append to relative names.
474: * @return The next token in the stream, as a name.
475: * @throws TextParseException The input was invalid or not a valid name.
476: * @throws IOException An I/O error occurred.
477: * @throws RelativeNameException The parsed name was relative, even with the
478: * origin.
479: * @see Name
480: */
481: public Name getName(Name origin) throws IOException {
482: String next = _getIdentifier("a name");
483: try {
484: Name name = Name.fromString(next, origin);
485: if (!name.isAbsolute())
486: throw new RelativeNameException(name);
487: return name;
488: } catch (TextParseException e) {
489: throw exception(e.getMessage());
490: }
491: }
492:
493: /**
494: * Gets the next token from a tokenizer and converts it to an IP Address.
495: * @param family The address family.
496: * @return The next token in the stream, as an InetAddress
497: * @throws TextParseException The input was invalid or not a valid address.
498: * @throws IOException An I/O error occurred.
499: * @see Address
500: */
501: public InetAddress getAddress(int family) throws IOException {
502: String next = _getIdentifier("an address");
503: try {
504: return Address.getByAddress(next, family);
505: } catch (UnknownHostException e) {
506: throw exception(e.getMessage());
507: }
508: }
509:
510: /**
511: * Gets the next token from a tokenizer, which must be an EOL or EOF.
512: * @throws TextParseException The input was invalid or not an EOL or EOF token.
513: * @throws IOException An I/O error occurred.
514: */
515: public void getEOL() throws IOException {
516: Token next = get();
517: if (next.type != EOL && next.type != EOF) {
518: throw exception("expected EOL or EOF");
519: }
520: }
521:
522: /**
523: * Returns a concatenation of the remaining strings from a Tokenizer.
524: */
525: private String remainingStrings() throws IOException {
526: StringBuffer buffer = null;
527: while (true) {
528: Tokenizer.Token t = get();
529: if (!t.isString())
530: break;
531: if (buffer == null)
532: buffer = new StringBuffer();
533: buffer.append(t.value);
534: }
535: unget();
536: if (buffer == null)
537: return null;
538: return buffer.toString();
539: }
540:
541: /**
542: * Gets the remaining string tokens until an EOL/EOF is seen, concatenates
543: * them together, and converts the base64 encoded data to a byte array.
544: * @param required If true, an exception will be thrown if no strings remain;
545: * otherwise null be be returned.
546: * @return The byte array containing the decoded strings, or null if there
547: * were no strings to decode.
548: * @throws TextParseException The input was invalid.
549: * @throws IOException An I/O error occurred.
550: */
551: public byte[] getBase64(boolean required) throws IOException {
552: String s = remainingStrings();
553: if (s == null) {
554: if (required)
555: throw exception("expected base64 encoded string");
556: else
557: return null;
558: }
559: byte[] array = base64.fromString(s);
560: if (array == null)
561: throw exception("invalid base64 encoding");
562: return array;
563: }
564:
565: /**
566: * Gets the remaining string tokens until an EOL/EOF is seen, concatenates
567: * them together, and converts the base64 encoded data to a byte array.
568: * @return The byte array containing the decoded strings, or null if there
569: * were no strings to decode.
570: * @throws TextParseException The input was invalid.
571: * @throws IOException An I/O error occurred.
572: */
573: public byte[] getBase64() throws IOException {
574: return getBase64(false);
575: }
576:
577: /**
578: * Gets the remaining string tokens until an EOL/EOF is seen, concatenates
579: * them together, and converts the hex encoded data to a byte array.
580: * @param required If true, an exception will be thrown if no strings remain;
581: * otherwise null be be returned.
582: * @return The byte array containing the decoded strings, or null if there
583: * were no strings to decode.
584: * @throws TextParseException The input was invalid.
585: * @throws IOException An I/O error occurred.
586: */
587: public byte[] getHex(boolean required) throws IOException {
588: String s = remainingStrings();
589: if (s == null) {
590: if (required)
591: throw exception("expected hex encoded string");
592: else
593: return null;
594: }
595: byte[] array = base16.fromString(s);
596: if (array == null)
597: throw exception("invalid hex encoding");
598: return array;
599: }
600:
601: /**
602: * Gets the remaining string tokens until an EOL/EOF is seen, concatenates
603: * them together, and converts the hex encoded data to a byte array.
604: * @return The byte array containing the decoded strings, or null if there
605: * were no strings to decode.
606: * @throws TextParseException The input was invalid.
607: * @throws IOException An I/O error occurred.
608: */
609: public byte[] getHex() throws IOException {
610: return getHex(false);
611: }
612:
613: /**
614: * Creates an exception which includes the current state in the error message
615: * @param s The error message to include.
616: * @return The exception to be thrown
617: */
618: public TextParseException exception(String s) {
619: return new TokenizerException(filename, line, s);
620: }
621:
622: /**
623: * Closes any files opened by this tokenizer.
624: */
625: public void close() {
626: if (wantClose) {
627: try {
628: is.close();
629: } catch (IOException e) {
630: }
631: }
632: }
633:
634: protected void finalize() {
635: close();
636: }
637:
638: }
|