001: /*
002: **********************************************************************
003: * Copyright (c) 2004, International Business Machines
004: * Corporation and others. All Rights Reserved.
005: **********************************************************************
006: * Author: Alan Liu
007: * Created: March 16 2004
008: * Since: ICU 3.0
009: **********************************************************************
010: */
011: package com.ibm.icu.impl.data;
012:
013: import java.io.IOException;
014:
015: import com.ibm.icu.impl.UCharacterProperty;
016: import com.ibm.icu.impl.Utility;
017: import com.ibm.icu.text.UTF16;
018:
019: /**
020: * An iterator class that returns successive string tokens from some
021: * source. String tokens are, in general, separated by rule white
022: * space in the source test. Furthermore, they may be delimited by
023: * either single or double quotes (opening and closing quotes must
024: * match). Escapes are processed using standard ICU unescaping.
025: */
026: public class TokenIterator {
027:
028: private ResourceReader reader;
029: private String line;
030: private StringBuffer buf;
031: private boolean done;
032: private int pos;
033: private int lastpos;
034:
035: /**
036: * Construct an iterator over the tokens returned by the given
037: * ResourceReader, ignoring blank lines and comment lines (first
038: * non-blank character is '#'). Note that trailing comments on a
039: * line, beginning with the first unquoted '#', are recognized.
040: */
041: public TokenIterator(ResourceReader r) {
042: reader = r;
043: line = null;
044: done = false;
045: buf = new StringBuffer();
046: pos = lastpos = -1;
047: }
048:
049: /**
050: * Return the next token from this iterator, or null if the last
051: * token has been returned.
052: */
053: public String next() throws IOException {
054: if (done) {
055: return null;
056: }
057: for (;;) {
058: if (line == null) {
059: line = reader.readLineSkippingComments();
060: if (line == null) {
061: done = true;
062: return null;
063: }
064: pos = 0;
065: }
066: buf.setLength(0);
067: lastpos = pos;
068: pos = nextToken(pos);
069: if (pos < 0) {
070: line = null;
071: continue;
072: }
073: return buf.toString();
074: }
075: }
076:
077: /**
078: * Return the one-based line number of the line of the last token returned by
079: * next(). Should only be called
080: * after a call to next(); otherwise the return
081: * value is undefined.
082: */
083: public int getLineNumber() {
084: return reader.getLineNumber();
085: }
086:
087: /**
088: * Return a string description of the position of the last line
089: * returned by readLine() or readLineSkippingComments().
090: */
091: public String describePosition() {
092: return reader.describePosition() + ':' + (lastpos + 1);
093: }
094:
095: /**
096: * Read the next token from 'this.line' and append it to
097: * 'this.buf'. Tokens are separated by rule white space. Tokens
098: * may also be delimited by double or single quotes. The closing
099: * quote must match the opening quote. If a '#' is encountered,
100: * the rest of the line is ignored, unless it is backslash-escaped
101: * or within quotes.
102: * @param pos the offset into the string
103: * @return offset to the next character to read from line, or if
104: * the end of the line is reached without scanning a valid token,
105: * -1
106: */
107: private int nextToken(int pos) {
108: pos = Utility.skipWhitespace(line, pos);
109: if (pos == line.length()) {
110: return -1;
111: }
112: int startpos = pos;
113: char c = line.charAt(pos++);
114: char quote = 0;
115: switch (c) {
116: case '"':
117: case '\'':
118: quote = c;
119: break;
120: case '#':
121: return -1;
122: default:
123: buf.append(c);
124: break;
125: }
126: int[] posref = null;
127: while (pos < line.length()) {
128: c = line.charAt(pos); // 16-bit ok
129: if (c == '\\') {
130: if (posref == null) {
131: posref = new int[1];
132: }
133: posref[0] = pos + 1;
134: int c32 = Utility.unescapeAt(line, posref);
135: if (c32 < 0) {
136: throw new RuntimeException("Invalid escape at "
137: + reader.describePosition() + ':' + pos);
138: }
139: UTF16.append(buf, c32);
140: pos = posref[0];
141: } else if ((quote != 0 && c == quote)
142: || (quote == 0 && UCharacterProperty
143: .isRuleWhiteSpace(c))) {
144: return ++pos;
145: } else if (quote == 0 && c == '#') {
146: return pos; // do NOT increment
147: } else {
148: buf.append(c);
149: ++pos;
150: }
151: }
152: if (quote != 0) {
153: throw new RuntimeException("Unterminated quote at "
154: + reader.describePosition() + ':' + startpos);
155: }
156: return pos;
157: }
158: }
|