001: /*
002: * Copyright 1999,2004 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package org.apache.catalina.util;
018:
019: /**
 * Utility class for high-performance string parsing in simple
 * delimited text cases.  Parsing is performed
022: * by setting the string, and then using the <code>findXxxx()</code> and
023: * <code>skipXxxx()</code> families of methods to remember significant
024: * offsets. To retrieve the parsed substrings, call the <code>extract()</code>
025: * method with the appropriate saved offset values.
026: *
027: * @author Craig R. McClanahan
028: * @version $Revision: 1.2 $ $Date: 2004/02/27 14:58:51 $
029: */
030:
public final class StringParser {

    // ------------------------------------------------------------------ State

    /**
     * The text currently being parsed, exactly as handed to
     * {@link #setString(String)}; may be <code>null</code>.
     */
    private String string;

    /**
     * Character-array copy of the current text, taken once in
     * {@link #setString(String)} so the scanning loops can index characters
     * directly. An empty array stands in for a <code>null</code> text.
     */
    private char[] chars;

    /**
     * Number of characters in the current text (zero for a <code>null</code>
     * text), cached when the text is set.
     */
    private int length;

    /**
     * Zero-relative cursor into the current text. It ranges from
     * <code>0</code> up to and including <code>length</code>; the latter value
     * means the cursor has run off the end of the text.
     */
    private int index;

    // ----------------------------------------------------------- Constructors

    /**
     * Create a parser that has no text yet; equivalent to passing
     * <code>null</code> to {@link #StringParser(String)}.
     */
    public StringParser() {
        this(null);
    }

    /**
     * Create a parser positioned at the start of the given text.
     *
     * @param string The string to be parsed (may be <code>null</code>)
     */
    public StringParser(String string) {
        setString(string);
    }

    // ------------------------------------------------------------- Properties

    /**
     * Return the current zero-relative parsing position.
     */
    public int getIndex() {
        return index;
    }

    /**
     * Return the number of characters in the string being parsed.
     */
    public int getLength() {
        return length;
    }

    /**
     * Return the string being parsed, as it was supplied to the parser.
     */
    public String getString() {
        return string;
    }

    /**
     * Replace the string being parsed and move the parsing position back to
     * its beginning. A <code>null</code> argument is remembered as-is but is
     * scanned as if it were an empty string.
     *
     * @param string The string to be parsed.
     */
    public void setString(String string) {
        this.string = string;
        this.chars = (string == null) ? new char[0] : string.toCharArray();
        // The array length always equals the string length (0 for null).
        this.length = this.chars.length;
        reset();
    }

    // --------------------------------------------------------- Public Methods

    /**
     * Move the parsing position forward by one character, unless it has
     * already reached the end of the string.
     */
    public void advance() {
        if (index >= length) {
            return;
        }
        index++;
    }

    /**
     * Return the tail of the string beginning at <code>start</code>. An empty
     * string is returned when <code>start</code> is negative or not less than
     * the string length.
     *
     * @param start Starting index, zero relative, inclusive
     */
    public String extract(int start) {
        boolean withinText = (start >= 0) && (start < length);
        return withinText ? string.substring(start) : "";
    }

    /**
     * Return the characters from <code>start</code> up to, but not including,
     * <code>end</code>. An empty string is returned when <code>start</code>
     * is negative, when <code>end</code> lies beyond the string length, or
     * when the range is empty or inverted.
     *
     * @param start Starting index, zero relative, inclusive
     * @param end Ending index, zero relative, exclusive
     */
    public String extract(int start, int end) {
        if (start < 0 || end > length || start >= end) {
            return "";
        }
        return string.substring(start, end);
    }

    /**
     * Move the parsing position forward to the next occurrence of
     * <code>ch</code>, or to the end of the string when there is none, and
     * return the new position. The position does not move if it already
     * points at <code>ch</code>.
     *
     * @param ch Character to be found
     */
    public int findChar(char ch) {
        for (; index < length; index++) {
            if (chars[index] == ch) {
                break;
            }
        }
        return index;
    }

    /**
     * Move the parsing position forward to the next non-whitespace
     * character, or to the end of the string when there is none, and return
     * the new position.
     */
    public int findText() {
        return scanWhile(true);
    }

    /**
     * Move the parsing position forward to the next whitespace character,
     * or to the end of the string when there is none, and return the new
     * position.
     */
    public int findWhite() {
        return scanWhile(false);
    }

    /**
     * Move the parsing position back to the start of the current string.
     */
    public void reset() {
        index = 0;
    }

    /**
     * Move the parsing position past any run of consecutive <code>ch</code>
     * characters starting at the current position, stopping at the end of
     * the string at the latest, and return the new position.
     *
     * @param ch Character to be skipped
     */
    public int skipChar(char ch) {
        for (; index < length; index++) {
            if (chars[index] != ch) {
                break;
            }
        }
        return index;
    }

    /**
     * Move the parsing position past any run of non-whitespace characters
     * starting at the current position, stopping at the end of the string at
     * the latest, and return the new position.
     */
    public int skipText() {
        return scanWhile(false);
    }

    /**
     * Move the parsing position past any run of whitespace characters
     * starting at the current position, stopping at the end of the string at
     * the latest, and return the new position.
     */
    public int skipWhite() {
        return scanWhile(true);
    }

    // ------------------------------------------------------ Protected Methods

    /**
     * Report whether <code>ch</code> counts as whitespace for this parser:
     * space, horizontal tab, carriage return or line feed.
     *
     * @param ch Character to be checked
     */
    protected boolean isWhite(char ch) {
        switch (ch) {
            case ' ':
            case '\t':
            case '\r':
            case '\n':
                return true;
            default:
                return false;
        }
    }

    // -------------------------------------------------------- Private Methods

    /**
     * Shared scanning loop for the whitespace-based find/skip methods:
     * advance the parsing position for as long as the character under it has
     * the requested whitespace status, or until the end of the string.
     *
     * @param whitespace <code>true</code> to step over whitespace characters,
     *  <code>false</code> to step over non-whitespace characters
     * @return the resulting parsing position
     */
    private int scanWhile(boolean whitespace) {
        while (index < length && isWhite(chars[index]) == whitespace) {
            index++;
        }
        return index;
    }

}
|