001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package org.apache.catalina.util;
019:
020: /**
021: * Utility class for high-performance string parsing of simple
022: * delimited text cases. Parsing is performed
023: * by setting the string, and then using the <code>findXxxx()</code> and
024: * <code>skipXxxx()</code> families of methods to remember significant
025: * offsets. To retrieve the parsed substrings, call the <code>extract()</code>
026: * method with the appropriate saved offset values.
027: *
028: * @author Craig R. McClanahan
029: * @version $Revision: 467222 $ $Date: 2006-10-24 05:17:11 +0200 (mar., 24 oct. 2006) $
030: */
031:
public final class StringParser {

    // ----------------------------------------------------- Instance Variables

    /**
     * Character-array copy of the string being parsed, captured once in
     * {@link #setString(String)} so the scanning loops can index into it
     * directly. Empty (never <code>null</code>) when no string is set.
     */
    private char[] chars;

    /**
     * Zero-relative current parsing position. It ranges from <code>0</code>
     * up to and including {@link #length}; the value <code>length</code>
     * means the parser has run off the end of the string.
     */
    private int index;

    /**
     * Number of characters in the string being parsed (zero when the
     * string is <code>null</code>).
     */
    private int length;

    /**
     * The string being parsed, exactly as handed to
     * {@link #setString(String)}; may be <code>null</code>.
     */
    private String string;

    // ----------------------------------------------------------- Constructors

    /**
     * Create a parser that has no string to parse yet. Equivalent to
     * <code>new StringParser(null)</code>.
     */
    public StringParser() {
        this(null);
    }

    /**
     * Create a parser positioned at the start of the given string.
     *
     * @param string The string to be parsed (may be <code>null</code>)
     */
    public StringParser(String string) {
        setString(string);
    }

    // ------------------------------------------------------------- Properties

    /**
     * Return the zero-relative current parsing position.
     */
    public int getIndex() {
        return index;
    }

    /**
     * Return the number of characters in the string being parsed.
     */
    public int getLength() {
        return length;
    }

    /**
     * Return the string being parsed, or <code>null</code> if none was set.
     */
    public String getString() {
        return string;
    }

    /**
     * Replace the string being parsed and rewind the parsing position to
     * the start. A <code>null</code> argument is stored as-is and treated
     * as a zero-length input by all scanning and extraction methods.
     *
     * @param string The string to be parsed.
     */
    public void setString(String string) {
        this.string = string;
        this.chars = (string == null) ? new char[0] : string.toCharArray();
        // The array was copied from the string, so both lengths agree.
        this.length = this.chars.length;
        reset();
    }

    // --------------------------------------------------------- Public Methods

    /**
     * Move the parsing position forward by one character; does nothing when
     * the position is already at the end of the string.
     */
    public void advance() {
        if (index >= length) {
            return;
        }
        index++;
    }

    /**
     * Return the tail of the string beginning at <code>start</code>. An
     * empty string is returned when <code>start</code> is negative or not
     * less than the string length.
     *
     * @param start Starting index, zero relative, inclusive
     */
    public String extract(int start) {
        boolean inRange = (start >= 0) && (start < length);
        return inRange ? string.substring(start) : "";
    }

    /**
     * Return the characters from <code>start</code> (inclusive) up to
     * <code>end</code> (exclusive). An empty string is returned when
     * <code>start</code> is negative, when <code>start</code> is not less
     * than <code>end</code>, or when <code>end</code> exceeds the string
     * length.
     *
     * @param start Starting index, zero relative, inclusive
     * @param end Ending index, zero relative, exclusive
     */
    public String extract(int start, int end) {
        boolean inRange = (start >= 0) && (start < end) && (end <= length);
        return inRange ? string.substring(start, end) : "";
    }

    /**
     * Scan forward from the current position to the next occurrence of
     * <code>ch</code>, stopping at the end of the string if there is none.
     * The parsing position is left at, and the method returns, the index
     * where the scan stopped.
     *
     * @param ch Character to be found
     */
    public int findChar(char ch) {
        int pos = index;
        while (pos < length && chars[pos] != ch) {
            pos++;
        }
        index = pos;
        return pos;
    }

    /**
     * Scan forward from the current position to the next non-whitespace
     * character, stopping at the end of the string if there is none. The
     * parsing position is left at, and the method returns, the index where
     * the scan stopped.
     */
    public int findText() {
        return scanWhileWhiteIs(true);
    }

    /**
     * Scan forward from the current position to the next whitespace
     * character, stopping at the end of the string if there is none. The
     * parsing position is left at, and the method returns, the index where
     * the scan stopped.
     */
    public int findWhite() {
        return scanWhileWhiteIs(false);
    }

    /**
     * Rewind the parsing position to the start of the current string.
     */
    public void reset() {
        index = 0;
    }

    /**
     * Move past every consecutive occurrence of <code>ch</code> starting at
     * the current position, stopping at the end of the string if reached.
     * Returns the resulting parsing position.
     *
     * @param ch Character to be skipped
     */
    public int skipChar(char ch) {
        int pos = index;
        while (pos < length && chars[pos] == ch) {
            pos++;
        }
        index = pos;
        return pos;
    }

    /**
     * Move past every consecutive non-whitespace character starting at the
     * current position, stopping at the end of the string if reached.
     * Returns the resulting parsing position.
     */
    public int skipText() {
        return scanWhileWhiteIs(false);
    }

    /**
     * Move past every consecutive whitespace character starting at the
     * current position, stopping at the end of the string if reached.
     * Returns the resulting parsing position.
     */
    public int skipWhite() {
        return scanWhileWhiteIs(true);
    }

    // ------------------------------------------------------ Protected Methods

    /**
     * Tell whether a character counts as whitespace for this parser: space,
     * horizontal tab, carriage return, or line feed.
     *
     * @param ch Character to be checked
     */
    protected boolean isWhite(char ch) {
        switch (ch) {
        case ' ':
        case '\t':
        case '\r':
        case '\n':
            return true;
        default:
            return false;
        }
    }

    // -------------------------------------------------------- Private Methods

    /**
     * Shared scanning loop behind the whitespace find/skip methods: advance
     * the parsing position while the character under it has the requested
     * whitespace-ness, or until the end of the string is reached.
     *
     * @param white <code>true</code> to run over whitespace characters,
     *  <code>false</code> to run over non-whitespace characters
     * @return the parsing position after the scan
     */
    private int scanWhileWhiteIs(boolean white) {
        while (index < length && isWhite(chars[index]) == white) {
            index++;
        }
        return index;
    }

}
|