001: /*
002: * This class provides a more sophisticated tokenizer than that available
003: * with StringTokenizer. In particular, it handles tokenizing of fields
004: * in brackets, and will ignore separators in quotes or brackets.
005: *
006: * $Author: davis $
007: * $Date: 2004/12/18 21:29:04 $
008: * $Revision: 1.1 $
009: *
010: * This library is free software; you can redistribute it and/or
011: * modify it under the terms of the GNU Lesser General Public
012: * License as published by the Free Software Foundation; either
013: * version 2.1 of the License, or (at your option) any later version.
014: *
015: * This library is distributed in the hope that it will be useful,
016: * but WITHOUT ANY WARRANTY; without even the implied warranty of
017: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
018: * Lesser General Public License for more details.
019: *
020: * You should have received a copy of the GNU Lesser General Public
021: * License along with this library; if not, write to the Free Software
022: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
023: *
024: * Revision History:
025: *
026: * Written by Davis Swan in April, 2004.
027: */
028: package com.sqlmagic.tinysql;
029:
030: import java.text.*;
031: import java.util.*;
032: import java.lang.*;
033:
public class FieldTokenizer {
    /*
     * Value held by the quote-state variables while no quoted string is open.
     */
    private static final char NO_QUOTE = ' ';

    /* Tokens produced by the constructor; always contains at least one entry. */
    String[] fields;

    /* Cursor used by hasMoreFields() / nextField(). */
    int fieldIndex;

    /**
     * Splits an input string into trimmed fields at each occurrence of the
     * separator, ignoring separators that occur inside round brackets or
     * inside single- or double-quoted strings. Two adjacent single quotes
     * inside a single-quoted string are treated as an embedded quote.
     *
     * If the separator is '(' the text before the outermost left bracket and
     * the text between the outermost brackets are returned as separate
     * fields.
     *
     * Empty (all-blank) fields are dropped. If nothing at all is produced,
     * the single field returned is the unmodified input string.
     *
     * A right bracket that has no matching left bracket is treated as an
     * ordinary character so that it cannot unbalance the bracket tracking
     * for the remainder of the string.
     *
     * @param inputString the text to split; null is treated as an empty string
     * @param separator   the character that separates fields
     * @param returnSep   true if the separator characters themselves should
     *                    be returned as fields (for a '(' separator both "("
     *                    and ")" are returned)
     */
    public FieldTokenizer(String inputString, char separator,
            boolean returnSep) {
        String text = (inputString == null) ? "" : inputString;
        fieldIndex = 0;

        /*
         * Nothing to split and nothing to trim: return the input as is.
         */
        if (text.indexOf(separator) < 0 && text.trim().length() == 0) {
            fields = new String[] { text };
            return;
        }

        // Raw type on purpose: the file is written in pre-generics Java.
        List tokens = new ArrayList();
        int length = text.length();
        int openBrackets = 0;
        int closedBrackets = 0;
        char quoteChar = NO_QUOTE;          // open quote outside brackets
        char bracketQuoteChar = NO_QUOTE;   // open quote inside brackets
        int start = 0;                      // first character of the pending field

        for (int i = 0; i < length; i++) {
            char c = text.charAt(i);

            if (c == '\'' || c == '"') {
                /*
                 * Quotes inside brackets are tracked separately so that
                 * brackets appearing within such a quoted string are not
                 * counted.
                 */
                if (openBrackets > 0) {
                    if (bracketQuoteChar == NO_QUOTE) {
                        bracketQuoteChar = c;
                    } else if (c == bracketQuoteChar) {
                        bracketQuoteChar = NO_QUOTE;
                    }
                    continue;
                }
                if (quoteChar == NO_QUOTE) {
                    quoteChar = c;
                } else if (c == quoteChar) {
                    /*
                     * A matching quote has been found. Two adjacent single
                     * quotes represent an embedded single quote, so skip
                     * the second one and stay inside the quoted string.
                     */
                    if (quoteChar == '\'' && i < length - 1
                            && text.charAt(i + 1) == '\'') {
                        i++;
                    } else {
                        quoteChar = NO_QUOTE;
                    }
                }
            } else if (c == '(' || c == ')') {
                /*
                 * Ignore brackets inside quoted strings.
                 */
                if (quoteChar != NO_QUOTE || bracketQuoteChar != NO_QUOTE) {
                    continue;
                }
                if (c == '(') {
                    openBrackets++;
                    /*
                     * If bracket is the separator, return the string before
                     * the outermost left bracket.
                     */
                    if (separator == '(' && openBrackets == 1) {
                        addTrimmed(tokens, text, start, i);
                        if (returnSep) {
                            tokens.add("(");
                        }
                        start = i + 1;
                    }
                } else {
                    /*
                     * A right bracket without an open left bracket is not
                     * counted; counting it would leave the two counters
                     * permanently unequal and hide all later separators.
                     */
                    if (openBrackets == 0) {
                        continue;
                    }
                    closedBrackets++;
                    if (openBrackets == closedBrackets) {
                        /*
                         * The outermost bracket pair has been closed. If
                         * bracket is the separator, return the string
                         * between the brackets.
                         */
                        if (separator == '(') {
                            addTrimmed(tokens, text, start, i);
                            if (returnSep) {
                                tokens.add(")");
                            }
                            start = i + 1;
                        }
                        openBrackets = 0;
                        closedBrackets = 0;
                    }
                }
            } else if (c == separator && openBrackets == 0
                    && quoteChar == NO_QUOTE) {
                /*
                 * Separator found outside of brackets and quoted strings:
                 * build the next output string.
                 */
                addTrimmed(tokens, text, start, i);
                if (returnSep) {
                    tokens.add(String.valueOf(separator));
                }
                start = i + 1;
            }
        }

        /*
         * Pick up the last string if there is one. The end of the input is
         * used rather than the last inspected index so that a trailing
         * doubled quote (which advances the loop index by two) is not cut off.
         */
        if (start < length) {
            addTrimmed(tokens, text, start, length);
        }

        /*
         * Create output string array; fall back to the raw input when every
         * field turned out to be empty.
         */
        if (tokens.isEmpty()) {
            fields = new String[] { text };
        } else {
            fields = (String[]) tokens.toArray(new String[tokens.size()]);
        }
    }

    /*
     * Appends text[start, end) to tokens after trimming, unless the trimmed
     * piece is empty.
     */
    private static void addTrimmed(List tokens, String text, int start,
            int end) {
        if (end <= start) {
            return;
        }
        String piece = text.substring(start, end).trim();
        if (piece.length() > 0) {
            tokens.add(piece);
        }
    }

    /**
     * Returns the fields as an array of strings. The array is the
     * tokenizer's own storage, not a copy.
     */
    public String[] getFields() {
        return fields;
    }

    /**
     * Returns the count of fields.
     */
    public int countFields() {
        return fields.length;
    }

    /**
     * Returns the field at the given index, or the string "NULL" if the
     * index is out of range.
     */
    public String getField(int inputIndex) {
        return getField(inputIndex, "NULL");
    }

    /**
     * Returns the field at the given index, or defaultString if the index
     * is out of range.
     */
    public String getField(int inputIndex, String defaultString) {
        if (inputIndex < 0 || inputIndex >= fields.length) {
            return defaultString;
        }
        return fields[inputIndex];
    }

    /**
     * Returns the field at the given index parsed as an int, or defaultInt
     * if the index is out of range or the field is not a valid integer.
     */
    public int getInt(int inputIndex, int defaultInt) {
        if (inputIndex < 0 || inputIndex >= fields.length) {
            return defaultInt;
        }
        try {
            return Integer.parseInt(fields[inputIndex]);
        } catch (NumberFormatException notANumber) {
            // Not numeric: the caller asked for a fallback in this case.
            return defaultInt;
        }
    }

    /**
     * Returns true while nextField() still has a field to deliver.
     */
    public boolean hasMoreFields() {
        return fieldIndex < fields.length;
    }

    /**
     * Returns the next field and advances the cursor, or returns null once
     * all fields have been delivered.
     */
    public String nextField() {
        if (fieldIndex >= fields.length) {
            return null;
        }
        String next = fields[fieldIndex];
        fieldIndex++;
        return next;
    }
}
|