001: /**
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */package org.apache.solr.util;
017:
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
021:
/**
 * Static helpers for splitting strings (with quote and backslash-escape
 * awareness), lower-casing lists of strings, lenient boolean parsing and
 * minimal URL encoding of parameter values.
 *
 * @author yonik
 * @version $Id: StrUtils.java 508805 2007-02-17 20:42:45Z yonik $
 */
public class StrUtils {

  /** Upper-case hex digits used when percent-encoding control characters. */
  private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

  /**
   * Splits a string on a separator character, but does not split inside a
   * quoted section. A quoted section starts at a single or double quote and
   * runs to the next occurrence of the same quote character. A backslash
   * escapes the next character both inside and outside quoted sections.
   * <p>
   * The returned pieces are substrings of the input: quotes and backslashes
   * are left in place. Empty pieces between adjacent separators are kept,
   * but an empty piece after a trailing separator is not added.
   *
   * @param s the string to split
   * @param separator the character to split on
   * @return the pieces of <code>s</code>, in order
   */
  public static List<String> splitSmart(String s, char separator) {
    List<String> lst = new ArrayList<String>(4);
    int pos = 0, start = 0, end = s.length();
    char inString = 0; // the quote char that opened the current quoted section, or 0
    char ch = 0;
    while (pos < end) {
      char prevChar = ch;
      ch = s.charAt(pos++);
      if (ch == '\\') {
        // Skip the escaped char. It is never examined, so on the next
        // iteration prevChar is still the backslash.
        pos++;
      } else if (inString != 0 && ch == inString) {
        inString = 0; // closing quote
      } else if (ch == '\'' || ch == '"') {
        // If the quote is directly preceded by a number or letter then
        // don't treat it as the start of a string.
        // Examples: 50" TV, or can't
        if (!Character.isLetterOrDigit(prevChar)) {
          inString = ch;
        }
      } else if (ch == separator && inString == 0) {
        lst.add(s.substring(start, pos - 1));
        start = pos;
      }
    }
    if (start < end) {
      lst.add(s.substring(start, end));
    }
    return lst;
  }

  /**
   * Splits a backslash escaped string on the separator.
   * <p>
   * A separator preceded by a backslash does not split. Empty pieces are
   * never returned.
   * <p>
   * Current backslash escaping supported when <code>decode</code> is true:
   * <br> \n \t \r \b \f are decoded the same as in a Java String
   * <br> Other characters following a backslash are produced verbatim
   * (for example {@code \c} yields {@code c})
   * <p>
   * When <code>decode</code> is false the backslash and the character after
   * it are both copied to the output unchanged. A backslash that is the very
   * last character of the input ends the scan; it is dropped when decoding
   * and kept otherwise.
   *
   * @param s the string to split
   * @param separator the separator to split on
   * @param decode decode backslash escaping
   * @return the non-empty pieces of <code>s</code>, in order
   * @throws IllegalArgumentException if <code>separator</code> is empty
   *         while <code>s</code> is not
   */
  public static List<String> splitSmart(String s, String separator,
                                        boolean decode) {
    int pos = 0, end = s.length();
    // An empty separator matches at every position without advancing, so the
    // scan below would never terminate. An empty s never enters the loop,
    // which is why that case is left alone.
    if (end > 0 && separator.length() == 0) {
      throw new IllegalArgumentException("separator must not be empty");
    }

    List<String> lst = new ArrayList<String>(2);
    StringBuilder sb = new StringBuilder();
    while (pos < end) {
      if (s.startsWith(separator, pos)) {
        if (sb.length() > 0) {
          lst.add(sb.toString());
          sb = new StringBuilder();
        }
        pos += separator.length();
        continue;
      }

      char ch = s.charAt(pos++);
      if (ch == '\\') {
        if (!decode) {
          sb.append(ch);
        }
        if (pos >= end) {
          break; // dangling backslash at end of input: let it go
        }
        ch = s.charAt(pos++);
        if (decode) {
          ch = decodeEscape(ch);
        }
      }

      sb.append(ch);
    }

    if (sb.length() > 0) {
      lst.add(sb.toString());
    }

    return lst;
  }

  /**
   * Splits a backslash escaped string on whitespace, as determined by
   * {@link Character#isWhitespace(char)}.
   * <p>
   * A whitespace character preceded by a backslash does not split. Runs of
   * whitespace act as a single separator and empty pieces are never
   * returned. Backslash handling is the same as in
   * {@link #splitSmart(String, String, boolean)}.
   *
   * @param s the string to split
   * @param decode decode backslash escaping
   * @return the non-empty pieces of <code>s</code>, in order
   */
  public static List<String> splitWS(String s, boolean decode) {
    List<String> lst = new ArrayList<String>(2);
    StringBuilder sb = new StringBuilder();
    int pos = 0, end = s.length();
    while (pos < end) {
      char ch = s.charAt(pos++);
      if (Character.isWhitespace(ch)) {
        if (sb.length() > 0) {
          lst.add(sb.toString());
          sb = new StringBuilder();
        }
        continue;
      }

      if (ch == '\\') {
        if (!decode) {
          sb.append(ch);
        }
        if (pos >= end) {
          break; // dangling backslash at end of input: let it go
        }
        ch = s.charAt(pos++);
        if (decode) {
          ch = decodeEscape(ch);
        }
      }

      sb.append(ch);
    }

    if (sb.length() > 0) {
      lst.add(sb.toString());
    }

    return lst;
  }

  /**
   * Maps the character that followed a backslash to the character it stands
   * for: n, t, r, b and f become the matching control character, anything
   * else is returned unchanged.
   */
  private static char decodeEscape(char ch) {
    switch (ch) {
      case 'n':
        return '\n';
      case 't':
        return '\t';
      case 'r':
        return '\r';
      case 'b':
        return '\b';
      case 'f':
        return '\f';
      default:
        return ch;
    }
  }

  /**
   * Returns a new list holding the lower-cased form of each string, in the
   * same order.
   * <p>
   * Lower-casing uses a fixed locale so the result does not depend on the
   * JVM default locale (under a Turkish default, for instance, "I" would
   * otherwise become a dotless i).
   *
   * @param strings the strings to convert; the list itself is not modified
   * @return a new list of lower-cased strings
   */
  public static List<String> toLower(List<String> strings) {
    List<String> ret = new ArrayList<String>(strings.size());
    for (String str : strings) {
      ret.add(str.toLowerCase(Locale.ENGLISH));
    }
    return ret;
  }

  /**
   * Returns true if the string starts with '1', 't', or 'T',
   * and false otherwise (including for the empty string).
   *
   * @param s the string to test
   */
  public static boolean parseBoolean(String s) {
    char ch = s.length() > 0 ? s.charAt(0) : 0;
    return (ch == '1' || ch == 't' || ch == 'T');
  }

  /**
   * URLEncodes a value, replacing only enough chars so that
   * the URL may be unambiguously pasted back into a browser.
   * <p>
   * Characters with a numeric value less than 32 are encoded as a percent
   * sign followed by exactly two upper-case hex digits.
   * A space is written as '+'; the characters &amp; = % + are percent-encoded.
   * Every other character is appended unchanged.
   *
   * @param dest receives the encoded value
   * @param val the value to encode
   * @throws IOException if <code>dest</code> fails to accept output
   */
  public static void partialURLEncodeVal(Appendable dest, String val)
      throws IOException {
    for (int i = 0; i < val.length(); i++) {
      char ch = val.charAt(i);
      if (ch < 32) {
        // A percent escape is always two hex digits; ch < 32 so the first
        // digit is 0 or 1.
        dest.append('%');
        dest.append(HEX_DIGITS[ch >> 4]);
        dest.append(HEX_DIGITS[ch & 0x0F]);
      } else {
        switch (ch) {
          case ' ':
            dest.append('+');
            break;
          case '&':
            dest.append("%26");
            break;
          case '%':
            dest.append("%25");
            break;
          case '=':
            dest.append("%3D");
            break;
          case '+':
            dest.append("%2B");
            break;
          default:
            dest.append(ch);
            break;
        }
      }
    }
  }

}
|