001: package org.drools.util;
002:
003: /*
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import java.io.BufferedReader;
021: import java.io.FileReader;
022: import java.io.IOException;
023: import java.io.Reader;
024: import java.util.ArrayList;
025: import java.util.List;
026:
027: /**
028: * Ripped from commons StringUtil:
029: *
030: * <p>Operations on {@link java.lang.String} that are
031: * <code>null</code> safe.</p>
032: *
033: * <ul>
034: * <li><b>IsEmpty/IsBlank</b>
035: * - checks if a String contains text</li>
036: * <li><b>Trim/Strip</b>
037: * - removes leading and trailing whitespace</li>
038: * <li><b>Equals</b>
039: * - compares two strings null-safe</li>
040: * <li><b>IndexOf/LastIndexOf/Contains</b>
041: * - null-safe index-of checks</li>
042: * <li><b>IndexOfAny/LastIndexOfAny/IndexOfAnyBut/LastIndexOfAnyBut</b>
043: * - index-of any of a set of Strings</li>
044: * <li><b>ContainsOnly/ContainsNone</b>
045: * - does String contains only/none of these characters</li>
046: * <li><b>Substring/Left/Right/Mid</b>
047: * - null-safe substring extractions</li>
048: * <li><b>SubstringBefore/SubstringAfter/SubstringBetween</b>
049: * - substring extraction relative to other strings</li>
050: * <li><b>Split/Join</b>
051: * - splits a String into an array of substrings and vice versa</li>
052: * <li><b>Remove/Delete</b>
053: * - removes part of a String</li>
054: * <li><b>Replace/Overlay</b>
055: * - Searches a String and replaces one String with another</li>
056: * <li><b>Chomp/Chop</b>
057: * - removes the last part of a String</li>
058: * <li><b>LeftPad/RightPad/Center/Repeat</b>
059: * - pads a String</li>
060: * <li><b>UpperCase/LowerCase/SwapCase/Capitalize/Uncapitalize</b>
061: * - changes the case of a String</li>
062: * <li><b>CountMatches</b>
063: * - counts the number of occurrences of one String in another</li>
064: * <li><b>IsAlpha/IsNumeric/IsWhitespace/IsAsciiPrintable</b>
065: * - checks the characters in a String</li>
066: * <li><b>DefaultString</b>
067: * - protects against a null input String</li>
068: * <li><b>Reverse/ReverseDelimited</b>
069: * - reverses a String</li>
070: * <li><b>Abbreviate</b>
071: * - abbreviates a string using ellipsis</li>
072: * <li><b>Difference</b>
073: * - compares two Strings and reports on their differences</li>
074: * <li><b>LevensteinDistance</b>
075: * - the number of changes needed to change one String into another</li>
076: * </ul>
077: *
078: * <p>The <code>StringUtils</code> class defines certain words related to
079: * String handling.</p>
080: *
081: * <ul>
082: * <li>null - <code>null</code></li>
083: * <li>empty - a zero-length string (<code>""</code>)</li>
084: * <li>space - the space character (<code>' '</code>, char 32)</li>
085: * <li>whitespace - the characters defined by {@link Character#isWhitespace(char)}</li>
086: * <li>trim - the characters <= 32 as in {@link String#trim()}</li>
087: * </ul>
088: *
089: * <p><code>StringUtils</code> handles <code>null</code> input Strings quietly.
090: * That is to say that a <code>null</code> input will return <code>null</code>.
091: * Where a <code>boolean</code> or <code>int</code> is being returned
092: * details vary by method.</p>
093: *
094: * <p>A side effect of the <code>null</code> handling is that a
095: * <code>NullPointerException</code> should be considered a bug in
096: * <code>StringUtils</code> (except for deprecated methods).</p>
097: *
098: * <p>Methods in this class give sample code to explain their operation.
099: * The symbol <code>*</code> is used to indicate any input including <code>null</code>.</p>
100: *
101: * @see java.lang.String
102: * @author <a href="http://jakarta.apache.org/turbine/">Apache Jakarta Turbine</a>
103: * @author <a href="mailto:jon@latchkey.com">Jon S. Stevens</a>
104: * @author <a href="mailto:dlr@finemaltcoding.com">Daniel Rall</a>
105: * @author <a href="mailto:gcoladonato@yahoo.com">Greg Coladonato</a>
106: * @author <a href="mailto:ed@apache.org">Ed Korthof</a>
107: * @author <a href="mailto:rand_mcneely@yahoo.com">Rand McNeely</a>
108: * @author Stephen Colebourne
109: * @author <a href="mailto:fredrik@westermarck.com">Fredrik Westermarck</a>
110: * @author Holger Krauth
111: * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a>
112: * @author <a href="mailto:hps@intermeta.de">Henning P. Schmiedehausen</a>
113: * @author Arun Mammen Thomas
114: * @author Gary Gregory
115: * @author Phil Steitz
116: * @author Al Chou
117: * @author Michael Davey
118: * @author Reuben Sivan
119: * @author Chris Hyzer
120: * @since 1.0
121: * @version $Id$
122: */
public class StringUtils {

    /**
     * An empty immutable <code>String</code> array.
     */
    public static final String[] EMPTY_STRING_ARRAY = new String[0];

    // Performance testing notes (JDK 1.4, Jul03, scolebourne)
    // Whitespace:
    // Character.isWhitespace() is faster than WHITESPACE.indexOf()
    // where WHITESPACE is a string of all whitespace characters
    //
    // Character access:
    // String.charAt(n) versus toCharArray(), then array[n]
    // String.charAt(n) is about 15% worse for a 10K string
    // They are about equal for a length 50 string
    // String.charAt(n) is about 4 times better for a length 3 string
    // String.charAt(n) is best bet overall
    //
    // Append:
    // String.concat about twice as fast as StringBuffer.append
    // (not sure who tested this)

    /**
     * The empty String <code>""</code>.
     * @since 2.0
     */
    public static final String EMPTY = "";

    /**
     * Represents a failed index search.
     * @since 2.1
     */
    public static final int INDEX_NOT_FOUND = -1;

    /**
     * <p><code>StringUtils</code> instances should NOT be constructed in
     * standard programming. Instead, the class should be used through its
     * static methods, e.g. <code>StringUtils.isEmpty(" foo ");</code>.</p>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public StringUtils() {
        super();
    }

    /**
     * <p>Upper-cases the first character of a String and leaves the
     * remaining characters untouched.</p>
     *
     * <pre>
     * StringUtils.ucFirst(null)   = null
     * StringUtils.ucFirst("")     = ""
     * StringUtils.ucFirst("name") = "Name"
     * StringUtils.ucFirst("Name") = "Name"
     * </pre>
     *
     * <p>The conversion uses {@link Character#toUpperCase(char)}, so the
     * result does not depend on the JVM's default locale.</p>
     *
     * @param name the String to convert, may be null
     * @return the String with its first character upper-cased,
     *  <code>null</code> if null String input
     */
    public static String ucFirst(final String name) {
        // Null and empty input have no first character to convert.
        if (name == null || name.length() == 0) {
            return name;
        }
        final char first = name.charAt(0);
        final char upper = Character.toUpperCase(first);
        if (first == upper) {
            // Already upper case (or has no upper-case form): nothing to do.
            return name;
        }
        return upper + name.substring(1);
    }

    // Empty checks
    //-----------------------------------------------------------------------
    /**
     * <p>Checks if a String is empty ("") or null.</p>
     *
     * <pre>
     * StringUtils.isEmpty(null)      = true
     * StringUtils.isEmpty("")        = true
     * StringUtils.isEmpty(" ")       = false
     * StringUtils.isEmpty("bob")     = false
     * StringUtils.isEmpty("  bob  ") = false
     * </pre>
     *
     * <p>NOTE: This method does not trim the String, so a String made up
     * only of whitespace is not considered empty.</p>
     *
     * @param str the String to check, may be null
     * @return <code>true</code> if the String is empty or null
     */
    public static boolean isEmpty(final String str) {
        return str == null || str.length() == 0;
    }

    // Padding
    //-----------------------------------------------------------------------
    /**
     * <p>Repeat a String <code>repeat</code> times to form a
     * new String.</p>
     *
     * <pre>
     * StringUtils.repeat(null, 2) = null
     * StringUtils.repeat("", 0)   = ""
     * StringUtils.repeat("", 2)   = ""
     * StringUtils.repeat("a", 3)  = "aaa"
     * StringUtils.repeat("ab", 2) = "abab"
     * StringUtils.repeat("a", -2) = ""
     * </pre>
     *
     * @param str the String to repeat, may be null
     * @param repeat number of times to repeat str, negative treated as zero
     * @return a new String consisting of the original String repeated,
     *  <code>null</code> if null String input
     */
    public static String repeat(final String str, final int repeat) {
        if (str == null) {
            return null;
        }
        if (repeat <= 0) {
            return EMPTY;
        }
        final int inputLength = str.length();
        if (repeat == 1 || inputLength == 0) {
            return str;
        }
        if (inputLength == 1) {
            // A single character is just padding with that character.
            return padding(repeat, str.charAt(0));
        }

        final int outputLength = inputLength * repeat;
        if (inputLength == 2) {
            // Fast path: fill the array pairwise, no buffer bookkeeping needed.
            final char ch0 = str.charAt(0);
            final char ch1 = str.charAt(1);
            final char[] output = new char[outputLength];
            for (int i = 0; i < outputLength; i += 2) {
                output[i] = ch0;
                output[i + 1] = ch1;
            }
            return new String(output);
        }

        final StringBuffer buf = new StringBuffer(outputLength);
        for (int i = 0; i < repeat; i++) {
            buf.append(str);
        }
        return buf.toString();
    }

    // Splitting
    //-----------------------------------------------------------------------
    /**
     * <p>Splits the provided text into an array, using whitespace as the
     * separator.
     * Whitespace is defined by {@link Character#isWhitespace(char)}.</p>
     *
     * <p>The separator is not included in the returned String array.
     * Adjacent separators are treated as one separator.</p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * StringUtils.split(null)       = null
     * StringUtils.split("")         = []
     * StringUtils.split("abc def")  = ["abc", "def"]
     * StringUtils.split("abc  def") = ["abc", "def"]
     * StringUtils.split(" abc ")    = ["abc"]
     * </pre>
     *
     * @param str the String to parse, may be null
     * @return an array of parsed Strings, <code>null</code> if null String input
     */
    public static String[] split(String str) {
        return splitWorker(str, null, -1, false);
    }

    /**
     * <p>Splits the provided text into an array, separator specified.
     * This is an alternative to using StringTokenizer.</p>
     *
     * <p>The separator is not included in the returned String array.
     * Adjacent separators are treated as one separator.</p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * StringUtils.split(null, *)         = null
     * StringUtils.split("", *)           = []
     * StringUtils.split("a.b.c", '.')    = ["a", "b", "c"]
     * StringUtils.split("a..b.c", '.')   = ["a", "b", "c"]
     * StringUtils.split("a:b:c", '.')    = ["a:b:c"]
     * StringUtils.split("a b c", ' ')    = ["a", "b", "c"]
     * </pre>
     *
     * @param str the String to parse, may be null
     * @param separatorChar the character used as the delimiter
     * @return an array of parsed Strings, <code>null</code> if null String input
     * @since 2.0
     */
    public static String[] split(String str, char separatorChar) {
        return splitWorker(str, String.valueOf(separatorChar), -1, false);
    }

    /**
     * <p>Splits the provided text into an array, separators specified.
     * This is an alternative to using StringTokenizer.</p>
     *
     * <p>The separator is not included in the returned String array.
     * Adjacent separators are treated as one separator.</p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * A <code>null</code> separatorChars splits on whitespace.</p>
     *
     * <pre>
     * StringUtils.split(null, *)         = null
     * StringUtils.split("", *)           = []
     * StringUtils.split("abc def", null) = ["abc", "def"]
     * StringUtils.split("abc def", " ")  = ["abc", "def"]
     * StringUtils.split("abc  def", " ") = ["abc", "def"]
     * StringUtils.split("ab:cd:ef", ":") = ["ab", "cd", "ef"]
     * </pre>
     *
     * @param str the String to parse, may be null
     * @param separatorChars the characters used as the delimiters,
     *  <code>null</code> splits on whitespace
     * @return an array of parsed Strings, <code>null</code> if null String input
     */
    public static String[] split(String str, String separatorChars) {
        return splitWorker(str, separatorChars, -1, false);
    }

    /**
     * <p>Splits the provided text into an array with a maximum length,
     * separators specified.</p>
     *
     * <p>The separator is not included in the returned String array.
     * Adjacent separators are treated as one separator.</p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * A <code>null</code> separatorChars splits on whitespace.</p>
     *
     * <p>If more than <code>max</code> delimited substrings are found, the last
     * returned string includes all characters after the first <code>max - 1</code>
     * returned strings (including separator characters).</p>
     *
     * <pre>
     * StringUtils.split(null, *, *)            = null
     * StringUtils.split("", *, *)              = []
     * StringUtils.split("ab de fg", null, 0)   = ["ab", "de", "fg"]
     * StringUtils.split("ab   de fg", null, 0) = ["ab", "de", "fg"]
     * StringUtils.split("ab:cd:ef", ":", 0)    = ["ab", "cd", "ef"]
     * StringUtils.split("ab:cd:ef", ":", 2)    = ["ab", "cd:ef"]
     * </pre>
     *
     * @param str the String to parse, may be null
     * @param separatorChars the characters used as the delimiters,
     *  <code>null</code> splits on whitespace
     * @param max the maximum number of elements to include in the
     *  array. A zero or negative value implies no limit
     * @return an array of parsed Strings, <code>null</code> if null String input
     */
    public static String[] split(String str, String separatorChars,
            int max) {
        return splitWorker(str, separatorChars, max, false);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Splits the provided text into an array, using whitespace as the
     * separator, preserving all tokens, including empty tokens created by
     * adjacent separators. This is an alternative to using StringTokenizer.
     * Whitespace is defined by {@link Character#isWhitespace(char)}.</p>
     *
     * <p>The separator is not included in the returned String array.
     * Adjacent separators are treated as separators for empty tokens.</p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * StringUtils.splitPreserveAllTokens(null)       = null
     * StringUtils.splitPreserveAllTokens("")         = []
     * StringUtils.splitPreserveAllTokens("abc def")  = ["abc", "def"]
     * StringUtils.splitPreserveAllTokens("abc  def") = ["abc", "", "def"]
     * StringUtils.splitPreserveAllTokens(" abc ")    = ["", "abc", ""]
     * </pre>
     *
     * @param str the String to parse, may be <code>null</code>
     * @return an array of parsed Strings, <code>null</code> if null String input
     * @since 2.1
     */
    public static String[] splitPreserveAllTokens(final String str) {
        return splitWorker(str, null, -1, true);
    }

    /**
     * <p>Splits the provided text into an array, separator specified,
     * preserving all tokens, including empty tokens created by adjacent
     * separators. This is an alternative to using StringTokenizer.</p>
     *
     * <p>The separator is not included in the returned String array.
     * Adjacent separators are treated as separators for empty tokens.</p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * StringUtils.splitPreserveAllTokens(null, *)         = null
     * StringUtils.splitPreserveAllTokens("", *)           = []
     * StringUtils.splitPreserveAllTokens("a.b.c", '.')    = ["a", "b", "c"]
     * StringUtils.splitPreserveAllTokens("a..b.c", '.')   = ["a", "", "b", "c"]
     * StringUtils.splitPreserveAllTokens("a:b:c", '.')    = ["a:b:c"]
     * StringUtils.splitPreserveAllTokens("a b c", ' ')    = ["a", "b", "c"]
     * StringUtils.splitPreserveAllTokens("a b c ", ' ')   = ["a", "b", "c", ""]
     * StringUtils.splitPreserveAllTokens("a b c  ", ' ')  = ["a", "b", "c", "", ""]
     * StringUtils.splitPreserveAllTokens(" a b c", ' ')   = ["", "a", "b", "c"]
     * StringUtils.splitPreserveAllTokens("  a b c", ' ')  = ["", "", "a", "b", "c"]
     * StringUtils.splitPreserveAllTokens(" a b c ", ' ')  = ["", "a", "b", "c", ""]
     * </pre>
     *
     * @param str the String to parse, may be <code>null</code>
     * @param separatorChar the character used as the delimiter
     * @return an array of parsed Strings, <code>null</code> if null String input
     * @since 2.1
     */
    public static String[] splitPreserveAllTokens(final String str,
            final char separatorChar) {
        return splitWorker(str, String.valueOf(separatorChar), -1, true);
    }

    /**
     * <p>Splits the provided text into an array, separators specified,
     * preserving all tokens, including empty tokens created by adjacent
     * separators. This is an alternative to using StringTokenizer.</p>
     *
     * <p>The separator is not included in the returned String array.
     * Adjacent separators are treated as separators for empty tokens.</p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * A <code>null</code> separatorChars splits on whitespace.</p>
     *
     * <pre>
     * StringUtils.splitPreserveAllTokens(null, *)           = null
     * StringUtils.splitPreserveAllTokens("", *)             = []
     * StringUtils.splitPreserveAllTokens("abc def", null)   = ["abc", "def"]
     * StringUtils.splitPreserveAllTokens("abc def", " ")    = ["abc", "def"]
     * StringUtils.splitPreserveAllTokens("abc  def", " ")   = ["abc", "", "def"]
     * StringUtils.splitPreserveAllTokens("ab:cd:ef", ":")   = ["ab", "cd", "ef"]
     * StringUtils.splitPreserveAllTokens("ab:cd:ef:", ":")  = ["ab", "cd", "ef", ""]
     * StringUtils.splitPreserveAllTokens("ab:cd:ef::", ":") = ["ab", "cd", "ef", "", ""]
     * StringUtils.splitPreserveAllTokens("ab::cd:ef", ":")  = ["ab", "", "cd", "ef"]
     * StringUtils.splitPreserveAllTokens(":cd:ef", ":")     = ["", "cd", "ef"]
     * StringUtils.splitPreserveAllTokens("::cd:ef", ":")    = ["", "", "cd", "ef"]
     * StringUtils.splitPreserveAllTokens(":cd:ef:", ":")    = ["", "cd", "ef", ""]
     * </pre>
     *
     * @param str the String to parse, may be <code>null</code>
     * @param separatorChars the characters used as the delimiters,
     *  <code>null</code> splits on whitespace
     * @return an array of parsed Strings, <code>null</code> if null String input
     * @since 2.1
     */
    public static String[] splitPreserveAllTokens(final String str,
            final String separatorChars) {
        return splitWorker(str, separatorChars, -1, true);
    }

    /**
     * <p>Splits the provided text into an array with a maximum length,
     * separators specified, preserving all tokens, including empty tokens
     * created by adjacent separators.</p>
     *
     * <p>The separator is not included in the returned String array.
     * Adjacent separators are treated as separators for empty tokens.</p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * A <code>null</code> separatorChars splits on whitespace.</p>
     *
     * <p>If more than <code>max</code> delimited substrings are found, the last
     * returned string includes all characters after the first <code>max - 1</code>
     * returned strings (including separator characters).</p>
     *
     * <pre>
     * StringUtils.splitPreserveAllTokens(null, *, *)            = null
     * StringUtils.splitPreserveAllTokens("", *, *)              = []
     * StringUtils.splitPreserveAllTokens("ab de fg", null, 0)   = ["ab", "de", "fg"]
     * StringUtils.splitPreserveAllTokens("ab   de fg", null, 0) = ["ab", "", "", "de", "fg"]
     * StringUtils.splitPreserveAllTokens("ab:cd:ef", ":", 0)    = ["ab", "cd", "ef"]
     * StringUtils.splitPreserveAllTokens("ab:cd:ef", ":", 2)    = ["ab", "cd:ef"]
     * StringUtils.splitPreserveAllTokens("ab   de fg", null, 2) = ["ab", "  de fg"]
     * StringUtils.splitPreserveAllTokens("ab   de fg", null, 3) = ["ab", "", " de fg"]
     * StringUtils.splitPreserveAllTokens("ab   de fg", null, 4) = ["ab", "", "", "de fg"]
     * </pre>
     *
     * @param str the String to parse, may be <code>null</code>
     * @param separatorChars the characters used as the delimiters,
     *  <code>null</code> splits on whitespace
     * @param max the maximum number of elements to include in the
     *  array. A zero or negative value implies no limit
     * @return an array of parsed Strings, <code>null</code> if null String input
     * @since 2.1
     */
    public static String[] splitPreserveAllTokens(final String str,
            final String separatorChars, final int max) {
        return splitWorker(str, separatorChars, max, true);
    }

    /**
     * Performs the logic for all <code>split</code> and
     * <code>splitPreserveAllTokens</code> methods.
     *
     * @param str the String to parse, may be <code>null</code>
     * @param separatorChars the characters used as the delimiters,
     *  <code>null</code> splits on whitespace
     * @param max the maximum number of elements to include in the
     *  array. A zero or negative value implies no limit.
     * @param preserveAllTokens if <code>true</code>, adjacent separators are
     *  treated as empty token separators; if <code>false</code>, adjacent
     *  separators are treated as one separator.
     * @return an array of parsed Strings, <code>null</code> if null String input
     */
    private static String[] splitWorker(final String str,
            final String separatorChars, final int max,
            final boolean preserveAllTokens) {
        // Direct code is quicker than StringTokenizer.
        // Also, StringTokenizer uses isSpace() not isWhitespace()

        if (str == null) {
            return null;
        }
        final int len = str.length();
        if (len == 0) {
            return EMPTY_STRING_ARRAY;
        }
        final List list = new ArrayList();
        // 1-based position of the next token to be emitted; compared with max.
        int sizePlus1 = 1;
        int i = 0;
        int start = 0;
        // match: the current token contains at least one non-separator char.
        boolean match = false;
        // lastMatch: the previous char was a separator that closed a token, so
        // in preserve mode a trailing empty token is still owed at end of input.
        boolean lastMatch = false;
        while (i < len) {
            if (isSeparator(str.charAt(i), separatorChars)) {
                if (match || preserveAllTokens) {
                    lastMatch = true;
                    if (sizePlus1++ == max) {
                        // Limit reached: the final token swallows the rest of
                        // the input, separators included, and ends the loop.
                        i = len;
                        lastMatch = false;
                    }
                    list.add(str.substring(start, i));
                    match = false;
                }
                start = ++i;
            } else {
                lastMatch = false;
                match = true;
                i++;
            }
        }
        if (match || (preserveAllTokens && lastMatch)) {
            list.add(str.substring(start, i));
        }
        return (String[]) list.toArray(new String[list.size()]);
    }

    /**
     * Tells whether a character acts as a separator for the split methods.
     *
     * @param ch the character to test
     * @param separatorChars the set of separator characters,
     *  <code>null</code> means any whitespace character
     * @return <code>true</code> if <code>ch</code> is a separator
     */
    private static boolean isSeparator(final char ch,
            final String separatorChars) {
        if (separatorChars == null) {
            return Character.isWhitespace(ch);
        }
        return separatorChars.indexOf(ch) >= 0;
    }

    /**
     * <p>Returns padding using the specified delimiter repeated
     * to a given length.</p>
     *
     * <pre>
     * StringUtils.padding(0, 'e')  = ""
     * StringUtils.padding(3, 'e')  = "eee"
     * StringUtils.padding(-2, 'e') = IndexOutOfBoundsException
     * </pre>
     *
     * <p>Note: this method does not support padding with
     * <a href="http://www.unicode.org/glossary/#supplementary_character">Unicode Supplementary Characters</a>
     * as they require a pair of <code>char</code>s to be represented.
     * If you need to support full I18N in your applications
     * consider using {@link #repeat(String, int)} instead.
     * </p>
     *
     * @param repeat number of times to repeat delim
     * @param padChar character to repeat
     * @return String with repeated character
     * @throws IndexOutOfBoundsException if <code>repeat &lt; 0</code>
     * @see #repeat(String, int)
     */
    public static String padding(final int repeat, final char padChar)
            throws IndexOutOfBoundsException {
        if (repeat < 0) {
            throw new IndexOutOfBoundsException(
                    "Cannot pad a negative amount: " + repeat);
        }
        final char[] buf = new char[repeat];
        for (int i = 0; i < buf.length; i++) {
            buf[i] = padChar;
        }
        return new String(buf);
    }

    /**
     * <p>Reads all remaining characters from the given reader and returns
     * them as a single String.</p>
     *
     * <p>The reader is always closed before this method returns, whether
     * reading succeeds or fails.</p>
     *
     * @param reader the reader to drain and close, must not be null
     * @return everything that could be read from <code>reader</code>
     * @throws RuntimeException wrapping the <code>IOException</code> if
     *  reading from or closing the reader fails
     */
    public static String readFileAsString(Reader reader) {
        try {
            try {
                final StringBuffer fileData = new StringBuffer(1000);
                // One buffer is reused for every chunk; only the first
                // numRead chars of it are valid after each read.
                final char[] buf = new char[1024];
                int numRead;
                while ((numRead = reader.read(buf)) != -1) {
                    fileData.append(buf, 0, numRead);
                }
                return fileData.toString();
            } finally {
                // Close on the failure path too, so the reader never leaks.
                reader.close();
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
|