001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.commons.lang;
018:
019: /**
020: * <p>Operations on Strings that contain words.</p>
021: *
022: * <p>This class tries to handle <code>null</code> input gracefully.
023: * An exception will not be thrown for a <code>null</code> input.
024: * Each method documents its behaviour in more detail.</p>
025: *
026: * @author Apache Jakarta Velocity
027: * @author Stephen Colebourne
028: * @author <a href="mailto:hps@intermeta.de">Henning P. Schmiedehausen</a>
029: * @author Gary Gregory
030: * @since 2.0
031: * @version $Id: WordUtils.java 471626 2006-11-06 04:02:09Z bayard $
032: */
public class WordUtils {

    /**
     * Line separator inserted by {@link #wrap(String, int, String, boolean)}
     * when the caller supplies <code>null</code>. Resolved once, when this
     * class is initialised, and never <code>null</code>.
     */
    private static final String DEFAULT_LINE_SEPARATOR = readLineSeparator();

    /**
     * <p><code>WordUtils</code> instances should NOT be constructed in
     * standard programming. Instead, the class should be used as
     * <code>WordUtils.wrap("foo bar", 20);</code>.</p>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public WordUtils() {
        super();
    }

    /**
     * Reads the <code>line.separator</code> system property.
     *
     * @return the property value, or <code>"\n"</code> when the property is
     *  missing or may not be read
     */
    private static String readLineSeparator() {
        try {
            String separator = System.getProperty("line.separator");
            return (separator == null ? "\n" : separator);
        } catch (SecurityException ex) {
            // Falling back keeps wrap() usable; a null separator would be
            // appended to the output as the literal text "null".
            return "\n";
        }
    }

    // Wrapping
    //-----------------------------------------------------------------------
    /**
     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
     *
     * <p>New lines will be separated by the system property line separator.
     * Very long words, such as URLs will <i>not</i> be wrapped.</p>
     *
     * <p>Leading spaces on a new line are stripped while more than
     * <code>wrapLength</code> characters remain to be processed.
     * Trailing spaces are not stripped.</p>
     *
     * <pre>
     * WordUtils.wrap(null, *) = null
     * WordUtils.wrap("", *) = ""
     * </pre>
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @return a line with newlines inserted, <code>null</code> if null input
     * @see #wrap(String, int, String, boolean)
     */
    public static String wrap(String str, int wrapLength) {
        return wrap(str, wrapLength, null, false);
    }

    /**
     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
     *
     * <p>The text is consumed from left to right. While more than
     * <code>wrapLength</code> characters remain, spaces at the start of the
     * next line are skipped and the line is broken at the last space found at
     * or before the wrap column. The final remainder (at most
     * <code>wrapLength</code> characters) is appended unchanged.
     * Trailing spaces are not stripped.</p>
     *
     * <p>A word longer than <code>wrapLength</code> is either cut into
     * <code>wrapLength</code>-sized pieces (<code>wrapLongWords</code> is
     * <code>true</code>) or left intact so that its line extends beyond the
     * wrap column (<code>wrapLongWords</code> is <code>false</code>).</p>
     *
     * <pre>
     * WordUtils.wrap(null, *, *, *) = null
     * WordUtils.wrap("", *, *, *) = ""
     * WordUtils.wrap("abcdefghij", 3, "\n", true) = "abc\ndef\nghi\nj"
     * WordUtils.wrap("abcdefghij klm", 3, "\n", false) = "abcdefghij\nklm"
     * </pre>
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @param newLineStr  the string to insert for a new line,
     *  <code>null</code> uses the system property line separator
     * @param wrapLongWords  true if long words (such as URLs) should be wrapped
     * @return a line with newlines inserted, <code>null</code> if null input
     */
    public static String wrap(String str, int wrapLength,
            String newLineStr, boolean wrapLongWords) {
        if (str == null) {
            return null;
        }
        if (newLineStr == null) {
            newLineStr = DEFAULT_LINE_SEPARATOR;
        }
        if (wrapLength < 1) {
            wrapLength = 1;
        }
        int inputLineLength = str.length();
        int offset = 0;
        // 32 spare characters leave room for a few inserted line separators
        StringBuffer wrappedLine = new StringBuffer(inputLineLength + 32);

        while ((inputLineLength - offset) > wrapLength) {
            if (str.charAt(offset) == ' ') {
                // never start an output line with a space
                offset++;
                continue;
            }
            // last space that still keeps the line within the wrap column
            int spaceToWrapAt = str.lastIndexOf(' ', wrapLength + offset);

            if (spaceToWrapAt >= offset) {
                // normal case: break at that space and drop the space itself
                wrappedLine.append(str.substring(offset, spaceToWrapAt));
                wrappedLine.append(newLineStr);
                offset = spaceToWrapAt + 1;
            } else if (wrapLongWords) {
                // really long word or URL: cut it one full line at a time
                wrappedLine.append(str.substring(offset, wrapLength + offset));
                wrappedLine.append(newLineStr);
                offset += wrapLength;
            } else {
                // do not cut the long word; break at the first space after it
                spaceToWrapAt = str.indexOf(' ', wrapLength + offset);
                if (spaceToWrapAt >= 0) {
                    wrappedLine.append(str.substring(offset, spaceToWrapAt));
                    wrappedLine.append(newLineStr);
                    offset = spaceToWrapAt + 1;
                } else {
                    // the long word runs to the end of the input
                    wrappedLine.append(str.substring(offset));
                    offset = inputLineLength;
                }
            }
        }

        // Whatever is left in line is short enough to just pass through
        wrappedLine.append(str.substring(offset));

        return wrappedLine.toString();
    }

    // Capitalizing
    //-----------------------------------------------------------------------
    /**
     * <p>Capitalizes all the whitespace separated words in a String.
     * Only the first letter of each word is changed. To convert the
     * rest of each word to lowercase at the same time,
     * use {@link #capitalizeFully(String)}.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalize(null) = null
     * WordUtils.capitalize("") = ""
     * WordUtils.capitalize("i am FINE") = "I Am FINE"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @return capitalized String, <code>null</code> if null String input
     * @see #uncapitalize(String)
     * @see #capitalizeFully(String)
     */
    public static String capitalize(String str) {
        return capitalize(str, null);
    }

    /**
     * <p>Capitalizes all the delimiter separated words in a String.
     * Only the first letter of each word is changed. To convert the
     * rest of each word to lowercase at the same time,
     * use {@link #capitalizeFully(String, char[])}.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be capitalized. </p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * An empty delimiter array returns the input unchanged.
     * Capitalization uses the unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalize(null, *) = null
     * WordUtils.capitalize("", *) = ""
     * WordUtils.capitalize(*, new char[0]) = *
     * WordUtils.capitalize("i am fine", null) = "I Am Fine"
     * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, <code>null</code> if null String input
     * @see #uncapitalize(String, char[])
     * @see #capitalizeFully(String, char[])
     * @since 2.1
     */
    public static String capitalize(String str, char[] delimiters) {
        int delimLen = (delimiters == null ? -1 : delimiters.length);
        if (str == null || str.length() == 0 || delimLen == 0) {
            return str;
        }
        int strLen = str.length();
        StringBuffer buffer = new StringBuffer(strLen);
        // true at the start of the input and directly after any delimiter
        boolean capitalizeNext = true;
        for (int i = 0; i < strLen; i++) {
            char ch = str.charAt(i);

            if (isDelimiter(ch, delimiters)) {
                buffer.append(ch);
                capitalizeNext = true;
            } else if (capitalizeNext) {
                buffer.append(Character.toTitleCase(ch));
                capitalizeNext = false;
            } else {
                buffer.append(ch);
            }
        }
        return buffer.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Converts all the whitespace separated words in a String into capitalized words,
     * that is each word is made up of a titlecase character and then a series of
     * lowercase characters. </p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalizeFully(null) = null
     * WordUtils.capitalizeFully("") = ""
     * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @return capitalized String, <code>null</code> if null String input
     */
    public static String capitalizeFully(String str) {
        return capitalizeFully(str, null);
    }

    /**
     * <p>Converts all the delimiter separated words in a String into capitalized words,
     * that is each word is made up of a titlecase character and then a series of
     * lowercase characters. </p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * Delimiters are matched against the characters of the input exactly as
     * given and are copied to the result unchanged, so a delimiter may itself
     * be an upper case letter. Each run of non-delimiter characters is
     * converted with {@link String#toLowerCase()} and its first character is
     * then title cased.</p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * An empty delimiter array returns the input unchanged.
     * Capitalization uses the unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalizeFully(null, *) = null
     * WordUtils.capitalizeFully("", *) = ""
     * WordUtils.capitalizeFully(*, new char[0]) = *
     * WordUtils.capitalizeFully("i am FINE", null) = "I Am Fine"
     * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
     * WordUtils.capitalizeFully("oneXTWOXthree", {'X'}) = "OneXTwoXThree"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, <code>null</code> if null String input
     * @since 2.1
     */
    public static String capitalizeFully(String str, char[] delimiters) {
        int delimLen = (delimiters == null ? -1 : delimiters.length);
        if (str == null || str.length() == 0 || delimLen == 0) {
            return str;
        }
        int strLen = str.length();
        StringBuffer buffer = new StringBuffer(strLen);
        int wordStart = 0;
        // i == strLen is a virtual delimiter that flushes the last word
        for (int i = 0; i <= strLen; i++) {
            boolean atEnd = (i == strLen);
            if (atEnd || isDelimiter(str.charAt(i), delimiters)) {
                if (i > wordStart) {
                    // Lower-case the word only; lower-casing the whole input
                    // first would stop upper case delimiters from matching.
                    String word = str.substring(wordStart, i).toLowerCase();
                    buffer.append(Character.toTitleCase(word.charAt(0)));
                    buffer.append(word.substring(1));
                }
                if (!atEnd) {
                    buffer.append(str.charAt(i));
                }
                wordStart = i + 1;
            }
        }
        return buffer.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Uncapitalizes all the whitespace separated words in a String.
     * Only the first letter of each word is changed.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.uncapitalize(null) = null
     * WordUtils.uncapitalize("") = ""
     * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
     * </pre>
     *
     * @param str  the String to uncapitalize, may be null
     * @return uncapitalized String, <code>null</code> if null String input
     * @see #capitalize(String)
     */
    public static String uncapitalize(String str) {
        return uncapitalize(str, null);
    }

    /**
     * <p>Uncapitalizes all the delimiter separated words in a String.
     * Only the first letter of each word is changed.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be uncapitalized. </p>
     *
     * <p>If the delimiters array is null, then Whitespace is used.
     * Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.
     * An empty delimiter array returns the input unchanged.</p>
     *
     * <pre>
     * WordUtils.uncapitalize(null, *) = null
     * WordUtils.uncapitalize("", *) = ""
     * WordUtils.uncapitalize(*, new char[0]) = *
     * WordUtils.uncapitalize("I Am FINE", null) = "i am fINE"
     * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
     * </pre>
     *
     * @param str  the String to uncapitalize, may be null
     * @param delimiters  set of characters to determine uncapitalization, null means whitespace
     * @return uncapitalized String, <code>null</code> if null String input
     * @see #capitalize(String, char[])
     * @since 2.1
     */
    public static String uncapitalize(String str, char[] delimiters) {
        int delimLen = (delimiters == null ? -1 : delimiters.length);
        if (str == null || str.length() == 0 || delimLen == 0) {
            return str;
        }
        int strLen = str.length();
        StringBuffer buffer = new StringBuffer(strLen);
        // true at the start of the input and directly after any delimiter
        boolean uncapitalizeNext = true;
        for (int i = 0; i < strLen; i++) {
            char ch = str.charAt(i);

            if (isDelimiter(ch, delimiters)) {
                buffer.append(ch);
                uncapitalizeNext = true;
            } else if (uncapitalizeNext) {
                buffer.append(Character.toLowerCase(ch));
                uncapitalizeNext = false;
            } else {
                buffer.append(ch);
            }
        }
        return buffer.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Swaps the case of a String using a word based algorithm.</p>
     *
     * <ul>
     *  <li>Upper case character converts to Lower case</li>
     *  <li>Title case character converts to Lower case</li>
     *  <li>Lower case character after Whitespace or at start converts to Title case</li>
     *  <li>Other Lower case character converts to Upper case</li>
     * </ul>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.swapCase(null) = null
     * WordUtils.swapCase("") = ""
     * WordUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
     * </pre>
     *
     * @param str  the String to swap case, may be null
     * @return the changed String, <code>null</code> if null String input
     */
    public static String swapCase(String str) {
        int strLen;
        if (str == null || (strLen = str.length()) == 0) {
            return str;
        }
        StringBuffer buffer = new StringBuffer(strLen);

        // the start of the input counts as following whitespace
        boolean afterWhitespace = true;

        for (int i = 0; i < strLen; i++) {
            char ch = str.charAt(i);
            char swapped;
            if (Character.isUpperCase(ch) || Character.isTitleCase(ch)) {
                swapped = Character.toLowerCase(ch);
            } else if (Character.isLowerCase(ch)) {
                if (afterWhitespace) {
                    swapped = Character.toTitleCase(ch);
                } else {
                    swapped = Character.toUpperCase(ch);
                }
            } else {
                swapped = ch;
            }
            buffer.append(swapped);
            afterWhitespace = Character.isWhitespace(ch);
        }
        return buffer.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Extracts the initial letters from each word in the String.</p>
     *
     * <p>The first letter of the string and all first letters after
     * whitespace are returned as a new string.
     * Their case is not changed.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.initials(null) = null
     * WordUtils.initials("") = ""
     * WordUtils.initials("Ben John Lee") = "BJL"
     * WordUtils.initials("Ben J.Lee") = "BJ"
     * </pre>
     *
     * @param str  the String to get initials from, may be null
     * @return String of initial letters, <code>null</code> if null String input
     * @see #initials(String,char[])
     * @since 2.2
     */
    public static String initials(String str) {
        return initials(str, null);
    }

    /**
     * <p>Extracts the initial letters from each word in the String.</p>
     *
     * <p>The first letter of the string and all first letters after the
     * defined delimiters are returned as a new string.
     * Their case is not changed.</p>
     *
     * <p>If the delimiters array is null, then Whitespace is used.
     * Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.
     * An empty delimiter array returns an empty String.</p>
     *
     * <pre>
     * WordUtils.initials(null, *) = null
     * WordUtils.initials("", *) = ""
     * WordUtils.initials("Ben John Lee", null) = "BJL"
     * WordUtils.initials("Ben J.Lee", null) = "BJ"
     * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
     * WordUtils.initials(*, new char[0]) = ""
     * </pre>
     *
     * @param str  the String to get initials from, may be null
     * @param delimiters  set of characters to determine words, null means whitespace
     * @return String of initial letters, <code>null</code> if null String input
     * @see #initials(String)
     * @since 2.2
     */
    public static String initials(String str, char[] delimiters) {
        if (str == null || str.length() == 0) {
            return str;
        }
        if (delimiters != null && delimiters.length == 0) {
            return "";
        }
        int strLen = str.length();
        // Two initials are always separated by at least one delimiter, so
        // there can be no more than strLen / 2 + 1 of them.
        char[] buf = new char[strLen / 2 + 1];
        int count = 0;
        // the start of the input counts as following a delimiter
        boolean lastWasGap = true;
        for (int i = 0; i < strLen; i++) {
            char ch = str.charAt(i);

            if (isDelimiter(ch, delimiters)) {
                lastWasGap = true;
            } else if (lastWasGap) {
                buf[count++] = ch;
                lastWasGap = false;
            }
            // any other character is inside a word and is ignored
        }
        return new String(buf, 0, count);
    }

    //-----------------------------------------------------------------------
    /**
     * Is the character a delimiter.
     *
     * @param ch  the character to check
     * @param delimiters  the delimiters, <code>null</code> means that
     *  {@link Character#isWhitespace(char)} decides
     * @return true if it is a delimiter
     */
    private static boolean isDelimiter(char ch, char[] delimiters) {
        if (delimiters == null) {
            return Character.isWhitespace(ch);
        }
        for (int i = 0, isize = delimiters.length; i < isize; i++) {
            if (ch == delimiters[i]) {
                return true;
            }
        }
        return false;
    }

}
|