001: /**
002: * $RCSfile: StringUtils.java,v $
003: * $Revision: 1.3 $
004: * $Date: 2006/01/07 00:21:06 $
005: *
006: * Copyright (C) 2000 CoolServlets.com. All rights reserved.
007: *
008: * ===================================================================
009: * The Apache Software License, Version 1.1
010: *
011: * Redistribution and use in source and binary forms, with or without
012: * modification, are permitted provided that the following conditions
013: * are met:
014: *
015: * 1. Redistributions of source code must retain the above copyright
016: * notice, this list of conditions and the following disclaimer.
017: *
018: * 2. Redistributions in binary form must reproduce the above copyright
019: * notice, this list of conditions and the following disclaimer in
020: * the documentation and/or other materials provided with the
021: * distribution.
022: *
023: * 3. The end-user documentation included with the redistribution,
024: * if any, must include the following acknowledgment:
025: * "This product includes software developed by
026: * CoolServlets.com (http://www.Yasna.com)."
027: * Alternately, this acknowledgment may appear in the software itself,
028: * if and wherever such third-party acknowledgments normally appear.
029: *
030: * 4. The names "Jive" and "CoolServlets.com" must not be used to
031: * endorse or promote products derived from this software without
032: * prior written permission. For written permission, please
033: * contact webmaster@Yasna.com.
034: *
035: * 5. Products derived from this software may not be called "Jive",
036: * nor may "Jive" appear in their name, without prior written
037: * permission of CoolServlets.com.
038: *
039: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
040: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
041: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
042: * DISCLAIMED. IN NO EVENT SHALL COOLSERVLETS.COM OR
043: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
044: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
045: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
046: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
047: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
048: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
049: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
050: * SUCH DAMAGE.
051: * ====================================================================
052: *
053: * This software consists of voluntary contributions made by many
054: * individuals on behalf of CoolServlets.com. For more information
055: * on CoolServlets.com, please see <http://www.Yasna.com>.
056: */package com.Yasna.util;
057:
058: import java.security.*;
059: import java.text.*;
060: import java.util.*;
061:
/**
 * Utility class to perform common String manipulation algorithms:
 * search-and-replace, HTML/XML escaping, MD5 hashing, simple word
 * tokenizing, random String generation, word-boundary chopping and
 * word highlighting.
 * <p>
 * All methods are static. The only shared mutable state is the cached
 * MessageDigest used by {@link #hash(String)}, which is guarded by that
 * method being synchronized.
 */
public class StringUtils {

    /**
     * Lower case hexadecimal digits, indexed by nibble value. Used by toHex.
     */
    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    /**
     * Replaces all instances of oldString with newString in line.
     *
     * @param line the String to search to perform replacements on
     * @param oldString the String that should be replaced by newString
     * @param newString the String that will replace all instances of oldString
     *
     * @return a String with all instances of oldString replaced by newString;
     *      <code>line</code> itself (possibly null) when <code>line</code> is
     *      null, when <code>oldString</code> is null or empty, or when there
     *      is no match.
     * @throws NullPointerException if newString is null and at least one
     *      match was found.
     */
    public static final String replace(String line, String oldString,
            String newString) {
        return replaceAll(line, oldString, newString, false, null);
    }

    /**
     * Replaces all instances of oldString with newString in line with the
     * added feature that matches of oldString in line ignore case.
     * <p>
     * Matching is done in place on the original text with
     * {@link String#regionMatches(boolean, int, String, int, int)}, so it
     * does not depend on the default locale and offsets can never drift
     * because of characters whose lower case form has a different length.
     *
     * @param line the String to search to perform replacements on
     * @param oldString the String that should be replaced by newString
     * @param newString the String that will replace all instances of oldString
     *
     * @return a String with all instances of oldString replaced by newString;
     *      <code>line</code> itself (possibly null) when <code>line</code> is
     *      null, when <code>oldString</code> is null or empty, or when there
     *      is no match.
     * @throws NullPointerException if newString is null and at least one
     *      match was found.
     */
    public static final String replaceIgnoreCase(String line,
            String oldString, String newString) {
        return replaceAll(line, oldString, newString, true, null);
    }

    /**
     * Replaces all instances of oldString with newString in line.
     * The first element of <code>count</code> is set to the number of
     * replacements performed, including zero when nothing was replaced.
     *
     * @param line the String to search to perform replacements on
     * @param oldString the String that should be replaced by newString
     * @param newString the String that will replace all instances of oldString
     * @param count an array whose element 0 receives the number of
     *      replacements; ignored when null or empty.
     *
     * @return a String with all instances of oldString replaced by newString;
     *      <code>line</code> itself (possibly null) when <code>line</code> is
     *      null, when <code>oldString</code> is null or empty, or when there
     *      is no match.
     * @throws NullPointerException if newString is null and at least one
     *      match was found.
     */
    public static final String replace(String line, String oldString,
            String newString, int[] count) {
        return replaceAll(line, oldString, newString, false, count);
    }

    /**
     * Shared implementation of the public replace methods.
     */
    private static String replaceAll(String line, String oldString,
            String newString, boolean ignoreCase, int[] count) {
        int replaced = 0;
        String result = line;
        // An empty search string matches everywhere; treat it (and null) as
        // "nothing to replace" instead of looping or inserting text.
        if (line != null && oldString != null && oldString.length() > 0) {
            int match = indexOf(line, oldString, 0, ignoreCase);
            if (match >= 0) {
                char[] source = line.toCharArray();
                char[] replacement = newString.toCharArray();
                int oLength = oldString.length();
                StringBuffer buf = new StringBuffer(source.length);
                // Index of the first char of line not yet copied into buf.
                int copied = 0;
                while (match >= 0) {
                    buf.append(source, copied, match - copied);
                    buf.append(replacement);
                    copied = match + oLength;
                    replaced++;
                    match = indexOf(line, oldString, copied, ignoreCase);
                }
                buf.append(source, copied, source.length - copied);
                result = buf.toString();
            }
        }
        if (count != null && count.length > 0) {
            count[0] = replaced;
        }
        return result;
    }

    /**
     * Returns the index of the first occurrence of target in text at or
     * after fromIndex, optionally ignoring case, or -1 if there is none.
     */
    private static int indexOf(String text, String target, int fromIndex,
            boolean ignoreCase) {
        if (!ignoreCase) {
            return text.indexOf(target, fromIndex);
        }
        int targetLength = target.length();
        int last = text.length() - targetLength;
        for (int i = Math.max(fromIndex, 0); i <= last; i++) {
            if (text.regionMatches(true, i, target, 0, targetLength)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * This method takes a string which may contain HTML tags (ie, &lt;b&gt;,
     * &lt;table&gt;, etc) and converts the '&lt;' and '&gt;' characters to
     * their HTML escape sequences (<code>&amp;lt;</code> and
     * <code>&amp;gt;</code>). No other character is changed.
     *
     * @param input the text to be converted.
     * @return the input string with the characters '&lt;' and '&gt;' replaced
     *      with their HTML escape sequences; null or empty input is returned
     *      as is.
     */
    public static final String escapeHTMLTags(String input) {
        if (input == null || input.length() == 0) {
            return input;
        }
        StringBuffer buf = new StringBuffer(input.length());
        for (int i = 0; i < input.length(); i++) {
            char ch = input.charAt(i);
            if (ch == '<') {
                buf.append("&lt;");
            } else if (ch == '>') {
                buf.append("&gt;");
            } else {
                buf.append(ch);
            }
        }
        return buf.toString();
    }

    /**
     * Used by the hash method. Created on first use and only touched while
     * holding the class lock (hash is static synchronized).
     */
    private static MessageDigest digest = null;

    /**
     * Hashes a String using the MD5 algorithm and returns the result as a
     * String of lower case hexadecimal numbers. This method is synchronized
     * to avoid excessive MessageDigest object creation. If calling this
     * method becomes a bottleneck in your code, you may wish to maintain a
     * pool of MessageDigest objects instead of using this method.
     * <p>
     * A hash is a one-way function -- that is, given an input, an output is
     * easily computed, but the input is hard to compute from the output.
     * In Jive, every time a user logs in, we take their plain text password,
     * compute the hash, and compare the generated hash to the stored hash.
     * <p>
     * NOTE(review): the String is converted to bytes with the platform
     * default charset. That is kept on purpose so hashes computed earlier
     * keep matching, but it means non-ASCII input can hash differently on
     * differently configured machines. MD5 without a salt is also weak for
     * password storage by current standards.
     *
     * @param data the String to compute the hash of.
     * @return a hashed version of the passed-in String
     * @throws NullPointerException if data is null.
     * @throws IllegalStateException if the MD5 algorithm is not available.
     */
    public synchronized static final String hash(String data) {
        if (digest == null) {
            try {
                digest = MessageDigest.getInstance("MD5");
            } catch (NoSuchAlgorithmException nsae) {
                // Fail with a clear error instead of a NullPointerException
                // on the null digest a few lines further down.
                throw new IllegalStateException(
                        "Failed to load the MD5 MessageDigest. "
                        + "Jive will be unable to function normally.", nsae);
            }
        }
        // digest(byte[]) updates with the input, completes the hash and
        // resets the digest for the next call.
        byte[] input = data.getBytes();
        return toHex(digest.digest(input));
    }

    /**
     * Turns an array of bytes into a String representing each byte as an
     * unsigned, two digit, lower case hex number.
     *
     * @param hash an array of bytes to convert to a hex-string
     * @return generated hex string, twice as long as the input array
     * @throws NullPointerException if hash is null.
     */
    public static final String toHex(byte hash[]) {
        StringBuffer buf = new StringBuffer(hash.length * 2);
        for (int i = 0; i < hash.length; i++) {
            int unsigned = hash[i] & 0xff;
            buf.append(HEX_DIGITS[unsigned >>> 4]);
            buf.append(HEX_DIGITS[unsigned & 0x0f]);
        }
        return buf.toString();
    }

    /**
     * Converts a line of text into an array of lower case words. Words are
     * delimited by the following characters: space , . \r \n : / \ +
     * <p>
     * In the future, this method should be changed to use a
     * BreakIterator.wordInstance(). That class offers much more flexibility.
     *
     * @param text a String of text to convert into an array of words
     * @return text broken up into an array of words; an empty array when
     *      text is null or empty.
     */
    public static final String[] toLowerCaseWordArray(String text) {
        if (text == null || text.length() == 0) {
            return new String[0];
        }
        StringTokenizer tokens = new StringTokenizer(text,
                " ,\r\n.:/\\+");
        String[] words = new String[tokens.countTokens()];
        for (int i = 0; i < words.length; i++) {
            words[i] = tokens.nextToken().toLowerCase();
        }
        return words;
    }

    /**
     * A list of some of the most common words. For searching and indexing, we
     * often want to filter out these words since they just confuse searches.
     * The list was not created scientifically so may be incomplete :)
     */
    private static final String[] commonWords = new String[] { "a",
            "and", "as", "at", "be", "do", "i", "if", "in", "is", "it",
            "so", "the", "to" };

    /**
     * Lookup set built from commonWords. It is built once during class
     * initialization and never modified afterwards, so it can be read from
     * any thread without locking.
     */
    private static final Set commonWordsSet = buildCommonWordsSet();

    /**
     * Builds the read-only lookup set of common words.
     */
    private static Set buildCommonWordsSet() {
        Set set = new HashSet();
        for (int i = 0; i < commonWords.length; i++) {
            set.add(commonWords[i]);
        }
        return Collections.unmodifiableSet(set);
    }

    /**
     * Returns a new String array with some of the most common English words
     * removed. The specific words removed are: a, and, as, at, be, do, i, if,
     * in, is, it, so, the, to. The comparison is case sensitive, so the
     * input is expected to be lower case already (for example the output of
     * toLowerCaseWordArray).
     *
     * @param words the words to filter.
     * @return a new array holding, in order, the words that are not common.
     * @throws NullPointerException if words is null.
     */
    public static final String[] removeCommonWords(String[] words) {
        ArrayList results = new ArrayList(words.length);
        for (int i = 0; i < words.length; i++) {
            if (!commonWordsSet.contains(words[i])) {
                results.add(words[i]);
            }
        }
        return (String[]) results.toArray(new String[results.size()]);
    }

    /**
     * Pseudo-random number generator object for use with randomString().
     * The Random class is not considered to be cryptographically secure, so
     * only use these random Strings for low to medium security applications.
     */
    private static final Random randGen = new Random();

    /**
     * Array of numbers and letters of mixed case. Numbers appear in the list
     * twice so that a number is picked more often than it would be from a
     * plain 0-9a-zA-Z alphabet. We can use the array to get a random number
     * or letter by picking a random array index.
     */
    private static final char[] numbersAndLetters =
            ("0123456789abcdefghijklmnopqrstuvwxyz"
            + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ").toCharArray();

    /**
     * Returns a random String of numbers and letters of the specified length.
     * The method uses the Random class that is built-in to Java which is
     * suitable for low to medium grade security uses. This means that the
     * output is only pseudo random, i.e., each number is mathematically
     * generated so is not truly random.<p>
     *
     * Every character is drawn uniformly from a 72 entry table holding each
     * digit twice and each lower and upper case letter once, so a given
     * digit is twice as likely as a given letter, and lower and upper case
     * letters are equally likely.<p>
     *
     * The specified length must be at least one. If not, the method will return
     * null.
     *
     * @param length the desired length of the random String to return.
     * @return a random String of numbers and letters of the specified length,
     *      or null if length is less than one.
     */
    public static final String randomString(int length) {
        if (length < 1) {
            return null;
        }
        char[] randBuffer = new char[length];
        for (int i = 0; i < randBuffer.length; i++) {
            // Bound by the table size so that every entry, including the
            // last one, can be picked.
            randBuffer[i] =
                    numbersAndLetters[randGen.nextInt(numbersAndLetters.length)];
        }
        return new String(randBuffer);
    }

    /**
     * Intelligently chops a String at a word boundary (a space) that occurs
     * at the specified index in the argument or before. However, if there is a
     * newline character before <code>length</code>, the String will be chopped
     * there. If no newline or space is found in <code>string</code> up to
     * the index <code>length</code>, the String will be chopped at
     * <code>length</code>. A String that already fits in <code>length</code>
     * characters and has no newline is returned unchanged.
     * <p>
     * For example, chopAtWord("This is a nice String", 10) will return
     * "This is a" which is the first word boundary less than or equal to 10
     * characters into the original String.
     *
     * @param string the String to chop.
     * @param length the index in <code>string</code> to start looking for a
     *      whitespace boundary at.
     * @return a substring of <code>string</code> whose length is less than or
     *      equal to <code>length</code>, and that is chopped at whitespace;
     *      null or empty input is returned as is, and a non-positive
     *      <code>length</code> yields the empty String.
     */
    public static final String chopAtWord(String string, int length) {
        if (string == null || string.length() == 0) {
            return string;
        }
        if (length <= 0) {
            return "";
        }

        // Only the first 'limit' characters are inspected for a newline.
        int limit = Math.min(length, string.length());
        int newline = string.indexOf('\n');
        if (newline >= 0 && newline < limit) {
            int end = newline;
            // Windows line ending: drop the carriage return as well.
            if (end > 0 && string.charAt(end - 1) == '\r') {
                end--;
            }
            return string.substring(0, end);
        }

        // No newline in range and the whole String fits: nothing to chop.
        if (string.length() <= length) {
            return string;
        }

        // Walk backwards from the chop point to the nearest space.
        for (int i = length - 1; i > 0; i--) {
            if (string.charAt(i) == ' ') {
                return string.substring(0, i).trim();
            }
        }

        // Did not find a word boundary, so chop hard at the given length.
        return string.substring(0, length);
    }

    /**
     * Highlights words in a string. Word matching ignores case. The actual
     * highlighting method is specified with the start and end highlight tags.
     * Those might be beginning and ending HTML bold tags, or anything else.
     * <p>
     * Only distinct words are highlighted: a match counts when the character
     * before it (if any) and the character after it (if any) are not
     * letters. The text is scanned once, left to right, against the original
     * input, so highlight tags that have been inserted are never searched
     * themselves. When several words match at the same position, the one
     * that comes first in <code>words</code> wins. Null or empty entries in
     * <code>words</code> are ignored.
     *
     * @param string the String to highlight words in.
     * @param words an array of words that should be highlighted in the string.
     * @param startHighlight the tag that should be inserted to start highlighting.
     * @param endHighlight the tag that should be inserted to end highlighting.
     * @return a new String with the specified words highlighted, or null if
     *      any argument is null.
     */
    public static final String highlightWords(String string,
            String[] words, String startHighlight, String endHighlight) {
        if (string == null || words == null || startHighlight == null
                || endHighlight == null) {
            return null;
        }

        int length = string.length();
        StringBuffer buf = new StringBuffer(length + 16);
        int pos = 0;
        while (pos < length) {
            int matchLength = 0;
            // A word can only start at the beginning of the text or right
            // after a non-letter.
            if (pos == 0 || !Character.isLetter(string.charAt(pos - 1))) {
                matchLength = wholeWordMatchLength(string, pos, words);
            }
            if (matchLength > 0) {
                buf.append(startHighlight);
                buf.append(string.substring(pos, pos + matchLength));
                buf.append(endHighlight);
                pos += matchLength;
            } else {
                buf.append(string.charAt(pos));
                pos++;
            }
        }
        return buf.toString();
    }

    /**
     * Returns the length of the first entry of words that matches text at
     * pos (ignoring case) and is not followed by a letter, or 0 if no entry
     * matches there.
     */
    private static int wholeWordMatchLength(String text, int pos,
            String[] words) {
        for (int w = 0; w < words.length; w++) {
            String word = words[w];
            if (word == null || word.length() == 0) {
                continue;
            }
            int end = pos + word.length();
            if (end > text.length()) {
                continue;
            }
            if (!text.regionMatches(true, pos, word, 0, word.length())) {
                continue;
            }
            if (end < text.length() && Character.isLetter(text.charAt(end))) {
                continue;
            }
            return word.length();
        }
        return 0;
    }

    /**
     * Escapes characters in the String so that it can be used in an XML
     * doc: '&lt;' becomes <code>&amp;lt;</code>, '&amp;' becomes
     * <code>&amp;amp;</code> and '"' becomes <code>&amp;quot;</code>.
     * All other characters are copied unchanged.
     *
     * @param string the string to escape.
     * @return the string with appropriate characters escaped; null or empty
     *      input is returned as is.
     */
    public static final String escapeForXML(String string) {
        if (string == null || string.length() == 0) {
            return string;
        }
        StringBuffer buf = new StringBuffer(string.length());
        for (int i = 0; i < string.length(); i++) {
            char ch = string.charAt(i);
            if (ch == '<') {
                buf.append("&lt;");
            } else if (ch == '&') {
                buf.append("&amp;");
            } else if (ch == '"') {
                buf.append("&quot;");
            } else {
                buf.append(ch);
            }
        }
        return buf.toString();
    }

}
|