001: /*
002: * NEMESIS-FORUM.
003: * Copyright (C) 2002 David Laurent(lithium2@free.fr). All rights reserved.
004: *
005: * Copyright (c) 2000 The Apache Software Foundation. All rights reserved.
006: *
007: * Copyright (C) 2001 Yasna.com. All rights reserved.
008: *
009: * Copyright (C) 2000 CoolServlets.com. All rights reserved.
010: *
011: * NEMESIS-FORUM. is free software; you can redistribute it and/or
012: * modify it under the terms of the Apache Software License, Version 1.1,
013: * or (at your option) any later version.
014: *
015: * NEMESIS-FORUM core framework, NEMESIS-FORUM backoffice, NEMESIS-FORUM frontoffice
016: * application are parts of NEMESIS-FORUM and are distributed under
017: * same terms of licence.
018: *
019: *
020: * NEMESIS-FORUM includes software developed by the Apache Software Foundation (http://www.apache.org/)
021: * and software developed by CoolServlets.com (http://www.coolservlets.com).
022: * and software developed by Yasna.com (http://www.yasna.com).
023: *
024: */
025:
026: package org.nemesis.forum.util;
027:
028: import java.security.MessageDigest;
029: import java.security.NoSuchAlgorithmException;
030: import java.util.ArrayList;
031: import java.util.HashMap;
032: import java.util.Map;
033: import java.util.Random;
034: import java.util.StringTokenizer;
035:
036: import org.apache.commons.logging.Log;
037: import org.apache.commons.logging.LogFactory;
038:
039: /**
040: * Utility class to peform common String manipulation algorithms.
041: */
042: public class StringUtils {
043: static protected Log log = LogFactory.getLog(StringUtils.class);
044: /**
045: * Initialization lock for the whole class. Init's only happen once per
046: * class load so this shouldn't be a bottleneck.
047: */
048: private static Object initLock = new Object();
049:
050: /**
051: * Replaces all instances of oldString with newString in line.
052: *
053: * @param line the String to search to perform replacements on
054: * @param oldString the String that should be replaced by newString
055: * @param newString the String that will replace all instances of oldString
056: *
057: * @return a String will all instances of oldString replaced by newString
058: */
059: public static final String replace(String line, String oldString,
060: String newString) {
061: if (line == null) {
062: return null;
063: }
064: int i = 0;
065: if ((i = line.indexOf(oldString, i)) >= 0) {
066: char[] line2 = line.toCharArray();
067: char[] newString2 = newString.toCharArray();
068: int oLength = oldString.length();
069: StringBuffer buf = new StringBuffer(line2.length);
070: buf.append(line2, 0, i).append(newString2);
071: i += oLength;
072: int j = i;
073: while ((i = line.indexOf(oldString, i)) > 0) {
074: buf.append(line2, j, i - j).append(newString2);
075: i += oLength;
076: j = i;
077: }
078: buf.append(line2, j, line2.length - j);
079: return buf.toString();
080: }
081: return line;
082: }
083:
084: /**
085: * Replaces all instances of oldString with newString in line with the
086: * added feature that matches of newString in oldString ignore case.
087: *
088: * @param line the String to search to perform replacements on
089: * @param oldString the String that should be replaced by newString
090: * @param newString the String that will replace all instances of oldString
091: *
092: * @return a String will all instances of oldString replaced by newString
093: */
094: public static final String replaceIgnoreCase(String line,
095: String oldString, String newString) {
096: if (line == null) {
097: return null;
098: }
099: String lcLine = line.toLowerCase();
100: String lcOldString = oldString.toLowerCase();
101: int i = 0;
102: if ((i = lcLine.indexOf(lcOldString, i)) >= 0) {
103: char[] line2 = line.toCharArray();
104: char[] newString2 = newString.toCharArray();
105: int oLength = oldString.length();
106: StringBuffer buf = new StringBuffer(line2.length);
107: buf.append(line2, 0, i).append(newString2);
108: i += oLength;
109: int j = i;
110: while ((i = lcLine.indexOf(lcOldString, i)) > 0) {
111: buf.append(line2, j, i - j).append(newString2);
112: i += oLength;
113: j = i;
114: }
115: buf.append(line2, j, line2.length - j);
116: return buf.toString();
117: }
118: return line;
119: }
120:
121: /**
122: * Replaces all instances of oldString with newString in line.
123: * The count Integer is updated with number of replaces.
124: *
125: * @param line the String to search to perform replacements on
126: * @param oldString the String that should be replaced by newString
127: * @param newString the String that will replace all instances of oldString
128: *
129: * @return a String will all instances of oldString replaced by newString
130: */
131: public static final String replace(String line, String oldString,
132: String newString, int[] count) {
133: if (line == null) {
134: return null;
135: }
136: int i = 0;
137: if ((i = line.indexOf(oldString, i)) >= 0) {
138: int counter = 0;
139: counter++;
140: char[] line2 = line.toCharArray();
141: char[] newString2 = newString.toCharArray();
142: int oLength = oldString.length();
143: StringBuffer buf = new StringBuffer(line2.length);
144: buf.append(line2, 0, i).append(newString2);
145: i += oLength;
146: int j = i;
147: while ((i = line.indexOf(oldString, i)) > 0) {
148: counter++;
149: buf.append(line2, j, i - j).append(newString2);
150: i += oLength;
151: j = i;
152: }
153: buf.append(line2, j, line2.length - j);
154: count[0] = counter;
155: return buf.toString();
156: }
157: return line;
158: }
159:
160: /**
161: * This method takes a string which may contain HTML tags (ie, <b>,
162: * <table>, etc) and converts the '<'' and '>' characters to
163: * their HTML escape sequences.
164: *
165: * @param input the text to be converted.
166: * @return the input string with the characters '<' and '>' replaced
167: * with their HTML escape sequences.
168: */
169: public static final String escapeHTMLTags(String input) {
170: //Check if the string is null or zero length -- if so, return
171: //what was sent in.
172: if (input == null || input.length() == 0) {
173: return input;
174: }
175: //Use a StringBuffer in lieu of String concatenation -- it is
176: //much more efficient this way.
177: StringBuffer buf = new StringBuffer(input.length());
178: char ch = ' ';
179: for (int i = 0; i < input.length(); i++) {
180: ch = input.charAt(i);
181: if (ch == '<') {
182: buf.append("<");
183: } else if (ch == '>') {
184: buf.append(">");
185: } else {
186: buf.append(ch);
187: }
188: }
189: return buf.toString();
190: }
191:
192: /**
193: * Used by the hash method.
194: */
195: private static MessageDigest digest = null;
196:
197: /**
198: * Hashes a String using the Md5 algorithm and returns the result as a
199: * String of hexadecimal numbers. This method is synchronized to avoid
200: * excessive MessageDigest object creation. If calling this method becomes
201: * a bottleneck in your code, you may wish to maintain a pool of
202: * MessageDigest objects instead of using this method.
203: * <p>
204: * A hash is a one-way function -- that is, given an
205: * input, an output is easily computed. However, given the output, the
206: * input is almost impossible to compute. This is useful for passwords
207: * since we can store the hash and a hacker will then have a very hard time
208: * determining the original password.
209: * <p>
210: * every time a user logs in, we simply
211: * take their plain text password, compute the hash, and compare the
212: * generated hash to the stored hash. Since it is almost impossible that
213: * two passwords will generate the same hash, we know if the user gave us
214: * the correct password or not. The only negative to this system is that
215: * password recovery is basically impossible. Therefore, a reset password
216: * method is used instead.
217: *
218: * @param data the String to compute the hash of.
219: * @return a hashed version of the passed-in String
220: */
221: public synchronized static final String hash(String data) {
222: if (digest == null) {
223: try {
224: digest = MessageDigest.getInstance("MD5");
225: } catch (NoSuchAlgorithmException nsae) {
226: log.error("Failed to load the MD5 MessageDigest. "
227: + "will be unable to function normally.", nsae);
228:
229: }
230: }
231: //Now, compute hash.
232: digest.update(data.getBytes());
233: return toHex(digest.digest());
234: }
235:
236: /**
237: * Turns an array of bytes into a String representing each byte as an
238: * unsigned hex number.
239: * <p>
240: * Method by Santeri Paavolainen, Helsinki Finland 1996<br>
241: * (c) Santeri Paavolainen, Helsinki Finland 1996<br>
242: * Distributed under LGPL.
243: *
244: * @param hash an rray of bytes to convert to a hex-string
245: * @return generated hex string
246: */
247: public static final String toHex(byte hash[]) {
248: StringBuffer buf = new StringBuffer(hash.length * 2);
249: int i;
250:
251: for (i = 0; i < hash.length; i++) {
252: if (((int) hash[i] & 0xff) < 0x10) {
253: buf.append("0");
254: }
255: buf.append(Long.toString((int) hash[i] & 0xff, 16));
256: }
257: return buf.toString();
258: }
259:
260: /**
261: * Converts a line of text into an array of lower case words. Words are
262: * delimited by the following characters: , .\r\n:/\+
263: * <p>
264: * In the future, this method should be changed to use a
265: * BreakIterator.wordInstance(). That class offers much more fexibility.
266: *
267: * @param text a String of text to convert into an array of words
268: * @return text broken up into an array of words.
269: */
270: public static final String[] toLowerCaseWordArray(String text) {
271: if (text == null || text.length() == 0) {
272: return new String[0];
273: }
274: StringTokenizer tokens = new StringTokenizer(text,
275: " ,\r\n.:/\\+");
276: String[] words = new String[tokens.countTokens()];
277: for (int i = 0; i < words.length; i++) {
278: words[i] = tokens.nextToken().toLowerCase();
279: }
280: return words;
281: }
282:
283: /**
284: * A list of some of the most common words. For searching and indexing, we
285: * often want to filter out these words since they just confuse searches.
286: * The list was not created scientifically so may be incomplete :)
287: */
288: private static final String[] commonWords = new String[] { "a",
289: "and", "as", "at", "be", "do", "i", "if", "in", "is", "it",
290: "so", "the", "to" };
291: private static Map commonWordsMap = null;
292:
293: /**
294: * Returns a new String array with some of the most common English words
295: * removed. The specific words removed are: a, and, as, at, be, do, i, if,
296: * in, is, it, so, the, to
297: */
298: public static final String[] removeCommonWords(String[] words) {
299: //See if common words map has been initialized. We don't statically
300: //initialize it to save some memory. Even though this a small savings,
301: //it adds up with hundreds of classes being loaded.
302: if (commonWordsMap == null) {
303: synchronized (initLock) {
304: if (commonWordsMap == null) {
305: commonWordsMap = new HashMap();
306: for (int i = 0; i < commonWords.length; i++) {
307: commonWordsMap.put(commonWords[i],
308: commonWords[i]);
309: }
310: }
311: }
312: }
313: //Now, add all words that aren't in the common map to results
314: ArrayList results = new ArrayList(words.length);
315: for (int i = 0; i < words.length; i++) {
316: if (!commonWordsMap.containsKey(words[i])) {
317: results.add(words[i]);
318: }
319: }
320: return (String[]) results.toArray(new String[results.size()]);
321: }
322:
323: /**
324: * Pseudo-random number generator object for use with randomString().
325: * The Random class is not considered to be cryptographically secure, so
326: * only use these random Strings for low to medium security applications.
327: */
328: private static Random randGen = null;
329:
330: /**
331: * Array of numbers and letters of mixed case. Numbers appear in the list
332: * twice so that there is a more equal chance that a number will be picked.
333: * We can use the array to get a random number or letter by picking a random
334: * array index.
335: */
336: private static char[] numbersAndLetters = null;
337:
338: /**
339: * Returns a random String of numbers and letters of the specified length.
340: * The method uses the Random class that is built-in to Java which is
341: * suitable for low to medium grade security uses. This means that the
342: * output is only pseudo random, i.e., each number is mathematically
343: * generated so is not truly random.<p>
344: *
345: * For every character in the returned String, there is an equal chance that
346: * it will be a letter or number. If a letter, there is an equal chance
347: * that it will be lower or upper case.<p>
348: *
349: * The specified length must be at least one. If not, the method will return
350: * null.
351: *
352: * @param length the desired length of the random String to return.
353: * @return a random String of numbers and letters of the specified length.
354: */
355: public static final String randomString(int length) {
356: if (length < 1) {
357: return null;
358: }
359: //Init of pseudo random number generator.
360: if (randGen == null) {
361: synchronized (initLock) {
362: if (randGen == null) {
363: randGen = new Random();
364: //Also initialize the numbersAndLetters array
365: numbersAndLetters = ("0123456789abcdefghijklmnopqrstuvwxyz"
366: + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")
367: .toCharArray();
368: }
369: }
370: }
371: //Create a char buffer to put random letters and numbers in.
372: char[] randBuffer = new char[length];
373: for (int i = 0; i < randBuffer.length; i++) {
374: randBuffer[i] = numbersAndLetters[randGen.nextInt(71)];
375: }
376: return new String(randBuffer);
377: }
378:
379: /**
380: * Intelligently chops a String at a word boundary (whitespace) that occurs
381: * at the specified index in the argument or before. However, if there is a
382: * newline character before <code>length</code>, the String will be chopped
383: * there. If no newline or whitespace is found in <code>string</code> up to
384: * the index <code>length</code>, the String will chopped at <code>length</code>.
385: * <p>
386: * For example, chopAtWord("This is a nice String", 10) will return
387: * "This is a" which is the first word boundary less than or equal to 10
388: * characters into the original String.
389: *
390: * @param string the String to chop.
391: * @param length the index in <code>string</code> to start looking for a
392: * whitespace boundary at.
393: * @return a substring of <code>string</code> whose length is less than or
394: * equal to <code>length</code>, and that is chopped at whitespace.
395: */
396: public static final String chopAtWord(String string, int length) {
397: if (string == null) {
398: return string;
399: }
400:
401: char[] charArray = string.toCharArray();
402: int sLength = string.length();
403: if (length < sLength) {
404: sLength = length;
405: }
406:
407: //First check if there is a newline character before length; if so,
408: //chop word there.
409: for (int i = 0; i < sLength - 1; i++) {
410: //Windows
411: if (charArray[i] == '\r' && charArray[i + 1] == '\n') {
412: return string.substring(0, i);
413: }
414: //Unix
415: else if (charArray[i] == '\n') {
416: return string.substring(0, i);
417: }
418: }
419: //Also check boundary case of Unix newline
420: if (charArray[sLength - 1] == '\n') {
421: return string.substring(0, sLength - 1);
422: }
423:
424: //Done checking for newline, now see if the total string is less than
425: //the specified chop point.
426: if (string.length() < length) {
427: return string;
428: }
429:
430: //No newline, so chop at the first whitespace.
431: for (int i = length - 1; i > 0; i--) {
432: if (charArray[i] == ' ') {
433: return string.substring(0, i).trim();
434: }
435: }
436:
437: //Did not find word boundary so return original String chopped at
438: //specified length.
439: return string.substring(0, length);
440: }
441:
442: /**
443: * Highlights words in a string. Words matching ignores case. The actual
444: * higlighting method is specified with the start and end higlight tags.
445: * Those might be beginning and ending HTML bold tags, or anything else.
446: *
447: * @param string the String to highlight words in.
448: * @param words an array of words that should be highlighted in the string.
449: * @param startHighlight the tag that should be inserted to start highlighting.
450: * @param endHighlight the tag that should be inserted to end highlighting.
451: * @return a new String with the specified words highlighted.
452: */
453: public static final String highlightWords(String string,
454: String[] words, String startHighlight, String endHighlight) {
455: if (string == null || words == null || startHighlight == null
456: || endHighlight == null) {
457: return null;
458: }
459:
460: //Iterate through each word.
461: for (int x = 0; x < words.length; x++) {
462: //we want to ignore case.
463: String lcString = string.toLowerCase();
464: //using a char [] is more efficient
465: char[] string2 = string.toCharArray();
466: String word = words[x].toLowerCase();
467:
468: //perform specialized replace logic
469: int i = 0;
470: if ((i = lcString.indexOf(word, i)) >= 0) {
471: int oLength = word.length();
472: StringBuffer buf = new StringBuffer(string2.length);
473:
474: //we only want to highlight distinct words and not parts of
475: //larger words. The method used below mostly solves this. There
476: //are a few cases where it doesn't, but it's close enough.
477: boolean startSpace = false;
478: char startChar = ' ';
479: if (i - 1 > 0) {
480: startChar = string2[i - 1];
481: if (!Character.isLetter(startChar)) {
482: startSpace = true;
483: }
484: }
485: boolean endSpace = false;
486: char endChar = ' ';
487: if (i + oLength < string2.length) {
488: endChar = string2[i + oLength];
489: if (!Character.isLetter(endChar)) {
490: endSpace = true;
491: }
492: }
493: if ((startSpace && endSpace) || (i == 0 && endSpace)) {
494: buf.append(string2, 0, i);
495: if (startSpace && startChar == ' ') {
496: buf.append(startChar);
497: }
498: buf.append(startHighlight);
499: buf.append(string2, i, oLength)
500: .append(endHighlight);
501: if (endSpace && endChar == ' ') {
502: buf.append(endChar);
503: }
504: } else {
505: buf.append(string2, 0, i);
506: buf.append(string2, i, oLength);
507: }
508:
509: i += oLength;
510: int j = i;
511: while ((i = lcString.indexOf(word, i)) > 0) {
512: startSpace = false;
513: startChar = string2[i - 1];
514: if (!Character.isLetter(startChar)) {
515: startSpace = true;
516: }
517:
518: endSpace = false;
519: if (i + oLength < string2.length) {
520: endChar = string2[i + oLength];
521: if (!Character.isLetter(endChar)) {
522: endSpace = true;
523: }
524: }
525: if ((startSpace && endSpace)
526: || i + oLength == string2.length) {
527: buf.append(string2, j, i - j);
528: if (startSpace && startChar == ' ') {
529: buf.append(startChar);
530: }
531: buf.append(startHighlight);
532: buf.append(string2, i, oLength).append(
533: endHighlight);
534: if (endSpace && endChar == ' ') {
535: buf.append(endChar);
536: }
537: } else {
538: buf.append(string2, j, i - j);
539: buf.append(string2, i, oLength);
540: }
541: i += oLength;
542: j = i;
543: }
544: buf.append(string2, j, string2.length - j);
545: string = buf.toString();
546: }
547: }
548: return string;
549: }
550:
551: /**
552: * Escapes all necessary characters in the String so that it can be used
553: * in an XML doc.
554: *
555: * @param string the string to escape.
556: * @return the string with appropriate characters escaped.
557: */
558: public static final String escapeForXML(String string) {
559: //Check if the string is null or zero length -- if so, return
560: //what was sent in.
561: if (string == null || string.length() == 0) {
562: return string;
563: }
564: char[] sArray = string.toCharArray();
565: StringBuffer buf = new StringBuffer(sArray.length);
566: char ch;
567: for (int i = 0; i < sArray.length; i++) {
568: ch = sArray[i];
569: if (ch == '<') {
570: buf.append("<");
571: } else if (ch == '&') {
572: buf.append("&");
573: } else if (ch == '"') {
574: buf.append(""");
575: } else {
576: buf.append(ch);
577: }
578: }
579: return buf.toString();
580: }
581:
582: }
|