Source Code Cross Referenced for StringUtils.java in » Forum » nemesis-forum » org » nemesis » forum » util » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Forum » nemesis forum » org.nemesis.forum.util
Source Cross Referenced Class Diagram Java Document (Java Doc)
001:        /*
002:         * NEMESIS-FORUM.
003:         * Copyright (C) 2002  David Laurent(lithium2@free.fr). All rights reserved.
004:         * 
005:         * Copyright (c) 2000 The Apache Software Foundation. All rights reserved.
006:         * 
007:         * Copyright (C) 2001 Yasna.com. All rights reserved.
008:         * 
009:         * Copyright (C) 2000 CoolServlets.com. All rights reserved.
010:         * 
011:         * NEMESIS-FORUM. is free software; you can redistribute it and/or
012:         * modify it under the terms of the Apache Software License, Version 1.1,
013:         * or (at your option) any later version.
014:         * 
015:         * NEMESIS-FORUM core framework, NEMESIS-FORUM backoffice, NEMESIS-FORUM frontoffice
016:         * application are parts of NEMESIS-FORUM and are distributed under
017:         * same terms of licence.
018:         * 
019:         * 
020:         * NEMESIS-FORUM includes software developed by the Apache Software Foundation (http://www.apache.org/)
021:         * and software developed by CoolServlets.com (http://www.coolservlets.com).
022:         * and software developed by Yasna.com (http://www.yasna.com).
023:         * 
024:         */
025:
026:        package org.nemesis.forum.util;
027:
028:        import java.security.MessageDigest;
029:        import java.security.NoSuchAlgorithmException;
030:        import java.util.ArrayList;
031:        import java.util.HashMap;
032:        import java.util.Map;
033:        import java.util.Random;
034:        import java.util.StringTokenizer;
035:
036:        import org.apache.commons.logging.Log;
037:        import org.apache.commons.logging.LogFactory;
038:
039:        /**
040:         * Utility class to perform common String manipulation algorithms.
041:         */
042:        public class StringUtils {
043:            static protected Log log = LogFactory.getLog(StringUtils.class);
044:            /**
045:             * Initialization lock for the whole class. Init's only happen once per
046:             * class load so this shouldn't be a bottleneck.
047:             */
048:            private static Object initLock = new Object();
049:
050:            /**
051:             * Replaces all instances of oldString with newString in line.
052:             *
053:             * @param line the String to search to perform replacements on
054:             * @param oldString the String that should be replaced by newString
055:             * @param newString the String that will replace all instances of oldString
056:             *
057:             * @return a String will all instances of oldString replaced by newString
058:             */
059:            public static final String replace(String line, String oldString,
060:                    String newString) {
061:                if (line == null) {
062:                    return null;
063:                }
064:                int i = 0;
065:                if ((i = line.indexOf(oldString, i)) >= 0) {
066:                    char[] line2 = line.toCharArray();
067:                    char[] newString2 = newString.toCharArray();
068:                    int oLength = oldString.length();
069:                    StringBuffer buf = new StringBuffer(line2.length);
070:                    buf.append(line2, 0, i).append(newString2);
071:                    i += oLength;
072:                    int j = i;
073:                    while ((i = line.indexOf(oldString, i)) > 0) {
074:                        buf.append(line2, j, i - j).append(newString2);
075:                        i += oLength;
076:                        j = i;
077:                    }
078:                    buf.append(line2, j, line2.length - j);
079:                    return buf.toString();
080:                }
081:                return line;
082:            }
083:
084:            /**
085:             * Replaces all instances of oldString with newString in line with the
086:             * added feature that matches of newString in oldString ignore case.
087:             *
088:             * @param line the String to search to perform replacements on
089:             * @param oldString the String that should be replaced by newString
090:             * @param newString the String that will replace all instances of oldString
091:             *
092:             * @return a String will all instances of oldString replaced by newString
093:             */
094:            public static final String replaceIgnoreCase(String line,
095:                    String oldString, String newString) {
096:                if (line == null) {
097:                    return null;
098:                }
099:                String lcLine = line.toLowerCase();
100:                String lcOldString = oldString.toLowerCase();
101:                int i = 0;
102:                if ((i = lcLine.indexOf(lcOldString, i)) >= 0) {
103:                    char[] line2 = line.toCharArray();
104:                    char[] newString2 = newString.toCharArray();
105:                    int oLength = oldString.length();
106:                    StringBuffer buf = new StringBuffer(line2.length);
107:                    buf.append(line2, 0, i).append(newString2);
108:                    i += oLength;
109:                    int j = i;
110:                    while ((i = lcLine.indexOf(lcOldString, i)) > 0) {
111:                        buf.append(line2, j, i - j).append(newString2);
112:                        i += oLength;
113:                        j = i;
114:                    }
115:                    buf.append(line2, j, line2.length - j);
116:                    return buf.toString();
117:                }
118:                return line;
119:            }
120:
121:            /**
122:             * Replaces all instances of oldString with newString in line.
123:             * The count Integer is updated with number of replaces.
124:             *
125:             * @param line the String to search to perform replacements on
126:             * @param oldString the String that should be replaced by newString
127:             * @param newString the String that will replace all instances of oldString
128:             *
129:             * @return a String will all instances of oldString replaced by newString
130:             */
131:            public static final String replace(String line, String oldString,
132:                    String newString, int[] count) {
133:                if (line == null) {
134:                    return null;
135:                }
136:                int i = 0;
137:                if ((i = line.indexOf(oldString, i)) >= 0) {
138:                    int counter = 0;
139:                    counter++;
140:                    char[] line2 = line.toCharArray();
141:                    char[] newString2 = newString.toCharArray();
142:                    int oLength = oldString.length();
143:                    StringBuffer buf = new StringBuffer(line2.length);
144:                    buf.append(line2, 0, i).append(newString2);
145:                    i += oLength;
146:                    int j = i;
147:                    while ((i = line.indexOf(oldString, i)) > 0) {
148:                        counter++;
149:                        buf.append(line2, j, i - j).append(newString2);
150:                        i += oLength;
151:                        j = i;
152:                    }
153:                    buf.append(line2, j, line2.length - j);
154:                    count[0] = counter;
155:                    return buf.toString();
156:                }
157:                return line;
158:            }
159:
160:            /**
161:             * This method takes a string which may contain HTML tags (ie, &lt;b&gt;,
162:             * &lt;table&gt;, etc) and converts the '&lt'' and '&gt;' characters to
163:             * their HTML escape sequences.
164:             *
165:             * @param input the text to be converted.
166:             * @return the input string with the characters '&lt;' and '&gt;' replaced
167:             *  with their HTML escape sequences.
168:             */
169:            public static final String escapeHTMLTags(String input) {
170:                //Check if the string is null or zero length -- if so, return
171:                //what was sent in.
172:                if (input == null || input.length() == 0) {
173:                    return input;
174:                }
175:                //Use a StringBuffer in lieu of String concatenation -- it is
176:                //much more efficient this way.
177:                StringBuffer buf = new StringBuffer(input.length());
178:                char ch = ' ';
179:                for (int i = 0; i < input.length(); i++) {
180:                    ch = input.charAt(i);
181:                    if (ch == '<') {
182:                        buf.append("&lt;");
183:                    } else if (ch == '>') {
184:                        buf.append("&gt;");
185:                    } else {
186:                        buf.append(ch);
187:                    }
188:                }
189:                return buf.toString();
190:            }
191:
192:            /**
193:             * Used by the hash method.
194:             */
195:            private static MessageDigest digest = null;
196:
197:            /**
198:             * Hashes a String using the Md5 algorithm and returns the result as a
199:             * String of hexadecimal numbers. This method is synchronized to avoid
200:             * excessive MessageDigest object creation. If calling this method becomes
201:             * a bottleneck in your code, you may wish to maintain a pool of
202:             * MessageDigest objects instead of using this method.
203:             * <p>
204:             * A hash is a one-way function -- that is, given an
205:             * input, an output is easily computed. However, given the output, the
206:             * input is almost impossible to compute. This is useful for passwords
207:             * since we can store the hash and a hacker will then have a very hard time
208:             * determining the original password.
209:             * <p>
210:             * every time a user logs in, we simply
211:             * take their plain text password, compute the hash, and compare the
212:             * generated hash to the stored hash. Since it is almost impossible that
213:             * two passwords will generate the same hash, we know if the user gave us
214:             * the correct password or not. The only negative to this system is that
215:             * password recovery is basically impossible. Therefore, a reset password
216:             * method is used instead.
217:             *
218:             * @param data the String to compute the hash of.
219:             * @return a hashed version of the passed-in String
220:             */
221:            public synchronized static final String hash(String data) {
222:                if (digest == null) {
223:                    try {
224:                        digest = MessageDigest.getInstance("MD5");
225:                    } catch (NoSuchAlgorithmException nsae) {
226:                        log.error("Failed to load the MD5 MessageDigest. "
227:                                + "will be unable to function normally.", nsae);
228:
229:                    }
230:                }
231:                //Now, compute hash.
232:                digest.update(data.getBytes());
233:                return toHex(digest.digest());
234:            }
235:
236:            /**
237:             * Turns an array of bytes into a String representing each byte as an
238:             * unsigned hex number.
239:             * <p>
240:             * Method by Santeri Paavolainen, Helsinki Finland 1996<br>
241:             * (c) Santeri Paavolainen, Helsinki Finland 1996<br>
242:             * Distributed under LGPL.
243:             *
244:             * @param hash an rray of bytes to convert to a hex-string
245:             * @return generated hex string
246:             */
247:            public static final String toHex(byte hash[]) {
248:                StringBuffer buf = new StringBuffer(hash.length * 2);
249:                int i;
250:
251:                for (i = 0; i < hash.length; i++) {
252:                    if (((int) hash[i] & 0xff) < 0x10) {
253:                        buf.append("0");
254:                    }
255:                    buf.append(Long.toString((int) hash[i] & 0xff, 16));
256:                }
257:                return buf.toString();
258:            }
259:
260:            /**
261:             * Converts a line of text into an array of lower case words. Words are
262:             * delimited by the following characters: , .\r\n:/\+
263:             * <p>
264:             * In the future, this method should be changed to use a
265:             * BreakIterator.wordInstance(). That class offers much more fexibility.
266:             *
267:             * @param text a String of text to convert into an array of words
268:             * @return text broken up into an array of words.
269:             */
270:            public static final String[] toLowerCaseWordArray(String text) {
271:                if (text == null || text.length() == 0) {
272:                    return new String[0];
273:                }
274:                StringTokenizer tokens = new StringTokenizer(text,
275:                        " ,\r\n.:/\\+");
276:                String[] words = new String[tokens.countTokens()];
277:                for (int i = 0; i < words.length; i++) {
278:                    words[i] = tokens.nextToken().toLowerCase();
279:                }
280:                return words;
281:            }
282:
283:            /**
284:             * A list of some of the most common words. For searching and indexing, we
285:             * often want to filter out these words since they just confuse searches.
286:             * The list was not created scientifically so may be incomplete :)
287:             */
288:            private static final String[] commonWords = new String[] { "a",
289:                    "and", "as", "at", "be", "do", "i", "if", "in", "is", "it",
290:                    "so", "the", "to" };
291:            private static Map commonWordsMap = null;
292:
293:            /**
294:             * Returns a new String array with some of the most common English words
295:             * removed. The specific words removed are: a, and, as, at, be, do, i, if,
296:             * in, is, it, so, the, to
297:             */
298:            public static final String[] removeCommonWords(String[] words) {
299:                //See if common words map has been initialized. We don't statically
300:                //initialize it to save some memory. Even though this a small savings,
301:                //it adds up with hundreds of classes being loaded.
302:                if (commonWordsMap == null) {
303:                    synchronized (initLock) {
304:                        if (commonWordsMap == null) {
305:                            commonWordsMap = new HashMap();
306:                            for (int i = 0; i < commonWords.length; i++) {
307:                                commonWordsMap.put(commonWords[i],
308:                                        commonWords[i]);
309:                            }
310:                        }
311:                    }
312:                }
313:                //Now, add all words that aren't in the common map to results
314:                ArrayList results = new ArrayList(words.length);
315:                for (int i = 0; i < words.length; i++) {
316:                    if (!commonWordsMap.containsKey(words[i])) {
317:                        results.add(words[i]);
318:                    }
319:                }
320:                return (String[]) results.toArray(new String[results.size()]);
321:            }
322:
323:            /**
324:             * Pseudo-random number generator object for use with randomString().
325:             * The Random class is not considered to be cryptographically secure, so
326:             * only use these random Strings for low to medium security applications.
327:             */
328:            private static Random randGen = null;
329:
330:            /**
331:             * Array of numbers and letters of mixed case. Numbers appear in the list
332:             * twice so that there is a more equal chance that a number will be picked.
333:             * We can use the array to get a random number or letter by picking a random
334:             * array index.
335:             */
336:            private static char[] numbersAndLetters = null;
337:
338:            /**
339:             * Returns a random String of numbers and letters of the specified length.
340:             * The method uses the Random class that is built-in to Java which is
341:             * suitable for low to medium grade security uses. This means that the
342:             * output is only pseudo random, i.e., each number is mathematically
343:             * generated so is not truly random.<p>
344:             *
345:             * For every character in the returned String, there is an equal chance that
346:             * it will be a letter or number. If a letter, there is an equal chance
347:             * that it will be lower or upper case.<p>
348:             *
349:             * The specified length must be at least one. If not, the method will return
350:             * null.
351:             *
352:             * @param length the desired length of the random String to return.
353:             * @return a random String of numbers and letters of the specified length.
354:             */
355:            public static final String randomString(int length) {
356:                if (length < 1) {
357:                    return null;
358:                }
359:                //Init of pseudo random number generator.
360:                if (randGen == null) {
361:                    synchronized (initLock) {
362:                        if (randGen == null) {
363:                            randGen = new Random();
364:                            //Also initialize the numbersAndLetters array
365:                            numbersAndLetters = ("0123456789abcdefghijklmnopqrstuvwxyz"
366:                                    + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")
367:                                    .toCharArray();
368:                        }
369:                    }
370:                }
371:                //Create a char buffer to put random letters and numbers in.
372:                char[] randBuffer = new char[length];
373:                for (int i = 0; i < randBuffer.length; i++) {
374:                    randBuffer[i] = numbersAndLetters[randGen.nextInt(71)];
375:                }
376:                return new String(randBuffer);
377:            }
378:
379:            /**
380:             * Intelligently chops a String at a word boundary (whitespace) that occurs
381:             * at the specified index in the argument or before. However, if there is a
382:             * newline character before <code>length</code>, the String will be chopped
383:             * there. If no newline or whitespace is found in <code>string</code> up to
384:             * the index <code>length</code>, the String will chopped at <code>length</code>.
385:             * <p>
386:             * For example, chopAtWord("This is a nice String", 10) will return
387:             * "This is a" which is the first word boundary less than or equal to 10
388:             * characters into the original String.
389:             *
390:             * @param string the String to chop.
391:             * @param length the index in <code>string</code> to start looking for a
392:             *       whitespace boundary at.
393:             * @return a substring of <code>string</code> whose length is less than or
394:             *       equal to <code>length</code>, and that is chopped at whitespace.
395:             */
396:            public static final String chopAtWord(String string, int length) {
397:                if (string == null) {
398:                    return string;
399:                }
400:
401:                char[] charArray = string.toCharArray();
402:                int sLength = string.length();
403:                if (length < sLength) {
404:                    sLength = length;
405:                }
406:
407:                //First check if there is a newline character before length; if so,
408:                //chop word there.
409:                for (int i = 0; i < sLength - 1; i++) {
410:                    //Windows
411:                    if (charArray[i] == '\r' && charArray[i + 1] == '\n') {
412:                        return string.substring(0, i);
413:                    }
414:                    //Unix
415:                    else if (charArray[i] == '\n') {
416:                        return string.substring(0, i);
417:                    }
418:                }
419:                //Also check boundary case of Unix newline
420:                if (charArray[sLength - 1] == '\n') {
421:                    return string.substring(0, sLength - 1);
422:                }
423:
424:                //Done checking for newline, now see if the total string is less than
425:                //the specified chop point.
426:                if (string.length() < length) {
427:                    return string;
428:                }
429:
430:                //No newline, so chop at the first whitespace.
431:                for (int i = length - 1; i > 0; i--) {
432:                    if (charArray[i] == ' ') {
433:                        return string.substring(0, i).trim();
434:                    }
435:                }
436:
437:                //Did not find word boundary so return original String chopped at
438:                //specified length.
439:                return string.substring(0, length);
440:            }
441:
442:            /**
443:             * Highlights words in a string. Words matching ignores case. The actual
444:             * higlighting method is specified with the start and end higlight tags.
445:             * Those might be beginning and ending HTML bold tags, or anything else.
446:             *
447:             * @param string the String to highlight words in.
448:             * @param words an array of words that should be highlighted in the string.
449:             * @param startHighlight the tag that should be inserted to start highlighting.
450:             * @param endHighlight the tag that should be inserted to end highlighting.
451:             * @return a new String with the specified words highlighted.
452:             */
453:            public static final String highlightWords(String string,
454:                    String[] words, String startHighlight, String endHighlight) {
455:                if (string == null || words == null || startHighlight == null
456:                        || endHighlight == null) {
457:                    return null;
458:                }
459:
460:                //Iterate through each word.
461:                for (int x = 0; x < words.length; x++) {
462:                    //we want to ignore case.
463:                    String lcString = string.toLowerCase();
464:                    //using a char [] is more efficient
465:                    char[] string2 = string.toCharArray();
466:                    String word = words[x].toLowerCase();
467:
468:                    //perform specialized replace logic
469:                    int i = 0;
470:                    if ((i = lcString.indexOf(word, i)) >= 0) {
471:                        int oLength = word.length();
472:                        StringBuffer buf = new StringBuffer(string2.length);
473:
474:                        //we only want to highlight distinct words and not parts of
475:                        //larger words. The method used below mostly solves this. There
476:                        //are a few cases where it doesn't, but it's close enough.
477:                        boolean startSpace = false;
478:                        char startChar = ' ';
479:                        if (i - 1 > 0) {
480:                            startChar = string2[i - 1];
481:                            if (!Character.isLetter(startChar)) {
482:                                startSpace = true;
483:                            }
484:                        }
485:                        boolean endSpace = false;
486:                        char endChar = ' ';
487:                        if (i + oLength < string2.length) {
488:                            endChar = string2[i + oLength];
489:                            if (!Character.isLetter(endChar)) {
490:                                endSpace = true;
491:                            }
492:                        }
493:                        if ((startSpace && endSpace) || (i == 0 && endSpace)) {
494:                            buf.append(string2, 0, i);
495:                            if (startSpace && startChar == ' ') {
496:                                buf.append(startChar);
497:                            }
498:                            buf.append(startHighlight);
499:                            buf.append(string2, i, oLength)
500:                                    .append(endHighlight);
501:                            if (endSpace && endChar == ' ') {
502:                                buf.append(endChar);
503:                            }
504:                        } else {
505:                            buf.append(string2, 0, i);
506:                            buf.append(string2, i, oLength);
507:                        }
508:
509:                        i += oLength;
510:                        int j = i;
511:                        while ((i = lcString.indexOf(word, i)) > 0) {
512:                            startSpace = false;
513:                            startChar = string2[i - 1];
514:                            if (!Character.isLetter(startChar)) {
515:                                startSpace = true;
516:                            }
517:
518:                            endSpace = false;
519:                            if (i + oLength < string2.length) {
520:                                endChar = string2[i + oLength];
521:                                if (!Character.isLetter(endChar)) {
522:                                    endSpace = true;
523:                                }
524:                            }
525:                            if ((startSpace && endSpace)
526:                                    || i + oLength == string2.length) {
527:                                buf.append(string2, j, i - j);
528:                                if (startSpace && startChar == ' ') {
529:                                    buf.append(startChar);
530:                                }
531:                                buf.append(startHighlight);
532:                                buf.append(string2, i, oLength).append(
533:                                        endHighlight);
534:                                if (endSpace && endChar == ' ') {
535:                                    buf.append(endChar);
536:                                }
537:                            } else {
538:                                buf.append(string2, j, i - j);
539:                                buf.append(string2, i, oLength);
540:                            }
541:                            i += oLength;
542:                            j = i;
543:                        }
544:                        buf.append(string2, j, string2.length - j);
545:                        string = buf.toString();
546:                    }
547:                }
548:                return string;
549:            }
550:
551:            /**
552:             * Escapes all necessary characters in the String so that it can be used
553:             * in an XML doc.
554:             *
555:             * @param string the string to escape.
556:             * @return the string with appropriate characters escaped.
557:             */
558:            public static final String escapeForXML(String string) {
559:                //Check if the string is null or zero length -- if so, return
560:                //what was sent in.
561:                if (string == null || string.length() == 0) {
562:                    return string;
563:                }
564:                char[] sArray = string.toCharArray();
565:                StringBuffer buf = new StringBuffer(sArray.length);
566:                char ch;
567:                for (int i = 0; i < sArray.length; i++) {
568:                    ch = sArray[i];
569:                    if (ch == '<') {
570:                        buf.append("&lt;");
571:                    } else if (ch == '&') {
572:                        buf.append("&amp;");
573:                    } else if (ch == '"') {
574:                        buf.append("&quot;");
575:                    } else {
576:                        buf.append(ch);
577:                    }
578:                }
579:                return buf.toString();
580:            }
581:
582:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.