001: /*
002: * (C) Copyright 2003 Nabh Information Systems, Inc.
003: *
004: * All copyright notices regarding Nabh's products MUST remain
005: * intact in the scripts and in the outputted HTML.
006: * This program is free software; you can redistribute it and/or
007: * modify it under the terms of the GNU Lesser General Public License
008: * as published by the Free Software Foundation; either version 2.1
009: * of the License, or (at your option) any later version.
010: *
011: * This program is distributed in the hope that it will be useful,
012: * but WITHOUT ANY WARRANTY; without even the implied warranty of
013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014: * GNU Lesser General Public License for more details.
015: *
016: * You should have received a copy of the GNU Lesser General Public License
017: * along with this program; if not, write to the Free Software
018: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
019: *
020: */
021:
022: package com.nabhinc.util;
023:
024: import java.io.File;
025: import java.io.StringWriter;
026:
/**
 * Provides static utility methods for string manipulation: path-token
 * extraction, joining and splitting, HTML/XML/JavaScript escaping, markup
 * stripping and word-boundary truncation.
 *
 * @author Padmanabh Dabke
 * (c) 2001,2003 Nabh Information Systems, Inc. All Rights Reserved.
 */
public class StringUtil {
    // Transformation table for characters 128 to 255. These actually fall into
    // two groups, put together for efficiency: "Windows" characters 128-159 such
    // as "smart quotes", which are encoded to valid Unicode entities, and valid
    // ISO-8859 characters 160-255, which are encoded to the symbolic HTML
    // entity. Everything >= 256 is encoded to a numeric entity.
    //
    // For more on HTML entities see http://www.pemberley.com/janeinfo/latin1.html
    // and ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT
    //
    // The table is indexed by (character - 128). An empty string means the
    // character is undefined, so encodeHTML emits nothing for it.
    static final String[] transform = {
        "&euro;", // 128
        "", // empty string means character is undefined in unicode
        "&sbquo;",
        "&fnof;",
        "&bdquo;",
        "&hellip;",
        "&dagger;",
        "&Dagger;",
        "&circ;",
        "&permil;",
        "&Scaron;",
        "&lsaquo;",
        "&OElig;",
        "",
        "&#381;", // no symbolic HTML entity exists for Z-caron
        "",
        "",
        "&lsquo;",
        "&rsquo;",
        "&ldquo;",
        "&rdquo;",
        "&bull;",
        "&ndash;",
        "&mdash;",
        "&tilde;",
        "&trade;",
        "&scaron;",
        "&rsaquo;",
        "&oelig;",
        "",
        "&#382;", // no symbolic HTML entity exists for z-caron
        "&Yuml;", // 159
        "&nbsp;", // 160
        "&iexcl;", "&cent;", "&pound;", "&curren;", "&yen;",
        "&brvbar;", "&sect;", "&uml;", "&copy;", "&ordf;",
        "&laquo;", "&not;", "&shy;", "&reg;", "&macr;", "&deg;",
        "&plusmn;", "&sup2;", "&sup3;", "&acute;", "&micro;",
        "&para;", "&middot;", "&cedil;", "&sup1;", "&ordm;",
        "&raquo;", "&frac14;", "&frac12;", "&frac34;", "&iquest;",
        "&Agrave;", "&Aacute;", "&Acirc;", "&Atilde;", "&Auml;",
        "&Aring;", "&AElig;", "&Ccedil;", "&Egrave;", "&Eacute;",
        "&Ecirc;", "&Euml;", "&Igrave;", "&Iacute;", "&Icirc;",
        "&Iuml;", "&ETH;", "&Ntilde;", "&Ograve;", "&Oacute;",
        "&Ocirc;", "&Otilde;", "&Ouml;", "&times;", "&Oslash;",
        "&Ugrave;", "&Uacute;", "&Ucirc;", "&Uuml;", "&Yacute;",
        "&THORN;", "&szlig;", "&agrave;", "&aacute;", "&acirc;",
        "&atilde;", "&auml;", "&aring;", "&aelig;", "&ccedil;",
        "&egrave;", "&eacute;", "&ecirc;", "&euml;", "&igrave;",
        "&iacute;", "&icirc;", "&iuml;", "&eth;", "&ntilde;",
        "&ograve;", "&oacute;", "&ocirc;", "&otilde;", "&ouml;",
        "&divide;", "&oslash;", "&ugrave;", "&uacute;", "&ucirc;",
        "&uuml;", "&yacute;", "&thorn;", "&yuml;" // 255
    };

    /**
     * Line feed string (the platform's <code>line.separator</code> property).
     * Useful in writing multiline text files. Left non-final so that existing
     * code that assigns to it keeps compiling.
     */
    public static String LINE_FEED = System.getProperty("line.separator");

    /**
     * Extracts the substring after the last occurrence of a delimiter. Used
     * for picking the file name out of a path string.
     *
     * @param path Original string
     * @param delim Delimiter; may be longer than one character
     * @return null if path is null; path itself if delim is null or does not
     * occur in path; otherwise the substring following the last occurrence of
     * delim (an empty string when path ends with delim).
     */
    public static String extractLastToken(String path, String delim) {
        if (path == null) {
            return null;
        }
        if (delim == null) {
            return path;
        }
        int index = path.lastIndexOf(delim);
        if (index < 0) {
            return path;
        }
        // Skip the whole delimiter, not just its first character.
        return path.substring(index + delim.length());
    }

    /**
     * Convenience method that calls extractLastToken with / as
     * the delimiter.
     *
     * @param path Original string
     * @return Substring after the last occurrence of /
     */
    public static String extractName(String path) {
        return extractLastToken(path, "/");
    }

    /**
     * Returns the substring up to but not including the last
     * occurrence of / in the original string.
     *
     * @param path Original string
     * @return null if path is null or contains no /; otherwise the substring
     * before the last / (an empty string when the only / is the first
     * character).
     */
    public static String extractParentPath(String path) {
        if (path == null) {
            return null;
        }
        int index = path.lastIndexOf('/');
        return (index < 0) ? null : path.substring(0, index);
    }

    /**
     * Joins the array of strings into one string with the specified
     * delimiter. Null elements are rendered as the text "null"; a null
     * delimiter is treated as an empty string.
     *
     * @param elements Array of strings to be joined.
     * @param delimiter Separator
     * @return null if elements is null, an empty string if it has length 0,
     * otherwise the concatenated string.
     */
    public static String join(String[] elements, String delimiter) {
        if (elements == null) {
            return null;
        }
        if (elements.length == 0) {
            return "";
        }
        String separator = (delimiter == null) ? "" : delimiter;
        StringBuffer joined = new StringBuffer();
        for (int i = 0; i < elements.length; i++) {
            if (i > 0) {
                joined.append(separator);
            }
            // StringBuffer.append(String) appends "null" for a null argument.
            joined.append(elements[i]);
        }
        return joined.toString();
    }

    /**
     * Main method for testing StringUtil class. Prints the name and parent
     * extracted from a few sample paths.
     *
     * @param args Command line arguments (ignored).
     */
    public static void main(String[] args) {
        split("h w", " ");
        String[] samples = { "/", "/a/b/c", "/a/b/c/d/", "/a" };
        for (int i = 0; i < samples.length; i++) {
            System.out.println("Name = " + extractLastToken(samples[i], "/"));
            System.out.println("Parent = " + extractParentPath(samples[i]));
        }
    }

    /**
     * Splits a string into an array of trimmed tokens. Tokenizing is done
     * with <code>java.util.StringTokenizer</code>, so every character of
     * <code>delimiter</code> acts as a separator and empty tokens are
     * skipped.
     *
     * @param str String to be split.
     * @param delimiter Separator characters.
     * @return null if str is null; a one-element array holding str unchanged
     * if delimiter is null or empty; otherwise the trimmed tokens.
     */
    public static String[] split(String str, String delimiter) {
        if (str == null) {
            return null;
        }
        // length() rather than == "": the latter compares references only.
        if (delimiter == null || delimiter.length() == 0) {
            return new String[] { str };
        }
        java.util.StringTokenizer tokenizer =
            new java.util.StringTokenizer(str, delimiter);
        String[] tokens = new String[tokenizer.countTokens()];
        for (int i = 0; i < tokens.length; i++) {
            tokens[i] = tokenizer.nextToken().trim();
        }
        return tokens;
    }

    /**
     * Splits a string into an array of ints using the same tokenizing rules
     * as {@link #split(String, String)}. Surrounding whitespace of each token
     * is ignored.
     *
     * @param str String to be split.
     * @param delimiter Separator characters.
     * @return null if str is null; one int parsed from the entire string if
     * delimiter is null or empty; otherwise one int per token.
     * @throws NumberFormatException if a token is not a valid int.
     */
    public static int[] splitAsInts(String str, String delimiter) {
        String[] tokens = split(str, delimiter);
        if (tokens == null) {
            return null;
        }
        int[] values = new int[tokens.length];
        for (int i = 0; i < tokens.length; i++) {
            values[i] = Integer.parseInt(tokens[i].trim());
        }
        return values;
    }

    /**
     * Substitutes the first occurrence of a substring by another string.
     *
     * @param str String in which the substitution will occur
     * @param orig Substring to be replaced
     * @param sub Substitute string
     * @return str unchanged if any argument is null or orig does not occur;
     * otherwise str with the first occurrence of orig replaced by sub.
     */
    public static String substitute(String str, String orig, String sub) {
        if (str == null || orig == null || sub == null) {
            return str;
        }
        int index = str.indexOf(orig);
        if (index < 0) {
            return str;
        }
        return str.substring(0, index) + sub
            + str.substring(index + orig.length());
    }

    /**
     * HTML-encodes a string, also emitting a line-break tag after every
     * newline.
     *
     * @param str Text to encode
     * @return Encoded text, or null if str is null.
     */
    public static final String encodeHTML(String str) {
        return encodeHTML(str, true);
    }

    /**
     * HTML-encodes a string.
     *
     * @param str Text to encode
     * @param encodeNewLine Whether a line-break tag is emitted after each
     * newline character
     * @return Encoded text, or null if str is null.
     */
    public static final String encodeHTML(String str,
        boolean encodeNewLine) {
        if (str == null) {
            return null;
        }
        if (str.length() == 0) {
            return str;
        }
        StringBuffer sb = new StringBuffer(str.length());
        encodeHTML(str, sb, encodeNewLine);
        return sb.toString();
    }

    /**
     * XML-encodes a string by replacing the five predefined XML special
     * characters with their entities.
     *
     * @param str Text to encode
     * @return Encoded text, or null if str is null.
     */
    public static final String encodeXML(String str) {
        if (str == null) {
            return null;
        }
        if (str.length() == 0) {
            return str;
        }
        StringBuffer sb = new StringBuffer(str.length());
        encodeXML(str, sb);
        return sb.toString();
    }

    /**
     * HTML-encodes a string into the supplied buffer, also emitting a
     * line-break tag after every newline.
     *
     * @param str Text to encode; nothing is appended if null
     * @param ret Buffer receiving the encoded text
     */
    public static final void encodeHTML(String str, StringBuffer ret) {
        encodeHTML(str, ret, true);
    }

    /**
     * HTML-encodes a string into the supplied buffer. The characters
     * &lt;, &gt;, &amp; and the double quote become named entities,
     * characters 128-255 are looked up in the transform table, and anything
     * above 255 becomes a numeric character reference. A valid surrogate
     * pair is emitted as a single reference to its code point.
     *
     * @param str Text to encode; nothing is appended if null
     * @param ret Buffer receiving the encoded text
     * @param encodeNewline Whether a line-break tag is appended after each
     * newline character (the newline itself is always kept)
     */
    public static final void encodeHTML(String str, StringBuffer ret,
        boolean encodeNewline) {
        if (str == null) {
            return;
        }
        final int length = str.length();
        for (int i = 0; i < length; i++) {
            char c = str.charAt(i);
            switch (c) {
            case '<':
                ret.append("&lt;");
                break;
            case '>':
                ret.append("&gt;");
                break;
            case '&':
                ret.append("&amp;");
                break;
            case '"':
                ret.append("&quot;");
                break;
            case '\n':
                ret.append('\n');
                if (encodeNewline) {
                    ret.append("<br />");
                }
                break;
            default:
                if (c < 128) {
                    ret.append(c);
                } else if (c < 256) {
                    ret.append(transform[c - 128]);
                } else {
                    int codePoint = c;
                    // Combine a high/low surrogate pair into one code point;
                    // references to the individual surrogates are invalid.
                    if (c >= 0xD800 && c <= 0xDBFF && i + 1 < length) {
                        char low = str.charAt(i + 1);
                        if (low >= 0xDC00 && low <= 0xDFFF) {
                            codePoint = ((c - 0xD800) << 10)
                                + (low - 0xDC00) + 0x10000;
                            i++;
                        }
                    }
                    ret.append("&#").append(codePoint).append(';');
                }
            }
        }
    }

    /**
     * Flattens a string onto one line: each newline becomes a single space
     * and carriage returns are dropped.
     *
     * @param str Text to flatten
     * @return Flattened text, or null if str is null.
     */
    public static final String removeNewLine(String str) {
        if (str == null) {
            return null;
        }
        final int length = str.length();
        StringBuffer flat = new StringBuffer(length);
        for (int i = 0; i < length; i++) {
            char c = str.charAt(i);
            if (c == '\n') {
                flat.append(" ");
            } else if (c != '\r') {
                flat.append(c);
            }
        }
        return flat.toString();
    }

    /**
     * XML-encodes a string into the supplied buffer. Only the five
     * predefined XML special characters are replaced; every other character
     * is copied unchanged.
     *
     * @param str Text to encode; nothing is appended if null
     * @param ret Buffer receiving the encoded text
     */
    public static final void encodeXML(String str, StringBuffer ret) {
        if (str == null) {
            return;
        }
        final int length = str.length();
        for (int i = 0; i < length; i++) {
            char c = str.charAt(i);
            switch (c) {
            case '<':
                ret.append("&lt;");
                break;
            case '>':
                ret.append("&gt;");
                break;
            case '&':
                ret.append("&amp;");
                break;
            case '"':
                ret.append("&quot;");
                break;
            case '\'':
                ret.append("&apos;");
                break;
            default:
                ret.append(c);
            }
        }
    }

    /**
     * Renders a throwable as an HTML fragment: class name, message (if any),
     * stack frames and, when present, the direct cause with its frames.
     * Message, cause description and frames are HTML-encoded so that text
     * such as constructor frames ("&lt;init&gt;") stays visible and cannot
     * inject markup.
     *
     * @param ex Throwable to render
     * @return HTML fragment, or an empty string if ex is null.
     */
    public static String getErrorStackTraceString(Throwable ex) {
        if (ex == null) {
            return "";
        }
        StringBuffer out = new StringBuffer();
        out.append("Error Class: ").append(ex.getClass().getName());
        out.append("<br/>");
        if (ex.getMessage() != null) {
            out.append("Error Message: ");
            encodeHTML(ex.getMessage(), out);
            out.append("<br/>");
        }
        appendTrace(out, ex.getStackTrace(), "");

        Throwable cause = ex.getCause();
        if (cause != null) {
            out.append("<br/><b>Root Cause:</b><br/>");
            encodeHTML(cause.toString(), out);
            // Keep the cause description on its own line.
            out.append("<br/>");
            appendTrace(out, cause.getStackTrace(), " ");
        }
        return out.toString();
    }

    /** Appends each stack frame, HTML-encoded, prefixed and followed by a line break. */
    private static void appendTrace(StringBuffer out,
        StackTraceElement[] trace, String prefix) {
        for (int i = 0; i < trace.length; i++) {
            out.append(prefix);
            encodeHTML(trace[i].toString(), out, false);
            out.append("<br/>");
        }
    }

    /**
     * Puts a CDATA section around the text and returns it. Because a CDATA
     * section cannot contain its own terminator, every "]]&gt;" in the text
     * is split across two adjacent sections so the result stays well-formed
     * and parses back to the original text.
     *
     * @param txt Original text string
     * @return Wrapped text, or null if txt is null.
     */
    public static String wrapInCDATA(String txt) {
        if (txt == null) {
            return null;
        }
        StringBuffer out = new StringBuffer(txt.length() + 12);
        out.append("<![CDATA[");
        int from = 0;
        int terminator = txt.indexOf("]]>");
        while (terminator >= 0) {
            // End the current section after "]]" and reopen before ">".
            out.append(txt.substring(from, terminator + 2));
            out.append("]]><![CDATA[");
            from = terminator + 2;
            terminator = txt.indexOf("]]>", from);
        }
        out.append(txt.substring(from));
        out.append("]]>");
        return out.toString();
    }

    /**
     * Transforms a relative or absolute path to a URL format. A path that
     * already contains "://" is returned unchanged. Otherwise basePath, when
     * given, is prepended verbatim (no separator is inserted); if the result
     * still has no "://", it is turned into a file URL. The base path can be
     * a local file system path, e.g.: C:/document/ or in a URL format,
     * e.g.: http://localhost
     *
     * @param path Path to transform
     * @param basePath Optional prefix for the path; may be null
     * @return URL string, or null if path is null.
     */
    public static String transformToURL(String path, String basePath) {
        if (path == null || path.indexOf("://") != -1) {
            return path;
        }
        String combined = (basePath == null) ? path : basePath + path;
        if (combined.indexOf("://") != -1) {
            // The base path already supplied a URL scheme.
            return combined;
        }
        if (!combined.startsWith("/")) {
            combined = "/" + combined;
        }
        return "file://" + combined;
    }

    /**
     * Replaces all occurrences of "/" in the specified path with the system's
     * file separator character.
     *
     * @param path The path to convert
     * @return path with the system-dependent separator character, or null if
     * path is null.
     */
    public static String replacePathSeparator(String path) {
        if (path == null) {
            return null;
        }
        // A plain character replace avoids regex escaping of the backslash.
        return path.replace('/', File.separatorChar);
    }

    /**
     * @param paramVal Value to test
     * @return true if paramVal is neither null nor an empty string.
     */
    public static boolean isNotNullOrEmpty(String paramVal) {
        return !isNullOrEmpty(paramVal);
    }

    /**
     * @param paramValue Value to test
     * @return true if paramValue is null or an empty string.
     */
    public static boolean isNullOrEmpty(String paramValue) {
        return paramValue == null || paramValue.length() == 0;
    }

    /**
     * Escapes a string for use inside a quoted JavaScript string literal.
     * Backslashes, single and double quotes, newlines and carriage returns
     * are replaced by their backslash escape sequences. The backslash must
     * be escaped too, otherwise an input backslash placed before a quote
     * would neutralize the escape added for that quote.
     *
     * @param str Text to escape
     * @return Escaped text, or an empty string if str is null.
     */
    public static String escapeJavascript(String str) {
        if (str == null) {
            return "";
        }
        final int length = str.length();
        StringBuffer sb = new StringBuffer(length);
        for (int i = 0; i < length; i++) {
            char c = str.charAt(i);
            switch (c) {
            case '\\':
                sb.append("\\\\");
                break;
            case '\'':
                sb.append("\\'");
                break;
            case '"':
                sb.append("\\\"");
                break;
            case '\n':
                sb.append("\\n");
                break;
            case '\r':
                sb.append("\\r");
                break;
            default:
                sb.append(c);
            }
        }
        return sb.toString();
    }

    /**
     * Returns the part of a path before its last /. Equivalent to
     * {@link #extractParentPath(String)}.
     *
     * @param childPath Path whose parent is wanted
     * @return null if childPath is null or contains no /; otherwise the
     * substring before the last /.
     */
    public static String getParentPath(String childPath) {
        return extractParentPath(childPath);
    }

    /**
     * Taken from Roller blogger.
     * Removes occurrences of html, defined as any text
     * between the characters "&lt;" and "&gt;".
     * Optionally replaces HTML tags with a space. An opening "&lt;" with no
     * closing "&gt;" is kept together with everything after it.
     *
     * @param str Text to strip
     * @param addSpace Whether a space is inserted where a tag was removed
     * (no space is inserted for a tag at the very start of the text)
     * @return An empty string if str is null; str unchanged if it contains no
     * "&lt;"; otherwise the stripped text with surrounding whitespace trimmed.
     */
    public static String removeHTML(String str, boolean addSpace) {
        if (str == null) {
            return "";
        }
        int tagOpen = str.indexOf('<');
        if (tagOpen == -1) {
            return str;
        }

        StringBuffer text = new StringBuffer(str.length());
        int cursor = 0;
        int tagClose = 0;
        while (tagOpen >= cursor) {
            if (tagOpen > 0) {
                text.append(str.substring(cursor, tagOpen));
                // replace each tag with a space (looks better)
                if (addSpace) {
                    text.append(" ");
                }
            }
            tagClose = str.indexOf('>', tagOpen);
            if (tagClose == -1) {
                // Unterminated tag: keep the remainder verbatim and stop.
                text.append(str.substring(tagOpen));
                break;
            }
            cursor = tagClose + 1;
            tagOpen = str.indexOf('<', cursor);
        }
        // append everything after the last closed tag
        if (tagClose > -1 && tagClose + 1 < str.length()) {
            text.append(str.substring(tagClose + 1));
        }
        return text.toString().trim();
    }

    //------------------------------------------------------------------------
    /**
     * Replaces occurrences of non-alphanumeric characters with an underscore.
     *
     * @param str Text to process
     * @return Processed text, or null if str is null.
     */
    public static String replaceNonAlphanumeric(String str) {
        return replaceNonAlphanumeric(str, '_');
    }

    //------------------------------------------------------------------------
    /**
     * Replaces occurrences of non-alphanumeric characters (as decided by
     * <code>Character.isLetterOrDigit</code>) with a supplied char.
     *
     * @param str Text to process
     * @param subst Replacement character
     * @return Processed text, or null if str is null.
     */
    public static String replaceNonAlphanumeric(String str, char subst) {
        if (str == null) {
            return null;
        }
        final int length = str.length();
        StringBuffer ret = new StringBuffer(length);
        for (int i = 0; i < length; i++) {
            char c = str.charAt(i);
            ret.append(Character.isLetterOrDigit(c) ? c : subst);
        }
        return ret.toString();
    }

    //------------------------------------------------------------------------
    /**
     * Removes occurrences of non-alphanumeric characters (as decided by
     * <code>Character.isLetterOrDigit</code>), except that periods are kept.
     *
     * @param str Text to process
     * @return Processed text, or null if str is null.
     */
    public static String removeNonAlphanumeric(String str) {
        if (str == null) {
            return null;
        }
        final int length = str.length();
        StringBuffer ret = new StringBuffer(length);
        for (int i = 0; i < length; i++) {
            char c = str.charAt(i);
            // MR: Allow periods in page links
            if (Character.isLetterOrDigit(c) || c == '.') {
                ret.append(c);
            }
        }
        return ret.toString();
    }

    /**
     * Truncates text at a word boundary while keeping its markup balanced.
     * The length limits apply to the text with markup stripped. If that text
     * fits within <code>upper</code>, str is returned unchanged. Otherwise
     * the text is cut at the last space at or before <code>upper</code>, or
     * at <code>upper</code> itself when that space lies before
     * <code>lower</code>. When str contained markup, the cut is mapped back
     * onto the original string and every tag that follows the cut is
     * appended after <code>appendToEnd</code>. If the cut position cannot be
     * located in the original (for example because a tag splits the last
     * word), the plain-text truncation is returned instead.
     *
     * This method is based on code from the String taglib at Apache Jakarta:
     * http://cvs.apache.org/viewcvs/jakarta-taglibs/string/src/org/apache/taglibs/string/util/StringW.java?rev=1.16&content-type=text/vnd.viewcvs-markup
     * Copyright (c) 1999 The Apache Software Foundation.
     * Author: timster@mac.com
     *
     * @param str Text to truncate, possibly containing markup
     * @param lower Minimum acceptable cut position; negative values count as 0
     * @param upper Maximum length of the stripped text; raised to lower if
     * smaller
     * @param appendToEnd Suffix marking the truncation; null counts as empty
     * @return Truncated text, or null if str is null.
     */
    public static String truncateNicely(String str, int lower,
        int upper, String appendToEnd) {
        if (str == null) {
            return null;
        }
        String suffix = (appendToEnd == null) ? "" : appendToEnd;
        String plain = removeHTML(str, false);
        boolean hadMarkup = plain.length() < str.length();

        if (lower < 0) {
            lower = 0;
        }
        if (upper < lower) {
            upper = lower;
        }
        if (plain.length() <= upper) {
            return str;
        }

        String truncated = cutAtWordBoundary(plain, lower, upper);
        if (!hadMarkup) {
            return truncated + suffix;
        }

        // Locate the last word of the truncated text in the original string
        // to find where the cut falls once markup is taken into account.
        int lastSpace = truncated.lastIndexOf(' ');
        String lastWord = truncated.substring(lastSpace + 1);
        int found = str.indexOf(lastWord, lastSpace);
        if (found < 0) {
            return truncated + suffix;
        }
        int cut = found + lastWord.length();

        // Keep the tags that follow the cut so open elements get closed.
        return str.substring(0, cut) + suffix
            + extractHTML(str.substring(cut));
    }

    /**
     * Truncates text at a word boundary, discarding all markup. The length
     * limits apply to the text with markup stripped. If that text fits within
     * <code>upper</code>, str is returned unchanged (markup included).
     * Otherwise the stripped text is cut at the last space at or before
     * <code>upper</code>, or at <code>upper</code> itself when that space
     * lies before <code>lower</code>, and <code>appendToEnd</code> is
     * appended.
     *
     * @param str Text to truncate, possibly containing markup
     * @param lower Minimum acceptable cut position; negative values count as 0
     * @param upper Maximum length of the stripped text; raised to lower if
     * smaller
     * @param appendToEnd Suffix marking the truncation; null counts as empty
     * @return Truncated text, or null if str is null.
     */
    public static String truncateText(String str, int lower, int upper,
        String appendToEnd) {
        if (str == null) {
            return null;
        }
        String suffix = (appendToEnd == null) ? "" : appendToEnd;
        String plain = removeHTML(str, false);

        if (lower < 0) {
            lower = 0;
        }
        if (upper < lower) {
            upper = lower;
        }
        if (plain.length() <= upper) {
            return str;
        }
        return cutAtWordBoundary(plain, lower, upper) + suffix;
    }

    /**
     * Cuts text at the last space at or before upper, or at upper when that
     * space lies before lower. Expects 0 <= lower <= upper < text.length().
     */
    private static String cutAtWordBoundary(String text, int lower,
        int upper) {
        int cut = text.lastIndexOf(' ', upper);
        if (cut < lower) {
            cut = upper;
        }
        return text.substring(0, cut);
    }

    /**
     * Extracts (keeps) JUST the HTML from the String, i.e. the concatenation
     * of every "&lt;...&gt;" span. Scanning stops at an opening "&lt;" that
     * has no closing "&gt;".
     *
     * @param str Text to scan
     * @return The tags found, or an empty string if str is null or contains
     * no complete tag.
     */
    public static String extractHTML(String str) {
        if (str == null) {
            return "";
        }
        StringBuffer tags = new StringBuffer(str.length());
        int tagOpen = str.indexOf('<');
        while (tagOpen >= 0) {
            int tagClose = str.indexOf('>', tagOpen);
            if (tagClose < 0) {
                break;
            }
            tags.append(str.substring(tagOpen, tagClose + 1));
            tagOpen = str.indexOf('<', tagClose + 1);
        }
        return tags.toString();
    }

}
|