001: /*
002: JSPWiki - a JSP-based WikiWiki clone.
004: Copyright (C) 2001-2002 Janne Jalkanen (Janne.Jalkanen@iki.fi)
006: This program is free software; you can redistribute it and/or modify
007: it under the terms of the GNU Lesser General Public License as published by
008: the Free Software Foundation; either version 2.1 of the License, or
009: (at your option) any later version.
011: This program is distributed in the hope that it will be useful,
012: but WITHOUT ANY WARRANTY; without even the implied warranty of
013: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014: GNU Lesser General Public License for more details.
016: You should have received a copy of the GNU Lesser General Public License
017: along with this program; if not, write to the Free Software
018: Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
019: */
020: package com.ecyrd.jspwiki;
022: import java.io.UnsupportedEncodingException;
023: import java.security.SecureRandom;
024: import java.util.Properties;
025: import java.util.Random;
027: /**
028: * Contains a number of static utility methods.
029: */
030: // FIXME3.0: Move to the "util" package
031: public final class TextUtil {
/** The sixteen upper-case hexadecimal digits, in value order. */
static final String HEX_DIGITS = "0123456789ABCDEF";

/**
 * Hidden constructor: the class is never meant to be instantiated.
 */
private TextUtil() {
    // Intentionally empty.
}
040: /**
041: * java.net.URLEncoder.encode() method in JDK < 1.4 is buggy. This duplicates
042: * its functionality.
043: * @param rs the string to encode
044: * @return the URL-encoded string
045: */
046: protected static String urlEncode(byte[] rs) {
047: StringBuffer result = new StringBuffer(rs.length * 2);
049: // Does the URLEncoding. We could use the java.net one, but
050: // it does not eat byte[]s.
052: for (int i = 0; i < rs.length; i++) {
053: char c = (char) rs[i];
055: switch (c) {
056: case '_':
057: case '.':
058: case '*':
059: case '-':
060: case '/':
061: result.append(c);
062: break;
064: case ' ':
065: result.append('+');
066: break;
068: default:
069: if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
070: || (c >= '0' && c <= '9')) {
071: result.append(c);
072: } else {
073: result.append('%');
074: result.append(HEX_DIGITS.charAt((c & 0xF0) >> 4));
075: result.append(HEX_DIGITS.charAt(c & 0x0F));
076: }
077: }
079: } // for
081: return result.toString();
082: }
084: /**
085: * URL encoder does not handle all characters correctly.
086: * See <A HREF="http://developer.java.sun.com/developer/bugParade/bugs/4257115.html">
087: * Bug parade, bug #4257115</A> for more information.
088: * <P>
089: * Thanks to CJB for this fix.
090: *
091: * @param bytes The byte array containing the bytes of the string
092: * @param encoding The encoding in which the string should be interpreted
093: * @return A decoded String
094: *
095: * @throws UnsupportedEncodingException If the encoding is unknown.
096: * @throws IllegalArgumentException If the byte array is not a valid string.
097: */
098: protected static String urlDecode(byte[] bytes, String encoding)
099: throws UnsupportedEncodingException,
100: IllegalArgumentException {
101: if (bytes == null) {
102: return null;
103: }
105: byte[] decodeBytes = new byte[bytes.length];
106: int decodedByteCount = 0;
108: try {
109: for (int count = 0; count < bytes.length; count++) {
110: switch (bytes[count]) {
111: case '+':
112: decodeBytes[decodedByteCount++] = (byte) ' ';
113: break;
115: case '%':
116: decodeBytes[decodedByteCount++] = (byte) ((HEX_DIGITS
117: .indexOf(bytes[++count]) << 4) + (HEX_DIGITS
118: .indexOf(bytes[++count])));
120: break;
122: default:
123: decodeBytes[decodedByteCount++] = bytes[count];
124: }
125: }
127: } catch (IndexOutOfBoundsException ae) {
128: throw new IllegalArgumentException(
129: "Malformed UTF-8 string?");
130: }
132: String processedPageName = null;
134: try {
135: processedPageName = new String(decodeBytes, 0,
136: decodedByteCount, encoding);
137: } catch (UnsupportedEncodingException e) {
138: throw new UnsupportedEncodingException(
139: "UTF-8 encoding not supported on this platform");
140: }
142: return processedPageName;
143: }
145: /**
146: * As java.net.URLEncoder class, but this does it in UTF8 character set.
147: *
148: * @param text The text to decode
149: * @return An URLEncoded string.
150: */
151: public static String urlEncodeUTF8(String text) {
152: // If text is null, just return an empty string
153: if (text == null) {
154: return "";
155: }
157: byte[] rs;
159: try {
160: rs = text.getBytes("UTF-8");
161: return urlEncode(rs);
162: } catch (UnsupportedEncodingException e) {
163: throw new InternalWikiException("UTF-8 not supported!?!");
164: }
166: }
168: /**
169: * As java.net.URLDecoder class, but for UTF-8 strings. null is a safe
170: * value and returns null.
171: *
172: * @param utf8 The UTF-8 encoded string
173: * @return A plain, normal string.
174: */
175: public static String urlDecodeUTF8(String utf8) {
176: String rs = null;
178: if (utf8 == null)
179: return null;
181: try {
182: rs = urlDecode(utf8.getBytes("ISO-8859-1"), "UTF-8");
183: } catch (UnsupportedEncodingException e) {
184: throw new InternalWikiException(
185: "UTF-8 or ISO-8859-1 not supported!?!");
186: }
188: return rs;
189: }
191: /**
192: * Provides encoded version of string depending on encoding.
193: * Encoding may be UTF-8 or ISO-8859-1 (default).
194: *
195: * <p>This implementation is the same as in
196: * FileSystemProvider.mangleName().
197: *
198: * @param data A string to encode
199: * @param encoding The encoding in which to encode
200: * @return An URL encoded string.
201: */
202: public static String urlEncode(String data, String encoding) {
203: // Presumably, the same caveats apply as in FileSystemProvider.
204: // Don't see why it would be horribly kludgy, though.
205: if ("UTF-8".equals(encoding)) {
206: return TextUtil.urlEncodeUTF8(data);
207: }
209: try {
210: return TextUtil.urlEncode(data.getBytes(encoding));
211: } catch (UnsupportedEncodingException uee) {
212: throw new InternalWikiException(
213: "Could not encode String into" + encoding);
214: }
215: }
217: /**
218: * Provides decoded version of string depending on encoding.
219: * Encoding may be UTF-8 or ISO-8859-1 (default).
220: *
221: * <p>This implementation is the same as in
222: * FileSystemProvider.unmangleName().
223: *
224: * @param data The URL-encoded string to decode
225: * @param encoding The encoding to use
226: * @return A decoded string.
227: * @throws UnsupportedEncodingException If the encoding is unknown
228: * @throws IllegalArgumentException If the data cannot be decoded.
229: */
230: public static String urlDecode(String data, String encoding)
231: throws UnsupportedEncodingException,
232: IllegalArgumentException {
233: // Presumably, the same caveats apply as in FileSystemProvider.
234: // Don't see why it would be horribly kludgy, though.
235: if ("UTF-8".equals(encoding)) {
236: return TextUtil.urlDecodeUTF8(data);
237: }
239: try {
240: return TextUtil
241: .urlDecode(data.getBytes(encoding), encoding);
242: } catch (UnsupportedEncodingException uee) {
243: throw new InternalWikiException(
244: "Could not decode String into" + encoding);
245: }
247: }
249: /**
250: * Replaces the relevant entities inside the String.
251: * All & >, <, and " are replaced by their
252: * respective names.
253: *
254: * @since 1.6.1
255: * @param src The source string.
256: * @return The encoded string.
257: */
258: public static String replaceEntities(String src) {
259: src = replaceString(src, "&", "&");
260: src = replaceString(src, "<", "<");
261: src = replaceString(src, ">", ">");
262: src = replaceString(src, "\"", """);
264: return src;
265: }
267: /**
268: * Replaces a string with an other string.
269: *
270: * @param orig Original string. Null is safe.
271: * @param src The string to find.
272: * @param dest The string to replace <I>src</I> with.
273: * @return A string with the replacement done.
274: */
275: public static final String replaceString(String orig, String src,
276: String dest) {
277: if (orig == null)
278: return null;
279: if (src == null || dest == null)
280: throw new NullPointerException();
281: if (src.length() == 0)
282: return orig;
284: StringBuffer res = new StringBuffer(orig.length() + 20); // Pure guesswork
285: int start = 0;
286: int end = 0;
287: int last = 0;
289: while ((start = orig.indexOf(src, end)) != -1) {
290: res.append(orig.substring(last, start));
291: res.append(dest);
292: end = start + src.length();
293: last = start + src.length();
294: }
296: res.append(orig.substring(end));
298: return res.toString();
299: }
301: /**
302: * Replaces a part of a string with a new String.
303: *
304: * @param start Where in the original string the replacing should start.
305: * @param end Where the replacing should end.
306: * @param orig Original string. Null is safe.
307: * @param text The new text to insert into the string.
308: * @return The string with the orig replaced with text.
309: */
310: public static String replaceString(String orig, int start, int end,
311: String text) {
312: if (orig == null)
313: return null;
315: StringBuffer buf = new StringBuffer(orig);
317: buf.replace(start, end, text);
319: return buf.toString();
320: }
322: /**
323: * Parses an integer parameter, returning a default value
324: * if the value is null or a non-number.
325: *
326: * @param value The value to parse
327: * @param defvalue A default value in case the value is not a number
328: * @return The parsed value (or defvalue).
329: */
331: public static int parseIntParameter(String value, int defvalue) {
332: int val = defvalue;
334: try {
335: val = Integer.parseInt(value.trim());
336: } catch (Exception e) {
337: }
339: return val;
340: }
342: /**
343: * Gets an integer-valued property from a standard Properties
344: * list. If the value does not exist, or is a non-integer, returns defVal.
345: *
346: * @since 2.1.48.
347: * @param props The property set to look through
348: * @param key The key to look for
349: * @param defVal If the property is not found or is a non-integer, returns this value.
350: * @return The property value as an integer (or defVal).
351: */
352: public static int getIntegerProperty(Properties props, String key,
353: int defVal) {
354: String val = props.getProperty(key);
356: return parseIntParameter(val, defVal);
357: }
359: /**
360: * Gets a boolean property from a standard Properties list.
361: * Returns the default value, in case the key has not been set.
362: * <P>
363: * The possible values for the property are "true"/"false", "yes"/"no", or
364: * "on"/"off". Any value not recognized is always defined as "false".
365: *
366: * @param props A list of properties to search.
367: * @param key The property key.
368: * @param defval The default value to return.
369: *
370: * @return True, if the property "key" was set to "true", "on", or "yes".
371: *
372: * @since 2.0.11
373: */
374: public static boolean getBooleanProperty(Properties props,
375: String key, boolean defval) {
376: String val = props.getProperty(key);
378: if (val == null)
379: return defval;
381: return isPositive(val);
382: }
384: /**
385: * Fetches a String property from the set of Properties. This differs from
386: * Properties.getProperty() in a couple of key respects: First, property value
387: * is trim()med (so no extra whitespace back and front), and well, that's it.
388: *
389: * @param props The Properties to search through
390: * @param key The property key
391: * @param defval A default value to return, if the property does not exist.
392: * @return The property value.
393: * @since 2.1.151
394: */
395: public static String getStringProperty(Properties props,
396: String key, String defval) {
397: String val = props.getProperty(key);
399: if (val == null)
400: return defval;
402: return val.trim();
403: }
405: /**
406: * Returns true, if the string "val" denotes a positive string. Allowed
407: * values are "yes", "on", and "true". Comparison is case-insignificant.
408: * Null values are safe.
409: *
410: * @param val Value to check.
411: * @return True, if val is "true", "on", or "yes"; otherwise false.
412: *
413: * @since 2.0.26
414: */
415: public static boolean isPositive(String val) {
416: if (val == null)
417: return false;
419: val = val.trim();
421: return val.equalsIgnoreCase("true")
422: || val.equalsIgnoreCase("on")
423: || val.equalsIgnoreCase("yes");
424: }
426: /**
427: * Makes sure that the POSTed data is conforms to certain rules. These
428: * rules are:
429: * <UL>
430: * <LI>The data always ends with a newline (some browsers, such
431: * as NS4.x series, does not send a newline at the end, which makes
432: * the diffs a bit strange sometimes.
433: * <LI>The CR/LF/CRLF mess is normalized to plain CRLF.
434: * </UL>
435: *
436: * The reason why we're using CRLF is that most browser already
437: * return CRLF since that is the closest thing to a HTTP standard.
438: *
439: * @param postData The data to normalize
440: * @return Normalized data
441: */
442: public static String normalizePostData(String postData) {
443: StringBuffer sb = new StringBuffer();
445: for (int i = 0; i < postData.length(); i++) {
446: switch (postData.charAt(i)) {
447: case 0x0a: // LF, UNIX
448: sb.append("\r\n");
449: break;
451: case 0x0d: // CR, either Mac or MSDOS
452: sb.append("\r\n");
453: // If it's MSDOS, skip the LF so that we don't add it again.
454: if (i < postData.length() - 1
455: && postData.charAt(i + 1) == 0x0a) {
456: i++;
457: }
458: break;
460: default:
461: sb.append(postData.charAt(i));
462: break;
463: }
464: }
466: if (sb.length() < 2
467: || !sb.substring(sb.length() - 2).equals("\r\n")) {
468: sb.append("\r\n");
469: }
471: return sb.toString();
472: }
474: private static final int EOI = 0;
475: private static final int LOWER = 1;
476: private static final int UPPER = 2;
477: private static final int DIGIT = 3;
478: private static final int OTHER = 4;
479: private static final Random RANDOM = new SecureRandom();
481: private static int getCharKind(int c) {
482: if (c == -1) {
483: return EOI;
484: }
486: char ch = (char) c;
488: if (Character.isLowerCase(ch))
489: return LOWER;
490: else if (Character.isUpperCase(ch))
491: return UPPER;
492: else if (Character.isDigit(ch))
493: return DIGIT;
494: else
495: return OTHER;
496: }
498: /**
499: * Adds spaces in suitable locations of the input string. This is
500: * used to transform a WikiName into a more readable format.
501: *
502: * @param s String to be beautified.
503: * @return A beautified string.
504: */
505: public static String beautifyString(String s) {
506: return beautifyString(s, " ");
507: }
509: /**
510: * Adds spaces in suitable locations of the input string. This is
511: * used to transform a WikiName into a more readable format.
512: *
513: * @param s String to be beautified.
514: * @param space Use this string for the space character.
515: * @return A beautified string.
516: * @since 2.1.127
517: */
518: public static String beautifyString(String s, String space) {
519: StringBuffer result = new StringBuffer();
521: if (s == null || s.length() == 0)
522: return "";
524: int cur = s.charAt(0);
525: int curKind = getCharKind(cur);
527: int prevKind = LOWER;
528: int nextKind = -1;
530: int next = -1;
531: int nextPos = 1;
533: while (curKind != EOI) {
534: next = (nextPos < s.length()) ? s.charAt(nextPos++) : -1;
535: nextKind = getCharKind(next);
537: if ((prevKind == UPPER) && (curKind == UPPER)
538: && (nextKind == LOWER)) {
539: result.append(space);
540: result.append((char) cur);
541: } else {
542: result.append((char) cur);
543: if (((curKind == UPPER) && (nextKind == DIGIT))
544: || ((curKind == LOWER) && ((nextKind == DIGIT) || (nextKind == UPPER)))
545: || ((curKind == DIGIT) && ((nextKind == UPPER) || (nextKind == LOWER)))) {
546: result.append(space);
547: }
548: }
549: prevKind = curKind;
550: cur = next;
551: curKind = nextKind;
552: }
554: return result.toString();
555: }
557: /**
558: * Creates a Properties object based on an array which contains alternatively
559: * a key and a value. It is useful for generating default mappings.
560: * For example:
561: * <pre>
562: * String[] properties = { "jspwiki.property1", "value1",
563: * "jspwiki.property2", "value2 };
564: *
565: * Properties props = TextUtil.createPropertes( values );
566: *
567: * System.out.println( props.getProperty("jspwiki.property1") );
568: * </pre>
569: * would output "value1".
570: *
571: * @param values Alternating key and value pairs.
572: * @return Property object
573: * @see java.util.Properties
574: * @throws IllegalArgumentException if the property array is missing
575: * a value for a key.
576: * @since 2.2.
577: */
579: public static Properties createProperties(String[] values)
580: throws IllegalArgumentException {
581: if (values.length % 2 != 0)
582: throw new IllegalArgumentException("One value is missing.");
584: Properties props = new Properties();
586: for (int i = 0; i < values.length; i += 2) {
587: props.setProperty(values[i], values[i + 1]);
588: }
590: return props;
591: }
593: /**
594: * Counts the number of sections (separated with "----") from the page.
595: *
596: * @param pagedata The WikiText to parse.
597: * @return int Number of counted sections.
598: * @since 2.1.86.
599: */
601: public static int countSections(String pagedata) {
602: int tags = 0;
603: int start = 0;
605: while ((start = pagedata.indexOf("----", start)) != -1) {
606: tags++;
607: start += 4; // Skip this "----"
608: }
610: //
611: // The first section does not get the "----"
612: //
613: return pagedata.length() > 0 ? tags + 1 : 0;
614: }
616: /**
617: * Gets the given section (separated with "----") from the page text.
618: * Note that the first section is always #1. If a page has no section markers,
619: * them there is only a single section, #1.
620: *
621: * @param pagedata WikiText to parse.
622: * @param section Which section to get.
623: * @return String The section.
624: * @throws IllegalArgumentException If the page does not contain this many sections.
625: * @since 2.1.86.
626: */
627: public static String getSection(String pagedata, int section)
628: throws IllegalArgumentException {
629: int tags = 0;
630: int start = 0;
631: int previous = 0;
633: while ((start = pagedata.indexOf("----", start)) != -1) {
634: if (++tags == section) {
635: return pagedata.substring(previous, start);
636: }
638: start += 4; // Skip this "----"
640: previous = start;
641: }
643: if (++tags == section) {
644: return pagedata.substring(previous);
645: }
647: throw new IllegalArgumentException("There is no section no. "
648: + section + " on the page.");
649: }
651: /**
652: * A simple routine which just repeates the arguments. This is useful
653: * for creating something like a line or something.
654: *
655: * @param what String to repeat
656: * @param times How many times to repeat the string.
657: * @return Guess what?
658: * @since 2.1.98.
659: */
660: public static String repeatString(String what, int times) {
661: StringBuffer sb = new StringBuffer();
663: for (int i = 0; i < times; i++) {
664: sb.append(what);
665: }
667: return sb.toString();
668: }
670: /**
671: * Converts a string from the Unicode representation into something that can be
672: * embedded in a java properties file. All references outside the ASCII range
673: * are replaced with \\uXXXX.
674: *
675: * @param s The string to convert
676: * @return the ASCII string
677: */
678: public static String native2Ascii(String s) {
679: StringBuffer sb = new StringBuffer();
680: for (int i = 0; i < s.length(); i++) {
681: char aChar = s.charAt(i);
682: if ((aChar < 0x0020) || (aChar > 0x007e)) {
683: sb.append('\\');
684: sb.append('u');
685: sb.append(toHex((aChar >> 12) & 0xF));
686: sb.append(toHex((aChar >> 8) & 0xF));
687: sb.append(toHex((aChar >> 4) & 0xF));
688: sb.append(toHex(aChar & 0xF));
689: } else {
690: sb.append(aChar);
691: }
692: }
693: return sb.toString();
694: }
696: private static char toHex(int nibble) {
697: final char[] hexDigit = { '0', '1', '2', '3', '4', '5', '6',
698: '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
699: return hexDigit[nibble & 0xF];
700: }
702: /**
703: * Generates a hexadecimal string from an array of bytes. For
704: * example, if the array contains { 0x01, 0x02, 0x3E }, the resulting
705: * string will be "01023E".
706: *
707: * @param bytes A Byte array
708: * @return A String representation
709: * @since 2.3.87
710: */
711: public static String toHexString(byte[] bytes) {
712: StringBuffer sb = new StringBuffer(bytes.length * 2);
713: for (int i = 0; i < bytes.length; i++) {
714: sb.append(toHex(bytes[i] >> 4));
715: sb.append(toHex(bytes[i]));
716: }
718: return sb.toString();
719: }
721: /**
722: * Returns true, if the argument contains a number, otherwise false.
723: * In a quick test this is roughly the same speed as Integer.parseInt()
724: * if the argument is a number, and roughly ten times the speed, if
725: * the argument is NOT a number.
726: *
727: * @since 2.4
728: * @param s String to check
729: * @return True, if s represents a number. False otherwise.
730: */
732: public static boolean isNumber(String s) {
733: if (s == null)
734: return false;
736: if (s.length() > 1 && s.charAt(0) == '-')
737: s = s.substring(1);
739: for (int i = 0; i < s.length(); i++) {
740: if (!Character.isDigit(s.charAt(i)))
741: return false;
742: }
744: return true;
745: }
747: /** Length of password. @see #generateRandomPassword() */
748: public static final int PASSWORD_LENGTH = 8;
750: /**
751: * Generate a random String suitable for use as a temporary password.
752: *
753: * @return String suitable for use as a temporary password
754: * @since 2.4
755: */
756: public static String generateRandomPassword() {
757: // Pick from some letters that won't be easily mistaken for each
758: // other. So, for example, omit o O and 0, 1 l and L.
759: String letters = "abcdefghjkmnpqrstuvwxyzABCDEFGHJKMNPQRSTUVWXYZ23456789+@";
761: String pw = "";
762: for (int i = 0; i < PASSWORD_LENGTH; i++) {
763: int index = (int) (RANDOM.nextDouble() * letters.length());
764: pw += letters.substring(index, index + 1);
765: }
766: return pw;
767: }
768: }