0001: /*
0002: * Copyright 2001-2007 Geert Bevin <gbevin[remove] at uwyn dot com>
0003: * Distributed under the terms of either:
0004: * - the common development and distribution license (CDDL), v1.0; or
0005: * - the GNU Lesser General Public License, v2.1 or later
0006: * $Id: StringUtils.java 3634 2007-01-08 21:42:24Z gbevin $
0007: */
0008: package com.uwyn.rife.tools;
0009:
0010: import java.util.*;
0011:
0012: import com.uwyn.rife.config.RifeConfig;
0013: import com.uwyn.rife.datastructures.DocumentPosition;
0014: import com.uwyn.rife.datastructures.EnumClass;
0015: import com.uwyn.rife.pcj.map.CharKeyMapIterator;
0016: import com.uwyn.rife.pcj.map.CharKeyOpenHashMap;
0017: import java.io.UnsupportedEncodingException;
0018: import java.lang.reflect.Array;
0019: import java.net.URLEncoder;
0020: import java.nio.CharBuffer;
0021: import java.nio.charset.CharacterCodingException;
0022: import java.nio.charset.Charset;
0023: import java.nio.charset.CharsetEncoder;
0024: import java.text.BreakIterator;
0025: import java.util.regex.Matcher;
0026: import java.util.regex.Pattern;
0027:
0028: /**
0029: * General purpose class containing common <code>String</code> manipulation
0030: * methods.
0031: *
0032: * @author Geert Bevin (gbevin[remove] at uwyn dot com)
0033: * @version $Revision: 3634 $
0034: * @since 1.0
0035: */
0036: public abstract class StringUtils {
0037: public static String ENCODING_US_ASCII = "US-ASCII";
0038: public static String ENCODING_ISO_8859_1 = "ISO-8859-1";
0039: public static String ENCODING_ISO_8859_2 = "ISO-8859-2";
0040: public static String ENCODING_ISO_8859_5 = "ISO-8859-5";
0041: public static String ENCODING_UTF_8 = "UTF-8";
0042: public static String ENCODING_UTF_16BE = "UTF-16BE";
0043: public static String ENCODING_UTF_16LE = "UTF-16LE";
0044: public static String ENCODING_UTF_16 = "UTF-16";
0045:
0046: public static Charset CHARSET_US_ASCII = Charset
0047: .forName(StringUtils.ENCODING_US_ASCII);
0048:
// Option flags for BBCode conversion. BbcodeOption is declared outside this
// part of the file; each flag is identified by the name passed to its
// constructor.
// NOTE(review): the exact effect of each option is decided by the BBCode
// conversion code, which is not visible here -- confirm there.
public static final BbcodeOption SHORTEN_URL = new BbcodeOption(
    "SHORTEN_URL");
public static final BbcodeOption SANITIZE_URL = new BbcodeOption(
    "SANITIZE_URL");
public static final BbcodeOption CONVERT_BARE_URLS = new BbcodeOption(
    "CONVERT_BARE_URLS");
public static final BbcodeOption NO_FOLLOW_LINKS = new BbcodeOption(
    "NO_FOLLOW_LINKS");

// Opening [color=VALUE] tag; group 1 is VALUE, a possibly empty run of '#'
// and word characters. Whitespace around '=' and before ']' is tolerated.
public static final Pattern BBCODE_COLOR = Pattern.compile(
    "\\[color\\s*=\\s*([#\\w]*)\\s*\\]",
    Pattern.CASE_INSENSITIVE);
// Opening [size=VALUE] tag; group 1 is an optional sign followed by a
// possibly empty run of decimal digits.
public static final Pattern BBCODE_SIZE = Pattern.compile(
    "\\[size\\s*=\\s*([+\\-]?[0-9]*)\\s*\\]",
    Pattern.CASE_INSENSITIVE);
// [url]TARGET[/url]; group 1 is the run of non-whitespace characters between
// the tags, surrounding whitespace excluded.
public static final Pattern BBCODE_URL_SHORT = Pattern.compile(
    "\\[url\\]\\s*([^\\s]*)\\s*\\[\\/url\\]",
    Pattern.CASE_INSENSITIVE);
// [url=TARGET]TEXT[/url]; group 1 is TARGET and group 2 is TEXT. Neither
// group may contain a '[' character.
public static final Pattern BBCODE_URL_LONG = Pattern.compile(
    "\\[url=([^\\[]*)\\]([^\\[]*)\\[/url\\]",
    Pattern.CASE_INSENSITIVE);
// [img]SOURCE[/img]; group 1 is the run of non-whitespace characters between
// the tags, surrounding whitespace excluded.
public static final Pattern BBCODE_IMG = Pattern.compile(
    "\\[img\\]\\s*([^\\s]*)\\s*\\[\\/img\\]",
    Pattern.CASE_INSENSITIVE);
// Opening [quote=NAME] tag; group 1 is one or more characters other than ']'
// optionally followed by ']' characters, and the match ends on a final ']'.
// The pattern has no ^ or $ anchors, so MULTILINE does not change what it
// matches.
public static final Pattern BBCODE_QUOTE_LONG = Pattern.compile(
    "\\[quote=([^\\]]+\\]*)\\]", Pattern.CASE_INSENSITIVE
        | Pattern.MULTILINE);
// A bare http(s):// or ftp(s):// URL. The match starts either at the
// beginning of a line or on one character that is not a quote, '=', '>' or
// ']' (so URLs already inside markup are skipped); that leading character is
// part of the match but not of group 1, which holds the URL itself.
public static final Pattern BBCODE_BAREURL = Pattern
    .compile(
        "(?:[^\"'=>\\]]|^)((?:http|ftp)s?://(?:%[\\p{Digit}A-Fa-f][\\p{Digit}A-Fa-f]|[\\-_\\.!~*';\\|/?:@#&=\\+$,\\p{Alnum}])+)",
        Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);

// Character -> replacement string tables, one per target format. They are
// created empty here and filled by the static initializer.
private static final CharKeyOpenHashMap<String> AGGRESSIVE_HTML_ENCODE_MAP = new CharKeyOpenHashMap<String>();
private static final CharKeyOpenHashMap<String> DEFENSIVE_HTML_ENCODE_MAP = new CharKeyOpenHashMap<String>();
private static final CharKeyOpenHashMap<String> XML_ENCODE_MAP = new CharKeyOpenHashMap<String>();
private static final CharKeyOpenHashMap<String> STRING_ENCODE_MAP = new CharKeyOpenHashMap<String>();
private static final CharKeyOpenHashMap<String> SQL_ENCODE_MAP = new CharKeyOpenHashMap<String>();
private static final CharKeyOpenHashMap<String> LATEX_ENCODE_MAP = new CharKeyOpenHashMap<String>();

// Reverse lookup (entity text -> character) used by decodeHtml; built in the
// static initializer from both HTML encode tables.
private static final Map<String, Character> HTML_DECODE_MAP = new HashMap<String, Character>();

// Handler passed to encode(...) by encodeHtml.
// NOTE(review): presumably handles characters that have no entry in the HTML
// tables -- confirm against HtmlEncoderFallbackHandler.
private static final HtmlEncoderFallbackHandler HTML_ENCODER_FALLBACK = new HtmlEncoderFallbackHandler();
0091:
0092: static {
0093: // Html encoding mapping according to the HTML 4.0 spec
0094: // http://www.w3.org/TR/REC-html40/sgml/entities.html
0095:
0096: // Special characters for HTML
0097: AGGRESSIVE_HTML_ENCODE_MAP.put('\u0026', "&");
0098: AGGRESSIVE_HTML_ENCODE_MAP.put('\u003C', "<");
0099: AGGRESSIVE_HTML_ENCODE_MAP.put('\u003E', ">");
0100: AGGRESSIVE_HTML_ENCODE_MAP.put('\u0022', """);
0101:
0102: DEFENSIVE_HTML_ENCODE_MAP.put('\u0152', "Œ");
0103: DEFENSIVE_HTML_ENCODE_MAP.put('\u0153', "œ");
0104: DEFENSIVE_HTML_ENCODE_MAP.put('\u0160', "Š");
0105: DEFENSIVE_HTML_ENCODE_MAP.put('\u0161', "š");
0106: DEFENSIVE_HTML_ENCODE_MAP.put('\u0178', "Ÿ");
0107: DEFENSIVE_HTML_ENCODE_MAP.put('\u02C6', "ˆ");
0108: DEFENSIVE_HTML_ENCODE_MAP.put('\u02DC', "˜");
0109: DEFENSIVE_HTML_ENCODE_MAP.put('\u2002', " ");
0110: DEFENSIVE_HTML_ENCODE_MAP.put('\u2003', " ");
0111: DEFENSIVE_HTML_ENCODE_MAP.put('\u2009', " ");
0112: DEFENSIVE_HTML_ENCODE_MAP.put('\u200C', "‌");
0113: DEFENSIVE_HTML_ENCODE_MAP.put('\u200D', "‍");
0114: DEFENSIVE_HTML_ENCODE_MAP.put('\u200E', "‎");
0115: DEFENSIVE_HTML_ENCODE_MAP.put('\u200F', "‏");
0116: DEFENSIVE_HTML_ENCODE_MAP.put('\u2013', "–");
0117: DEFENSIVE_HTML_ENCODE_MAP.put('\u2014', "—");
0118: DEFENSIVE_HTML_ENCODE_MAP.put('\u2018', "‘");
0119: DEFENSIVE_HTML_ENCODE_MAP.put('\u2019', "’");
0120: DEFENSIVE_HTML_ENCODE_MAP.put('\u201A', "‚");
0121: DEFENSIVE_HTML_ENCODE_MAP.put('\u201C', "“");
0122: DEFENSIVE_HTML_ENCODE_MAP.put('\u201D', "”");
0123: DEFENSIVE_HTML_ENCODE_MAP.put('\u201E', "„");
0124: DEFENSIVE_HTML_ENCODE_MAP.put('\u2020', "†");
0125: DEFENSIVE_HTML_ENCODE_MAP.put('\u2021', "‡");
0126: DEFENSIVE_HTML_ENCODE_MAP.put('\u2030', "‰");
0127: DEFENSIVE_HTML_ENCODE_MAP.put('\u2039', "‹");
0128: DEFENSIVE_HTML_ENCODE_MAP.put('\u203A', "›");
0129: DEFENSIVE_HTML_ENCODE_MAP.put('\u20AC', "€");
0130:
0131: // Character entity references for ISO 8859-1 characters
0132: DEFENSIVE_HTML_ENCODE_MAP.put('\u00A0', " ");
0133: DEFENSIVE_HTML_ENCODE_MAP.put('\u00A1', "¡");
0134: DEFENSIVE_HTML_ENCODE_MAP.put('\u00A2', "¢");
0135: DEFENSIVE_HTML_ENCODE_MAP.put('\u00A3', "£");
0136: DEFENSIVE_HTML_ENCODE_MAP.put('\u00A4', "¤");
0137: DEFENSIVE_HTML_ENCODE_MAP.put('\u00A5', "¥");
0138: DEFENSIVE_HTML_ENCODE_MAP.put('\u00A6', "¦");
0139: DEFENSIVE_HTML_ENCODE_MAP.put('\u00A7', "§");
0140: DEFENSIVE_HTML_ENCODE_MAP.put('\u00A8', "¨");
0141: DEFENSIVE_HTML_ENCODE_MAP.put('\u00A9', "©");
0142: DEFENSIVE_HTML_ENCODE_MAP.put('\u00AA', "ª");
0143: DEFENSIVE_HTML_ENCODE_MAP.put('\u00AB', "«");
0144: DEFENSIVE_HTML_ENCODE_MAP.put('\u00AC', "¬");
0145: DEFENSIVE_HTML_ENCODE_MAP.put('\u00AD', "­");
0146: DEFENSIVE_HTML_ENCODE_MAP.put('\u00AE', "®");
0147: DEFENSIVE_HTML_ENCODE_MAP.put('\u00AF', "¯");
0148: DEFENSIVE_HTML_ENCODE_MAP.put('\u00B0', "°");
0149: DEFENSIVE_HTML_ENCODE_MAP.put('\u00B1', "±");
0150: DEFENSIVE_HTML_ENCODE_MAP.put('\u00B2', "²");
0151: DEFENSIVE_HTML_ENCODE_MAP.put('\u00B3', "³");
0152: DEFENSIVE_HTML_ENCODE_MAP.put('\u00B4', "´");
0153: DEFENSIVE_HTML_ENCODE_MAP.put('\u00B5', "µ");
0154: DEFENSIVE_HTML_ENCODE_MAP.put('\u00B6', "¶");
0155: DEFENSIVE_HTML_ENCODE_MAP.put('\u00B7', "·");
0156: DEFENSIVE_HTML_ENCODE_MAP.put('\u00B8', "¸");
0157: DEFENSIVE_HTML_ENCODE_MAP.put('\u00B9', "¹");
0158: DEFENSIVE_HTML_ENCODE_MAP.put('\u00BA', "º");
0159: DEFENSIVE_HTML_ENCODE_MAP.put('\u00BB', "»");
0160: DEFENSIVE_HTML_ENCODE_MAP.put('\u00BC', "¼");
0161: DEFENSIVE_HTML_ENCODE_MAP.put('\u00BD', "½");
0162: DEFENSIVE_HTML_ENCODE_MAP.put('\u00BE', "¾");
0163: DEFENSIVE_HTML_ENCODE_MAP.put('\u00BF', "¿");
0164: DEFENSIVE_HTML_ENCODE_MAP.put('\u00C0', "À");
0165: DEFENSIVE_HTML_ENCODE_MAP.put('\u00C1', "Á");
0166: DEFENSIVE_HTML_ENCODE_MAP.put('\u00C2', "Â");
0167: DEFENSIVE_HTML_ENCODE_MAP.put('\u00C3', "Ã");
0168: DEFENSIVE_HTML_ENCODE_MAP.put('\u00C4', "Ä");
0169: DEFENSIVE_HTML_ENCODE_MAP.put('\u00C5', "Å");
0170: DEFENSIVE_HTML_ENCODE_MAP.put('\u00C6', "Æ");
0171: DEFENSIVE_HTML_ENCODE_MAP.put('\u00C7', "Ç");
0172: DEFENSIVE_HTML_ENCODE_MAP.put('\u00C8', "È");
0173: DEFENSIVE_HTML_ENCODE_MAP.put('\u00C9', "É");
0174: DEFENSIVE_HTML_ENCODE_MAP.put('\u00CA', "Ê");
0175: DEFENSIVE_HTML_ENCODE_MAP.put('\u00CB', "Ë");
0176: DEFENSIVE_HTML_ENCODE_MAP.put('\u00CC', "Ì");
0177: DEFENSIVE_HTML_ENCODE_MAP.put('\u00CD', "Í");
0178: DEFENSIVE_HTML_ENCODE_MAP.put('\u00CE', "Î");
0179: DEFENSIVE_HTML_ENCODE_MAP.put('\u00CF', "Ï");
0180: DEFENSIVE_HTML_ENCODE_MAP.put('\u00D0', "Ð");
0181: DEFENSIVE_HTML_ENCODE_MAP.put('\u00D1', "Ñ");
0182: DEFENSIVE_HTML_ENCODE_MAP.put('\u00D2', "Ò");
0183: DEFENSIVE_HTML_ENCODE_MAP.put('\u00D3', "Ó");
0184: DEFENSIVE_HTML_ENCODE_MAP.put('\u00D4', "Ô");
0185: DEFENSIVE_HTML_ENCODE_MAP.put('\u00D5', "Õ");
0186: DEFENSIVE_HTML_ENCODE_MAP.put('\u00D6', "Ö");
0187: DEFENSIVE_HTML_ENCODE_MAP.put('\u00D7', "×");
0188: DEFENSIVE_HTML_ENCODE_MAP.put('\u00D8', "Ø");
0189: DEFENSIVE_HTML_ENCODE_MAP.put('\u00D9', "Ù");
0190: DEFENSIVE_HTML_ENCODE_MAP.put('\u00DA', "Ú");
0191: DEFENSIVE_HTML_ENCODE_MAP.put('\u00DB', "Û");
0192: DEFENSIVE_HTML_ENCODE_MAP.put('\u00DC', "Ü");
0193: DEFENSIVE_HTML_ENCODE_MAP.put('\u00DD', "Ý");
0194: DEFENSIVE_HTML_ENCODE_MAP.put('\u00DE', "Þ");
0195: DEFENSIVE_HTML_ENCODE_MAP.put('\u00DF', "ß");
0196: DEFENSIVE_HTML_ENCODE_MAP.put('\u00E0', "à");
0197: DEFENSIVE_HTML_ENCODE_MAP.put('\u00E1', "á");
0198: DEFENSIVE_HTML_ENCODE_MAP.put('\u00E2', "â");
0199: DEFENSIVE_HTML_ENCODE_MAP.put('\u00E3', "ã");
0200: DEFENSIVE_HTML_ENCODE_MAP.put('\u00E4', "ä");
0201: DEFENSIVE_HTML_ENCODE_MAP.put('\u00E5', "å");
0202: DEFENSIVE_HTML_ENCODE_MAP.put('\u00E6', "æ");
0203: DEFENSIVE_HTML_ENCODE_MAP.put('\u00E7', "ç");
0204: DEFENSIVE_HTML_ENCODE_MAP.put('\u00E8', "è");
0205: DEFENSIVE_HTML_ENCODE_MAP.put('\u00E9', "é");
0206: DEFENSIVE_HTML_ENCODE_MAP.put('\u00EA', "ê");
0207: DEFENSIVE_HTML_ENCODE_MAP.put('\u00EB', "ë");
0208: DEFENSIVE_HTML_ENCODE_MAP.put('\u00EC', "ì");
0209: DEFENSIVE_HTML_ENCODE_MAP.put('\u00ED', "í");
0210: DEFENSIVE_HTML_ENCODE_MAP.put('\u00EE', "î");
0211: DEFENSIVE_HTML_ENCODE_MAP.put('\u00EF', "ï");
0212: DEFENSIVE_HTML_ENCODE_MAP.put('\u00F0', "ð");
0213: DEFENSIVE_HTML_ENCODE_MAP.put('\u00F1', "ñ");
0214: DEFENSIVE_HTML_ENCODE_MAP.put('\u00F2', "ò");
0215: DEFENSIVE_HTML_ENCODE_MAP.put('\u00F3', "ó");
0216: DEFENSIVE_HTML_ENCODE_MAP.put('\u00F4', "ô");
0217: DEFENSIVE_HTML_ENCODE_MAP.put('\u00F5', "õ");
0218: DEFENSIVE_HTML_ENCODE_MAP.put('\u00F6', "ö");
0219: DEFENSIVE_HTML_ENCODE_MAP.put('\u00F7', "÷");
0220: DEFENSIVE_HTML_ENCODE_MAP.put('\u00F8', "ø");
0221: DEFENSIVE_HTML_ENCODE_MAP.put('\u00F9', "ù");
0222: DEFENSIVE_HTML_ENCODE_MAP.put('\u00FA', "ú");
0223: DEFENSIVE_HTML_ENCODE_MAP.put('\u00FB', "û");
0224: DEFENSIVE_HTML_ENCODE_MAP.put('\u00FC', "ü");
0225: DEFENSIVE_HTML_ENCODE_MAP.put('\u00FD', "ý");
0226: DEFENSIVE_HTML_ENCODE_MAP.put('\u00FE', "þ");
0227: DEFENSIVE_HTML_ENCODE_MAP.put('\u00FF', "ÿ");
0228:
0229: // Mathematical, Greek and Symbolic characters for HTML
0230: DEFENSIVE_HTML_ENCODE_MAP.put('\u0192', "ƒ");
0231: DEFENSIVE_HTML_ENCODE_MAP.put('\u0391', "Α");
0232: DEFENSIVE_HTML_ENCODE_MAP.put('\u0392', "Β");
0233: DEFENSIVE_HTML_ENCODE_MAP.put('\u0393', "Γ");
0234: DEFENSIVE_HTML_ENCODE_MAP.put('\u0394', "Δ");
0235: DEFENSIVE_HTML_ENCODE_MAP.put('\u0395', "Ε");
0236: DEFENSIVE_HTML_ENCODE_MAP.put('\u0396', "Ζ");
0237: DEFENSIVE_HTML_ENCODE_MAP.put('\u0397', "Η");
0238: DEFENSIVE_HTML_ENCODE_MAP.put('\u0398', "Θ");
0239: DEFENSIVE_HTML_ENCODE_MAP.put('\u0399', "Ι");
0240: DEFENSIVE_HTML_ENCODE_MAP.put('\u039A', "Κ");
0241: DEFENSIVE_HTML_ENCODE_MAP.put('\u039B', "Λ");
0242: DEFENSIVE_HTML_ENCODE_MAP.put('\u039C', "Μ");
0243: DEFENSIVE_HTML_ENCODE_MAP.put('\u039D', "Ν");
0244: DEFENSIVE_HTML_ENCODE_MAP.put('\u039E', "Ξ");
0245: DEFENSIVE_HTML_ENCODE_MAP.put('\u039F', "Ο");
0246: DEFENSIVE_HTML_ENCODE_MAP.put('\u03A0', "Π");
0247: DEFENSIVE_HTML_ENCODE_MAP.put('\u03A1', "Ρ");
0248: DEFENSIVE_HTML_ENCODE_MAP.put('\u03A3', "Σ");
0249: DEFENSIVE_HTML_ENCODE_MAP.put('\u03A4', "Τ");
0250: DEFENSIVE_HTML_ENCODE_MAP.put('\u03A5', "Υ");
0251: DEFENSIVE_HTML_ENCODE_MAP.put('\u03A6', "Φ");
0252: DEFENSIVE_HTML_ENCODE_MAP.put('\u03A7', "Χ");
0253: DEFENSIVE_HTML_ENCODE_MAP.put('\u03A8', "Ψ");
0254: DEFENSIVE_HTML_ENCODE_MAP.put('\u03A9', "Ω");
0255: DEFENSIVE_HTML_ENCODE_MAP.put('\u03B1', "α");
0256: DEFENSIVE_HTML_ENCODE_MAP.put('\u03B2', "β");
0257: DEFENSIVE_HTML_ENCODE_MAP.put('\u03B3', "γ");
0258: DEFENSIVE_HTML_ENCODE_MAP.put('\u03B4', "δ");
0259: DEFENSIVE_HTML_ENCODE_MAP.put('\u03B5', "ε");
0260: DEFENSIVE_HTML_ENCODE_MAP.put('\u03B6', "ζ");
0261: DEFENSIVE_HTML_ENCODE_MAP.put('\u03B7', "η");
0262: DEFENSIVE_HTML_ENCODE_MAP.put('\u03B8', "θ");
0263: DEFENSIVE_HTML_ENCODE_MAP.put('\u03B9', "ι");
0264: DEFENSIVE_HTML_ENCODE_MAP.put('\u03BA', "κ");
0265: DEFENSIVE_HTML_ENCODE_MAP.put('\u03BB', "λ");
0266: DEFENSIVE_HTML_ENCODE_MAP.put('\u03BC', "μ");
0267: DEFENSIVE_HTML_ENCODE_MAP.put('\u03BD', "ν");
0268: DEFENSIVE_HTML_ENCODE_MAP.put('\u03BE', "ξ");
0269: DEFENSIVE_HTML_ENCODE_MAP.put('\u03BF', "ο");
0270: DEFENSIVE_HTML_ENCODE_MAP.put('\u03C0', "π");
0271: DEFENSIVE_HTML_ENCODE_MAP.put('\u03C1', "ρ");
0272: DEFENSIVE_HTML_ENCODE_MAP.put('\u03C2', "ς");
0273: DEFENSIVE_HTML_ENCODE_MAP.put('\u03C3', "σ");
0274: DEFENSIVE_HTML_ENCODE_MAP.put('\u03C4', "τ");
0275: DEFENSIVE_HTML_ENCODE_MAP.put('\u03C5', "υ");
0276: DEFENSIVE_HTML_ENCODE_MAP.put('\u03C6', "φ");
0277: DEFENSIVE_HTML_ENCODE_MAP.put('\u03C7', "χ");
0278: DEFENSIVE_HTML_ENCODE_MAP.put('\u03C8', "ψ");
0279: DEFENSIVE_HTML_ENCODE_MAP.put('\u03C9', "ω");
0280: DEFENSIVE_HTML_ENCODE_MAP.put('\u03D1', "ϑ");
0281: DEFENSIVE_HTML_ENCODE_MAP.put('\u03D2', "ϒ");
0282: DEFENSIVE_HTML_ENCODE_MAP.put('\u03D6', "ϖ");
0283: DEFENSIVE_HTML_ENCODE_MAP.put('\u2022', "•");
0284: DEFENSIVE_HTML_ENCODE_MAP.put('\u2026', "…");
0285: DEFENSIVE_HTML_ENCODE_MAP.put('\u2032', "′");
0286: DEFENSIVE_HTML_ENCODE_MAP.put('\u2033', "″");
0287: DEFENSIVE_HTML_ENCODE_MAP.put('\u203E', "‾");
0288: DEFENSIVE_HTML_ENCODE_MAP.put('\u2044', "⁄");
0289: DEFENSIVE_HTML_ENCODE_MAP.put('\u2118', "℘");
0290: DEFENSIVE_HTML_ENCODE_MAP.put('\u2111', "ℑ");
0291: DEFENSIVE_HTML_ENCODE_MAP.put('\u211C', "ℜ");
0292: DEFENSIVE_HTML_ENCODE_MAP.put('\u2122', "™");
0293: DEFENSIVE_HTML_ENCODE_MAP.put('\u2135', "ℵ");
0294: DEFENSIVE_HTML_ENCODE_MAP.put('\u2190', "←");
0295: DEFENSIVE_HTML_ENCODE_MAP.put('\u2191', "↑");
0296: DEFENSIVE_HTML_ENCODE_MAP.put('\u2192', "→");
0297: DEFENSIVE_HTML_ENCODE_MAP.put('\u2193', "↓");
0298: DEFENSIVE_HTML_ENCODE_MAP.put('\u2194', "↔");
0299: DEFENSIVE_HTML_ENCODE_MAP.put('\u21B5', "↵");
0300: DEFENSIVE_HTML_ENCODE_MAP.put('\u21D0', "⇐");
0301: DEFENSIVE_HTML_ENCODE_MAP.put('\u21D1', "⇑");
0302: DEFENSIVE_HTML_ENCODE_MAP.put('\u21D2', "⇒");
0303: DEFENSIVE_HTML_ENCODE_MAP.put('\u21D3', "⇓");
0304: DEFENSIVE_HTML_ENCODE_MAP.put('\u21D4', "⇔");
0305: DEFENSIVE_HTML_ENCODE_MAP.put('\u2200', "∀");
0306: DEFENSIVE_HTML_ENCODE_MAP.put('\u2202', "∂");
0307: DEFENSIVE_HTML_ENCODE_MAP.put('\u2203', "∃");
0308: DEFENSIVE_HTML_ENCODE_MAP.put('\u2205', "∅");
0309: DEFENSIVE_HTML_ENCODE_MAP.put('\u2207', "∇");
0310: DEFENSIVE_HTML_ENCODE_MAP.put('\u2208', "∈");
0311: DEFENSIVE_HTML_ENCODE_MAP.put('\u2209', "∉");
0312: DEFENSIVE_HTML_ENCODE_MAP.put('\u220B', "∋");
0313: DEFENSIVE_HTML_ENCODE_MAP.put('\u220F', "∏");
0314: DEFENSIVE_HTML_ENCODE_MAP.put('\u2211', "∑");
0315: DEFENSIVE_HTML_ENCODE_MAP.put('\u2212', "−");
0316: DEFENSIVE_HTML_ENCODE_MAP.put('\u2217', "∗");
0317: DEFENSIVE_HTML_ENCODE_MAP.put('\u221A', "√");
0318: DEFENSIVE_HTML_ENCODE_MAP.put('\u221D', "∝");
0319: DEFENSIVE_HTML_ENCODE_MAP.put('\u221E', "∞");
0320: DEFENSIVE_HTML_ENCODE_MAP.put('\u2220', "∠");
0321: DEFENSIVE_HTML_ENCODE_MAP.put('\u2227', "∧");
0322: DEFENSIVE_HTML_ENCODE_MAP.put('\u2228', "∨");
0323: DEFENSIVE_HTML_ENCODE_MAP.put('\u2229', "∩");
0324: DEFENSIVE_HTML_ENCODE_MAP.put('\u222A', "∪");
0325: DEFENSIVE_HTML_ENCODE_MAP.put('\u222B', "∫");
0326: DEFENSIVE_HTML_ENCODE_MAP.put('\u2234', "∴");
0327: DEFENSIVE_HTML_ENCODE_MAP.put('\u223C', "∼");
0328: DEFENSIVE_HTML_ENCODE_MAP.put('\u2245', "≅");
0329: DEFENSIVE_HTML_ENCODE_MAP.put('\u2248', "≈");
0330: DEFENSIVE_HTML_ENCODE_MAP.put('\u2260', "≠");
0331: DEFENSIVE_HTML_ENCODE_MAP.put('\u2261', "≡");
0332: DEFENSIVE_HTML_ENCODE_MAP.put('\u2264', "≤");
0333: DEFENSIVE_HTML_ENCODE_MAP.put('\u2265', "≥");
0334: DEFENSIVE_HTML_ENCODE_MAP.put('\u2282', "⊂");
0335: DEFENSIVE_HTML_ENCODE_MAP.put('\u2283', "⊃");
0336: DEFENSIVE_HTML_ENCODE_MAP.put('\u2284', "⊄");
0337: DEFENSIVE_HTML_ENCODE_MAP.put('\u2286', "⊆");
0338: DEFENSIVE_HTML_ENCODE_MAP.put('\u2287', "⊇");
0339: DEFENSIVE_HTML_ENCODE_MAP.put('\u2295', "⊕");
0340: DEFENSIVE_HTML_ENCODE_MAP.put('\u2297', "⊗");
0341: DEFENSIVE_HTML_ENCODE_MAP.put('\u22A5', "⊥");
0342: DEFENSIVE_HTML_ENCODE_MAP.put('\u22C5', "⋅");
0343: DEFENSIVE_HTML_ENCODE_MAP.put('\u2308', "⌈");
0344: DEFENSIVE_HTML_ENCODE_MAP.put('\u2309', "⌉");
0345: DEFENSIVE_HTML_ENCODE_MAP.put('\u230A', "⌊");
0346: DEFENSIVE_HTML_ENCODE_MAP.put('\u230B', "⌋");
0347: DEFENSIVE_HTML_ENCODE_MAP.put('\u2329', "⟨");
0348: DEFENSIVE_HTML_ENCODE_MAP.put('\u232A', "⟩");
0349: DEFENSIVE_HTML_ENCODE_MAP.put('\u25CA', "◊");
0350: DEFENSIVE_HTML_ENCODE_MAP.put('\u2660', "♠");
0351: DEFENSIVE_HTML_ENCODE_MAP.put('\u2663', "♣");
0352: DEFENSIVE_HTML_ENCODE_MAP.put('\u2665', "♥");
0353: DEFENSIVE_HTML_ENCODE_MAP.put('\u2666', "♦");
0354:
0355: CharKeyMapIterator<String> aggresive_entries = AGGRESSIVE_HTML_ENCODE_MAP
0356: .entries();
0357: while (aggresive_entries.hasNext()) {
0358: aggresive_entries.next();
0359: HTML_DECODE_MAP.put(aggresive_entries.getValue(),
0360: aggresive_entries.getKey());
0361: }
0362:
0363: CharKeyMapIterator<String> defensive_entries = DEFENSIVE_HTML_ENCODE_MAP
0364: .entries();
0365: while (defensive_entries.hasNext()) {
0366: defensive_entries.next();
0367: HTML_DECODE_MAP.put(defensive_entries.getValue(),
0368: defensive_entries.getKey());
0369: }
0370:
0371: XML_ENCODE_MAP.put('\u0026', "&");
0372: XML_ENCODE_MAP.put('\'', "'");
0373: XML_ENCODE_MAP.put('\u0022', """);
0374: XML_ENCODE_MAP.put('\u003C', "<");
0375: XML_ENCODE_MAP.put('\u003E', ">");
0376:
0377: SQL_ENCODE_MAP.put('\'', "''");
0378:
0379: STRING_ENCODE_MAP.put('\\', "\\\\");
0380: STRING_ENCODE_MAP.put('\n', "\\n");
0381: STRING_ENCODE_MAP.put('\r', "\\r");
0382: STRING_ENCODE_MAP.put('\t', "\\t");
0383: STRING_ENCODE_MAP.put('"', "\\\"");
0384:
0385: LATEX_ENCODE_MAP.put('\\', "\\\\");
0386: LATEX_ENCODE_MAP.put('#', "\\#");
0387: LATEX_ENCODE_MAP.put('$', "\\$");
0388: LATEX_ENCODE_MAP.put('%', "\\%");
0389: LATEX_ENCODE_MAP.put('&', "\\&");
0390: LATEX_ENCODE_MAP.put('~', "\\~");
0391: LATEX_ENCODE_MAP.put('_', "\\_");
0392: LATEX_ENCODE_MAP.put('^', "\\^");
0393: LATEX_ENCODE_MAP.put('{', "\\{");
0394: LATEX_ENCODE_MAP.put('}', "\\}");
0395: LATEX_ENCODE_MAP.put('\u00A1', "!'");
0396: LATEX_ENCODE_MAP.put('\u00BF', "?'");
0397: LATEX_ENCODE_MAP.put('\u00C0', "\\`{A}");
0398: LATEX_ENCODE_MAP.put('\u00C1', "\\'{A}");
0399: LATEX_ENCODE_MAP.put('\u00C2', "\\^{A}");
0400: LATEX_ENCODE_MAP.put('\u00C3', "\\H{A}");
0401: LATEX_ENCODE_MAP.put('\u00C4', "\\\"{A}");
0402: LATEX_ENCODE_MAP.put('\u00C5', "\\AA");
0403: LATEX_ENCODE_MAP.put('\u00C6', "\\AE");
0404: LATEX_ENCODE_MAP.put('\u00C7', "\\c{C}");
0405: LATEX_ENCODE_MAP.put('\u00C8', "\\`{E}");
0406: LATEX_ENCODE_MAP.put('\u00C9', "\\'{E}");
0407: LATEX_ENCODE_MAP.put('\u00CA', "\\^{E}");
0408: LATEX_ENCODE_MAP.put('\u00CB', "\\\"{E}");
0409: LATEX_ENCODE_MAP.put('\u00CC', "\\`{I}");
0410: LATEX_ENCODE_MAP.put('\u00CD', "\\'{I}");
0411: LATEX_ENCODE_MAP.put('\u00CE', "\\^{I}");
0412: LATEX_ENCODE_MAP.put('\u00CF', "\\\"{I}");
0413: // todo \u00D0
0414: LATEX_ENCODE_MAP.put('\u00D1', "\\H{N}");
0415: LATEX_ENCODE_MAP.put('\u00D2', "\\`{O}");
0416: LATEX_ENCODE_MAP.put('\u00D3', "\\'{O}");
0417: LATEX_ENCODE_MAP.put('\u00D4', "\\^{O}");
0418: LATEX_ENCODE_MAP.put('\u00D5', "\\H{O}");
0419: LATEX_ENCODE_MAP.put('\u00D6', "\\\"{O}");
0420: // todo \u00D7
0421: LATEX_ENCODE_MAP.put('\u00D8', "\\O");
0422: LATEX_ENCODE_MAP.put('\u00D9', "\\`{U}");
0423: LATEX_ENCODE_MAP.put('\u00DA', "\\'{U}");
0424: LATEX_ENCODE_MAP.put('\u00DB', "\\^{U}");
0425: LATEX_ENCODE_MAP.put('\u00DC', "\\\"{U}");
0426: LATEX_ENCODE_MAP.put('\u00DD', "\\'{Y}");
0427: // todo \u00DE
0428: LATEX_ENCODE_MAP.put('\u00DF', "\\ss");
0429: LATEX_ENCODE_MAP.put('\u00E0', "\\`{a}");
0430: LATEX_ENCODE_MAP.put('\u00E1', "\\'{a}");
0431: LATEX_ENCODE_MAP.put('\u00E2', "\\^{a}");
0432: LATEX_ENCODE_MAP.put('\u00E3', "\\H{a}");
0433: LATEX_ENCODE_MAP.put('\u00E4', "\\\"{a}");
0434: LATEX_ENCODE_MAP.put('\u00E5', "\\aa");
0435: LATEX_ENCODE_MAP.put('\u00E6', "\\ae");
0436: LATEX_ENCODE_MAP.put('\u00E7', "\\c{c}");
0437: LATEX_ENCODE_MAP.put('\u00E8', "\\`{e}");
0438: LATEX_ENCODE_MAP.put('\u00E9', "\\'{e}");
0439: LATEX_ENCODE_MAP.put('\u00EA', "\\^{e}");
0440: LATEX_ENCODE_MAP.put('\u00EB', "\\\"{e}");
0441: LATEX_ENCODE_MAP.put('\u00EC', "\\`{i}");
0442: LATEX_ENCODE_MAP.put('\u00ED', "\\'{i}");
0443: LATEX_ENCODE_MAP.put('\u00EE', "\\^{i}");
0444: LATEX_ENCODE_MAP.put('\u00EF', "\\\"{i}");
0445: // todo \u00F0
0446: LATEX_ENCODE_MAP.put('\u00F1', "\\H{n}");
0447: LATEX_ENCODE_MAP.put('\u00F2', "\\`{o}");
0448: LATEX_ENCODE_MAP.put('\u00F3', "\\'{o}");
0449: LATEX_ENCODE_MAP.put('\u00F4', "\\^{o}");
0450: LATEX_ENCODE_MAP.put('\u00F5', "\\H{o}");
0451: LATEX_ENCODE_MAP.put('\u00F6', "\\\"{o}");
0452: // todo \u00F7
0453: LATEX_ENCODE_MAP.put('\u00F8', "\\o");
0454: LATEX_ENCODE_MAP.put('\u00F9', "\\`{u}");
0455: LATEX_ENCODE_MAP.put('\u00FA', "\\'{u}");
0456: LATEX_ENCODE_MAP.put('\u00FB', "\\^{u}");
0457: LATEX_ENCODE_MAP.put('\u00FC', "\\\"{u}");
0458: LATEX_ENCODE_MAP.put('\u00FD', "\\'{y}");
0459: // todo \u00FE
0460: LATEX_ENCODE_MAP.put('\u00FF', "\\\"{y}");
0461: }
0462:
0463: /**
0464: * Transforms a provided <code>String</code> object into a new string,
0465: * containing only valid characters for a java class name.
0466: *
0467: * @param name The string that has to be transformed into a valid class
0468: * name.
0469: * @return The encoded <code>String</code> object.
0470: * @see #encodeUrl(String)
0471: * @see #encodeHtml(String)
0472: * @see #encodeXml(String)
0473: * @see #encodeSql(String)
0474: * @see #encodeLatex(String)
0475: * @see #encodeRegexp(String)
0476: * @since 1.0
0477: */
0478: public static String encodeClassname(String name) {
0479: if (null == name) {
0480: return null;
0481: }
0482:
0483: Pattern pattern = Pattern.compile("[^\\w]");
0484: Matcher matcher = pattern.matcher(name);
0485:
0486: return matcher.replaceAll("_");
0487: }
0488:
0489: private static boolean needsUrlEncoding(String source) {
0490: if (null == source) {
0491: return false;
0492: }
0493:
0494: // check if the string needs encoding first since
0495: // the URLEncoder always allocates a StringBuffer, even when the
0496: // string is returned as-is
0497: boolean encode = false;
0498: char ch;
0499: for (int i = 0; i < source.length(); i++) {
0500: ch = source.charAt(i);
0501:
0502: if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z'
0503: || ch >= '0' && ch <= '9' || ch == '-' || ch == '_'
0504: || ch == '.' || ch == '*') {
0505: continue;
0506: }
0507:
0508: encode = true;
0509: break;
0510: }
0511:
0512: return encode;
0513: }
0514:
0515: /**
0516: * Transforms a provided <code>String</code> object into a new string,
0517: * containing only valid URL characters.
0518: *
0519: * @param source The string that has to be transformed into a valid URL
0520: * string.
0521: * @return The encoded <code>String</code> object.
0522: * @see #encodeClassname(String)
0523: * @see #encodeUrlValue(String)
0524: * @see #encodeHtml(String)
0525: * @see #encodeXml(String)
0526: * @see #encodeSql(String)
0527: * @see #encodeLatex(String)
0528: * @see #encodeRegexp(String)
0529: * @since 1.0
0530: */
0531: public static String encodeUrl(String source) {
0532: if (!needsUrlEncoding(source)) {
0533: return source;
0534: }
0535:
0536: try {
0537: return URLEncoder.encode(source, ENCODING_ISO_8859_1);
0538: }
0539: ///CLOVER:OFF
0540: catch (UnsupportedEncodingException e) {
0541: // this should never happen, ISO-8859-1 is a standard encoding
0542: throw new RuntimeException(e);
0543: }
0544: ///CLOVER:ON
0545: }
0546:
0547: /**
0548: * Transforms a provided <code>String</code> object into a new string,
0549: * only pure US Ascii strings are preserved and URL encoded in a regular
0550: * way. Strings with characters from other encodings will be encoded in a
0551: * RIFE-specific manner to allow international data to passed along the
0552: * query string.
0553: *
0554: * @param source The string that has to be transformed into a valid URL
0555: * parameter string.
0556: * @return The encoded <code>String</code> object.
0557: * @see #decodeUrlValue(String)
0558: * @see #encodeClassname(String)
0559: * @see #encodeUrl(String)
0560: * @see #encodeHtml(String)
0561: * @see #encodeXml(String)
0562: * @see #encodeSql(String)
0563: * @see #encodeLatex(String)
0564: * @see #encodeRegexp(String)
0565: * @since 1.0
0566: */
0567: public static String encodeUrlValue(String source) {
0568: if (!needsUrlEncoding(source)) {
0569: return source;
0570: }
0571:
0572: // check if the string is valid US-ASCII encoding
0573: boolean valid = true;
0574: CharsetEncoder encoder = CHARSET_US_ASCII.newEncoder();
0575: try {
0576: encoder.encode(CharBuffer.wrap(source));
0577: } catch (CharacterCodingException e) {
0578: valid = false;
0579: }
0580:
0581: try {
0582: // if it is valid US-ASCII, use the regular URL encoding method
0583: if (valid) {
0584: return URLEncoder.encode(source, ENCODING_US_ASCII);
0585: }
0586: // otherwise, base-64 encode the UTF-8 bytes and mark the string
0587: // as being encoded in a special way
0588: else {
0589: StringBuilder encoded = new StringBuilder("%02%02");
0590: String base64 = Base64.encodeToString(source
0591: .getBytes(ENCODING_UTF_8), false);
0592: String base64_urlsafe = replace(base64, "=", "%3D");
0593: encoded.append(base64_urlsafe);
0594:
0595: return encoded.toString();
0596: }
0597: }
0598: ///CLOVER:OFF
0599: catch (UnsupportedEncodingException e) {
0600: // this should never happen, ISO-8859-1 is a standard encoding
0601: throw new RuntimeException(e);
0602: }
0603: ///CLOVER:ON
0604: }
0605:
0606: /**
0607: * Decodes a <code>String</code> that has been encoded in a RIFE-specific
0608: * manner for URL usage.. Before calling this method, you should first
0609: * verify if the value needs decoding by using the
0610: * <code>doesUrlValueNeedDecoding(String)</code> method.
0611: *
0612: * @param source the value that has been encoded for URL usage in a
0613: * RIFE-specific way
0614: * @return The decoded <code>String</code> object.
0615: * @see #encodeUrlValue(String)
0616: * @see #doesUrlValueNeedDecoding(String)
0617: * @since 1.0
0618: */
0619: public static String decodeUrlValue(String source) {
0620: try {
0621: byte[] decoded = Base64.decode(source.substring(2));
0622: if (null == decoded) {
0623: return null;
0624: } else {
0625: return new String(decoded, StringUtils.ENCODING_UTF_8);
0626: }
0627: }
0628: ///CLOVER:OFF
0629: catch (UnsupportedEncodingException e) {
0630: // this should never happen, UTF-8 is a standard encoding
0631: throw new RuntimeException(e);
0632: }
0633: ///CLOVER:ON
0634: }
0635:
0636: /**
0637: * Checks if a <code>String</code> is encoded in a RIFE-specific manner
0638: * for URL usage.
0639: *
0640: * @param source the value that might have been encoded for URL usage in a
0641: * RIFE-specific way
0642: * @return <code>true</code> if the value is encoded in the RIFE-specific
0643: * format; and
0644: * <p><code>false</code> otherwise
0645: * @see #encodeUrlValue(String)
0646: * @see #decodeUrlValue(String)
0647: * @since 1.0
0648: */
0649: public static boolean doesUrlValueNeedDecoding(String source) {
0650: if (source != null && source.length() > 2
0651: && source.startsWith("\u0002\u0002")) {
0652: return true;
0653: }
0654:
0655: return false;
0656: }
0657:
0658: private static boolean needsHtmlEncoding(String source,
0659: boolean defensive) {
0660: if (null == source) {
0661: return false;
0662: }
0663:
0664: boolean encode = false;
0665: char ch;
0666: for (int i = 0; i < source.length(); i++) {
0667: ch = source.charAt(i);
0668:
0669: if ((defensive || (ch != '\u0022' && ch != '\u0026'
0670: && ch != '\u003C' && ch != '\u003E'))
0671: && ch < '\u00A0') {
0672: continue;
0673: }
0674:
0675: encode = true;
0676: break;
0677: }
0678:
0679: return encode;
0680: }
0681:
0682: /**
0683: *
0684: * @since 1.6
0685: */
0686: public static String decodeHtml(String source) {
0687: if (null == source || 0 == source.length()) {
0688: return source;
0689: }
0690:
0691: int current_index = 0;
0692: int delimiter_start_index = 0;
0693: int delimiter_end_index = 0;
0694:
0695: StringBuilder result = null;
0696:
0697: while (current_index <= source.length()) {
0698: delimiter_start_index = source.indexOf('&', current_index);
0699: if (delimiter_start_index != -1) {
0700: delimiter_end_index = source.indexOf(';',
0701: delimiter_start_index + 1);
0702: if (delimiter_end_index != -1) {
0703: // ensure that the string builder is setup correctly
0704: if (null == result) {
0705: result = new StringBuilder();
0706: }
0707:
0708: // add the text that leads up to this match
0709: if (delimiter_start_index > current_index) {
0710: result.append(source.substring(current_index,
0711: delimiter_start_index));
0712: }
0713:
0714: // add the decoded entity
0715: String entity = source.substring(
0716: delimiter_start_index,
0717: delimiter_end_index + 1);
0718:
0719: current_index = delimiter_end_index + 1;
0720:
0721: // try to decoded numeric entities
0722: if (entity.charAt(1) == '#') {
0723: int start = 2;
0724: int radix = 10;
0725: // check if the number is hexadecimal
0726: if (entity.charAt(2) == 'X'
0727: || entity.charAt(2) == 'x') {
0728: start++;
0729: radix = 16;
0730: }
0731: try {
0732: Character c = new Character(
0733: (char) Integer.parseInt(entity
0734: .substring(start, entity
0735: .length() - 1),
0736: radix));
0737: result.append(c);
0738: }
0739: // when the number of the entity can't be parsed, add the entity as-is
0740: catch (NumberFormatException e) {
0741: result.append(entity);
0742: }
0743: } else {
0744: // try to decode the entity as a literal
0745: Character decoded = HTML_DECODE_MAP.get(entity);
0746: if (decoded != null) {
0747: result.append(decoded);
0748: }
0749: // if there was no match, add the entity as-is
0750: else {
0751: result.append(entity);
0752: }
0753: }
0754: } else {
0755: break;
0756: }
0757: } else {
0758: break;
0759: }
0760: }
0761:
0762: if (null == result) {
0763: return source;
0764: } else if (current_index < source.length()) {
0765: result.append(source.substring(current_index));
0766: }
0767:
0768: return result.toString();
0769: }
0770:
0771: /**
0772: * Transforms a provided <code>String</code> object into a new string,
0773: * containing only valid Html characters.
0774: *
0775: * @param source The string that has to be transformed into a valid Html
0776: * string.
0777: * @return The encoded <code>String</code> object.
0778: * @see #encodeClassname(String)
0779: * @see #encodeUrl(String)
0780: * @see #encodeUrlValue(String)
0781: * @see #encodeXml(String)
0782: * @see #encodeSql(String)
0783: * @see #encodeString(String)
0784: * @see #encodeLatex(String)
0785: * @see #encodeRegexp(String)
0786: * @since 1.0
0787: */
0788: public static String encodeHtml(String source) {
0789: if (needsHtmlEncoding(source, false)) {
0790: return encode(source, HTML_ENCODER_FALLBACK,
0791: AGGRESSIVE_HTML_ENCODE_MAP,
0792: DEFENSIVE_HTML_ENCODE_MAP);
0793: }
0794: return source;
0795: }
0796:
0797: /**
0798: * Transforms a provided <code>String</code> object into a new string,
0799: * containing as much as possible Html characters. It is safe to already
0800: * feed existing Html to this method since &, < and > will not
0801: * be encoded.
0802: *
0803: * @param source The string that has to be transformed into a valid Html
0804: * string.
0805: * @return The encoded <code>String</code> object.
0806: * @see #encodeClassname(String)
0807: * @see #encodeUrl(String)
0808: * @see #encodeUrlValue(String)
0809: * @see #encodeXml(String)
0810: * @see #encodeSql(String)
0811: * @see #encodeString(String)
0812: * @see #encodeLatex(String)
0813: * @see #encodeRegexp(String)
0814: * @since 1.0
0815: */
0816: public static String encodeHtmlDefensive(String source) {
0817: if (needsHtmlEncoding(source, true)) {
0818: return encode(source, null, DEFENSIVE_HTML_ENCODE_MAP);
0819: }
0820: return source;
0821: }
0822:
0823: /**
0824: * Transforms a provided <code>String</code> object into a new string,
0825: * containing only valid XML characters.
0826: *
0827: * @param source The string that has to be transformed into a valid XML
0828: * string.
0829: * @return The encoded <code>String</code> object.
0830: * @see #encodeClassname(String)
0831: * @see #encodeUrl(String)
0832: * @see #encodeUrlValue(String)
0833: * @see #encodeHtml(String)
0834: * @see #encodeSql(String)
0835: * @see #encodeString(String)
0836: * @see #encodeLatex(String)
0837: * @see #encodeRegexp(String)
0838: * @since 1.0
0839: */
0840: public static String encodeXml(String source) {
0841: return encode(source, null, XML_ENCODE_MAP);
0842: }
0843:
0844: /**
0845: * Transforms a provided <code>String</code> object into a new string,
0846: * containing only valid <code>String</code> characters.
0847: *
0848: * @param source The string that has to be transformed into a valid
0849: * sequence of <code>String</code> characters.
0850: * @return The encoded <code>String</code> object.
0851: * @see #encodeClassname(String)
0852: * @see #encodeUrl(String)
0853: * @see #encodeUrlValue(String)
0854: * @see #encodeHtml(String)
0855: * @see #encodeXml(String)
0856: * @see #encodeSql(String)
0857: * @see #encodeLatex(String)
0858: * @see #encodeRegexp(String)
0859: * @since 1.0
0860: */
0861: public static String encodeString(String source) {
0862: return encode(source, null, STRING_ENCODE_MAP);
0863: }
0864:
0865: /**
0866: * Transforms a provided <code>String</code> object into a series of
0867: * unicode escape codes.
0868: *
0869: * @param source The string that has to be transformed into a valid
0870: * sequence of unicode escape codes
0871: * @return The encoded <code>String</code> object.
0872: * @see #encodeClassname(String)
0873: * @see #encodeUrl(String)
0874: * @see #encodeUrlValue(String)
0875: * @see #encodeHtml(String)
0876: * @see #encodeXml(String)
0877: * @see #encodeSql(String)
0878: * @see #encodeLatex(String)
0879: * @see #encodeRegexp(String)
0880: * @since 1.0
0881: */
0882: public static String encodeUnicode(String source) {
0883: if (null == source) {
0884: return null;
0885: }
0886:
0887: StringBuilder encoded = new StringBuilder();
0888: String hexstring = null;
0889: for (int i = 0; i < source.length(); i++) {
0890: hexstring = Integer.toHexString((int) source.charAt(i))
0891: .toUpperCase();
0892: encoded.append("\\u");
0893: // fill with zeros
0894: for (int j = hexstring.length(); j < 4; j++) {
0895: encoded.append("0");
0896: }
0897: encoded.append(hexstring);
0898: }
0899:
0900: return encoded.toString();
0901: }
0902:
0903: /**
0904: * Transforms a provided <code>String</code> object into a new string,
0905: * containing only valid Sql characters.
0906: *
0907: * @param source The string that has to be transformed into a valid Sql
0908: * string.
0909: * @return The encoded <code>String</code> object.
0910: * @see #encodeClassname(String)
0911: * @see #encodeUrl(String)
0912: * @see #encodeUrlValue(String)
0913: * @see #encodeHtml(String)
0914: * @see #encodeXml(String)
0915: * @see #encodeString(String)
0916: * @see #encodeLatex(String)
0917: * @see #encodeRegexp(String)
0918: * @since 1.0
0919: */
0920: public static String encodeSql(String source) {
0921: return encode(source, null, SQL_ENCODE_MAP);
0922: }
0923:
0924: /**
0925: * Transforms a provided <code>String</code> object into a new string,
0926: * containing only valid LaTeX characters.
0927: *
0928: * @param source The string that has to be transformed into a valid LaTeX
0929: * string.
0930: * @return The encoded <code>String</code> object.
0931: * @see #encodeClassname(String)
0932: * @see #encodeUrl(String)
0933: * @see #encodeUrlValue(String)
0934: * @see #encodeHtml(String)
0935: * @see #encodeXml(String)
0936: * @see #encodeSql(String)
0937: * @see #encodeString(String)
0938: * @see #encodeRegexp(String)
0939: * @since 1.0
0940: */
0941: public static String encodeLatex(String source) {
0942: if (null == source) {
0943: return null;
0944: }
0945:
0946: source = encode(source, null, LATEX_ENCODE_MAP);
0947: source = StringUtils.replace(source, "latex", "\\LaTeX", false);
0948:
0949: return source;
0950: }
0951:
0952: /**
0953: * Transforms a provided <code>String</code> object into a new string,
0954: * using the mapping that are provided through the supplied encoding
0955: * table.
0956: *
0957: * @param source The string that has to be transformed into a valid
0958: * string, using the mappings that are provided through the supplied
0959: * encoding table.
0960: * @param encodingTables A <code>Map</code> object containing the mappings
0961: * to transform characters into valid entities. The keys of this map
0962: * should be <code>Character</code> objects and the values
0963: * <code>String</code> objects.
0964: * @return The encoded <code>String</code> object.
0965: * @since 1.0
0966: */
0967: private static String encode(String source,
0968: EncoderFallbackHandler fallbackHandler,
0969: CharKeyOpenHashMap... encodingTables) {
0970: if (null == source) {
0971: return null;
0972: }
0973:
0974: if (null == encodingTables || 0 == encodingTables.length) {
0975: return source;
0976: }
0977:
0978: StringBuilder encoded_string = null;
0979: char[] string_to_encode_array = source.toCharArray();
0980: int last_match = -1;
0981:
0982: for (int i = 0; i < string_to_encode_array.length; i++) {
0983: char char_to_encode = string_to_encode_array[i];
0984: for (CharKeyOpenHashMap encoding_table : encodingTables) {
0985: if (encoding_table.containsKey(char_to_encode)) {
0986: encoded_string = prepareEncodedString(source,
0987: encoded_string, i, last_match,
0988: string_to_encode_array);
0989:
0990: encoded_string.append(encoding_table
0991: .get(char_to_encode));
0992: last_match = i;
0993: }
0994: }
0995:
0996: if (fallbackHandler != null && last_match < i
0997: && fallbackHandler.hasFallback(char_to_encode)) {
0998: encoded_string = prepareEncodedString(source,
0999: encoded_string, i, last_match,
1000: string_to_encode_array);
1001:
1002: fallbackHandler.appendFallback(encoded_string,
1003: char_to_encode);
1004: last_match = i;
1005: }
1006: }
1007:
1008: if (null == encoded_string) {
1009: return source;
1010: } else {
1011: int difference = string_to_encode_array.length
1012: - (last_match + 1);
1013: if (difference > 0) {
1014: encoded_string.append(string_to_encode_array,
1015: last_match + 1, difference);
1016: }
1017: return encoded_string.toString();
1018: }
1019: }
1020:
1021: private static StringBuilder prepareEncodedString(String source,
1022: StringBuilder encodedString, int i, int lastMatch,
1023: char[] stringToEncodeArray) {
1024: if (null == encodedString) {
1025: encodedString = new StringBuilder(source.length());
1026: }
1027:
1028: int difference = i - (lastMatch + 1);
1029: if (difference > 0) {
1030: encodedString.append(stringToEncodeArray, lastMatch + 1,
1031: difference);
1032: }
1033:
1034: return encodedString;
1035: }
1036:
1037: private static interface EncoderFallbackHandler {
1038: abstract boolean hasFallback(char character);
1039:
1040: abstract void appendFallback(StringBuilder encodedBuffer,
1041: char character);
1042: }
1043:
1044: private static class HtmlEncoderFallbackHandler implements
1045: EncoderFallbackHandler {
1046: private final static String PREFIX = "&#";
1047: private final static String SUFFIX = ";";
1048:
1049: public boolean hasFallback(char character) {
1050: if (character < '\u00A0') {
1051: return false;
1052: }
1053:
1054: return true;
1055: }
1056:
1057: public void appendFallback(StringBuilder encodedBuffer,
1058: char character) {
1059: encodedBuffer.append(PREFIX);
1060: encodedBuffer.append((int) character);
1061: encodedBuffer.append(SUFFIX);
1062: }
1063: }
1064:
1065: /**
1066: * Transforms a provided <code>String</code> object into a literal that can
1067: * be included into a regular expression {@link Pattern} as-is. None of the
1068: * regular expression escapes in the string will be functional anymore.
1069: *
1070: * @param source The string that has to be escaped as a literal
1071: * @return The encoded <code>String</code> object.
1072: * @see #encodeClassname(String)
1073: * @see #encodeUrl(String)
1074: * @see #encodeUrlValue(String)
1075: * @see #encodeHtml(String)
1076: * @see #encodeXml(String)
1077: * @see #encodeSql(String)
1078: * @see #encodeString(String)
1079: * @see #encodeLatex(String)
1080: * @since 1.3
1081: */
1082: public static String encodeRegexp(String source) {
1083: int regexp_quote_start = source.indexOf("\\E");
1084: if (-1 == regexp_quote_start) {
1085: return "\\Q" + source + "\\E";
1086: }
1087:
1088: StringBuilder buffer = new StringBuilder(source.length() * 2);
1089: buffer.append("\\Q");
1090:
1091: regexp_quote_start = 0;
1092:
1093: int current = 0;
1094: while (-1 == (regexp_quote_start = source.indexOf("\\E",
1095: current))) {
1096: buffer
1097: .append(source.substring(current,
1098: regexp_quote_start));
1099: current = regexp_quote_start + 2;
1100: buffer.append("\\E\\\\E\\Q");
1101: }
1102:
1103: buffer.append(source.substring(current, source.length()));
1104: buffer.append("\\E");
1105:
1106: return buffer.toString();
1107: }
1108:
1109: /**
1110: * Counts the number of times a substring occures in a provided string in
1111: * a case-sensitive manner.
1112: *
1113: * @param source The <code>String</code> object that will be searched in.
1114: * @param substring The string whose occurances will we counted.
1115: * @return An <code>int</code> value containing the number of occurances
1116: * of the substring.
1117: * @since 1.0
1118: */
1119: public static int count(String source, String substring) {
1120: return count(source, substring, true);
1121: }
1122:
1123: /**
1124: * Counts the number of times a substring occures in a provided string.
1125: *
1126: * @param source The <code>String</code> object that will be searched in.
1127: * @param substring The string whose occurances will we counted.
1128: * @param matchCase A <code>boolean</code> indicating if the match is
1129: * going to be performed in a case-sensitive manner or not.
1130: * @return An <code>int</code> value containing the number of occurances
1131: * of the substring.
1132: * @since 1.0
1133: */
1134: public static int count(String source, String substring,
1135: boolean matchCase) {
1136: if (null == source) {
1137: return 0;
1138: }
1139:
1140: if (null == substring) {
1141: return 0;
1142: }
1143:
1144: int current_index = 0;
1145: int substring_index = 0;
1146: int count = 0;
1147:
1148: if (!matchCase) {
1149: source = source.toLowerCase();
1150: substring = substring.toLowerCase();
1151: }
1152:
1153: while (current_index < source.length() - 1) {
1154: substring_index = source.indexOf(substring, current_index);
1155:
1156: if (-1 == substring_index) {
1157: break;
1158: } else {
1159: current_index = substring_index + substring.length();
1160: count++;
1161: }
1162: }
1163:
1164: return count;
1165: }
1166:
1167: /**
1168: * Splits a string into different parts, using a seperator string to
1169: * detect the seperation boundaries in a case-sensitive manner. The
1170: * seperator will not be included in the list of parts.
1171: *
1172: * @param source The string that will be split into parts.
1173: * @param seperator The seperator string that will be used to determine
1174: * the parts.
1175: * @return An <code>ArrayList</code> containing the parts as
1176: * <code>String</code> objects.
1177: * @since 1.0
1178: */
1179: public static ArrayList<String> split(String source,
1180: String seperator) {
1181: return split(source, seperator, true);
1182: }
1183:
1184: /**
1185: * Splits a string into different parts, using a seperator string to
1186: * detect the seperation boundaries. The seperator will not be included in
1187: * the list of parts.
1188: *
1189: * @param source The string that will be split into parts.
1190: * @param seperator The seperator string that will be used to determine
1191: * the parts.
1192: * @param matchCase A <code>boolean</code> indicating if the match is
1193: * going to be performed in a case-sensitive manner or not.
1194: * @return An <code>ArrayList</code> containing the parts as
1195: * <code>String</code> objects.
1196: * @since 1.0
1197: */
1198: public static ArrayList<String> split(String source,
1199: String seperator, boolean matchCase) {
1200: ArrayList<String> substrings = new ArrayList<String>();
1201:
1202: if (null == source) {
1203: return substrings;
1204: }
1205:
1206: if (null == seperator) {
1207: substrings.add(source);
1208: return substrings;
1209: }
1210:
1211: int current_index = 0;
1212: int delimiter_index = 0;
1213: String element = null;
1214:
1215: String source_lookup_reference = null;
1216: if (!matchCase) {
1217: source_lookup_reference = source.toLowerCase();
1218: seperator = seperator.toLowerCase();
1219: } else {
1220: source_lookup_reference = source;
1221: }
1222:
1223: while (current_index <= source_lookup_reference.length()) {
1224: delimiter_index = source_lookup_reference.indexOf(
1225: seperator, current_index);
1226:
1227: if (-1 == delimiter_index) {
1228: element = new String(source.substring(current_index,
1229: source.length()));
1230: substrings.add(element);
1231: current_index = source.length() + 1;
1232: } else {
1233: element = new String(source.substring(current_index,
1234: delimiter_index));
1235: substrings.add(element);
1236: current_index = delimiter_index + seperator.length();
1237: }
1238: }
1239:
1240: return substrings;
1241: }
1242:
1243: /**
1244: * Splits a string into different parts, using a seperator string to
1245: * detect the seperation boundaries in a case-sensitive manner. The
1246: * seperator will not be included in the parts array.
1247: *
1248: * @param source The string that will be split into parts.
1249: * @param seperator The seperator string that will be used to determine
1250: * the parts.
1251: * @return A <code>String[]</code> array containing the seperated parts.
1252: * @since 1.0
1253: */
1254: public static String[] splitToArray(String source, String seperator) {
1255: return splitToArray(source, seperator, true);
1256: }
1257:
1258: /**
1259: * Splits a string into different parts, using a seperator string to
1260: * detect the seperation boundaries. The seperator will not be included in
1261: * the parts array.
1262: *
1263: * @param source The string that will be split into parts.
1264: * @param seperator The seperator string that will be used to determine
1265: * the parts.
1266: * @param matchCase A <code>boolean</code> indicating if the match is
1267: * going to be performed in a case-sensitive manner or not.
1268: * @return A <code>String[]</code> array containing the seperated parts.
1269: * @since 1.0
1270: */
1271: public static String[] splitToArray(String source,
1272: String seperator, boolean matchCase) {
1273: ArrayList<String> substrings = split(source, seperator,
1274: matchCase);
1275: String[] substrings_array = new String[substrings.size()];
1276: substrings_array = substrings.toArray(substrings_array);
1277:
1278: return substrings_array;
1279: }
1280:
1281: /**
1282: * Splits a string into integers, using a seperator string to detect the
1283: * seperation boundaries in a case-sensitive manner. If a part couldn't be
1284: * converted to an integer, it will be omitted from the resulting array.
1285: *
1286: * @param source The string that will be split into integers.
1287: * @param seperator The seperator string that will be used to determine
1288: * the parts.
1289: * @return An <code>int[]</code> array containing the seperated parts.
1290: * @since 1.0
1291: */
1292: public static int[] splitToIntArray(String source, String seperator) {
1293: return splitToIntArray(source, seperator, true);
1294: }
1295:
1296: /**
1297: * Splits a string into integers, using a seperator string to detect the
1298: * seperation boundaries. If a part couldn't be converted to an integer,
1299: * it will be omitted from the resulting array.
1300: *
1301: * @param source The string that will be split into integers.
1302: * @param seperator The seperator string that will be used to determine
1303: * the parts.
1304: * @param matchCase A <code>boolean</code> indicating if the match is
1305: * going to be performed in a case-sensitive manner or not.
1306: * @return An <code>int[]</code> array containing the seperated parts.
1307: * @since 1.0
1308: */
1309: public static int[] splitToIntArray(String source,
1310: String seperator, boolean matchCase) {
1311: ArrayList<String> string_parts = split(source, seperator,
1312: matchCase);
1313: int number_of_valid_parts = 0;
1314:
1315: for (String string_part : string_parts) {
1316: try {
1317: Integer.parseInt(string_part);
1318: number_of_valid_parts++;
1319: } catch (NumberFormatException e) {
1320: // just continue
1321: }
1322: }
1323:
1324: int[] string_parts_int = (int[]) Array.newInstance(int.class,
1325: number_of_valid_parts);
1326: int added_parts = 0;
1327:
1328: for (String string_part : string_parts) {
1329: try {
1330: string_parts_int[added_parts] = Integer
1331: .parseInt(string_part);
1332: added_parts++;
1333: } catch (NumberFormatException e) {
1334: // just continue
1335: }
1336: }
1337:
1338: return string_parts_int;
1339: }
1340:
1341: /**
1342: * Splits a string into bytes, using a seperator string to detect the
1343: * seperation boundaries in a case-sensitive manner. If a part couldn't be
1344: * converted to a <code>byte</code>, it will be omitted from the resulting
1345: * array.
1346: *
1347: * @param source The string that will be split into bytes.
1348: * @param seperator The seperator string that will be used to determine
1349: * the parts.
1350: * @return A <code>byte[]</code> array containing the bytes.
1351: * @since 1.0
1352: */
1353: public static byte[] splitToByteArray(String source,
1354: String seperator) {
1355: return splitToByteArray(source, seperator, true);
1356: }
1357:
1358: /**
1359: * Splits a string into bytes, using a seperator string to detect the
1360: * seperation boundaries. If a part couldn't be converted to a
1361: * <code>byte</code>, it will be omitted from the resulting array.
1362: *
1363: * @param source The string that will be split into bytes.
1364: * @param seperator The seperator string that will be used to determine
1365: * the parts.
1366: * @param matchCase A <code>boolean</code> indicating if the match is
1367: * going to be performed in a case-sensitive manner or not.
1368: * @return A <code>byte[]</code> array containing the bytes.
1369: * @since 1.0
1370: */
1371: public static byte[] splitToByteArray(String source,
1372: String seperator, boolean matchCase) {
1373: ArrayList<String> string_parts = split(source, seperator,
1374: matchCase);
1375: int number_of_valid_parts = 0;
1376: for (String string_part : string_parts) {
1377: try {
1378: Byte.parseByte(string_part);
1379: number_of_valid_parts++;
1380: } catch (NumberFormatException e) {
1381: // just continue
1382: }
1383: }
1384:
1385: byte[] string_parts_byte = (byte[]) Array.newInstance(
1386: byte.class, number_of_valid_parts);
1387: int added_parts = 0;
1388: for (String string_part : string_parts) {
1389: try {
1390: string_parts_byte[added_parts] = Byte
1391: .parseByte(string_part);
1392: added_parts++;
1393: } catch (NumberFormatException e) {
1394: // just continue
1395: }
1396: }
1397:
1398: return string_parts_byte;
1399: }
1400:
1401: /**
1402: * Removes all occurances of a string from the front of another string in
1403: * a case-sensitive manner.
1404: *
1405: * @param source The string in which the matching will be done.
1406: * @param stringToStrip The string that will be stripped from the front.
1407: * @return A new <code>String</code> containing the stripped result.
1408: * @since 1.0
1409: */
1410: public static String stripFromFront(String source,
1411: String stringToStrip) {
1412: return stripFromFront(source, stringToStrip, true);
1413: }
1414:
1415: /**
1416: * Removes all occurances of a string from the front of another string.
1417: *
1418: * @param source The string in which the matching will be done.
1419: * @param stringToStrip The string that will be stripped from the front.
1420: * @param matchCase A <code>boolean</code> indicating if the match is
1421: * going to be performed in a case-sensitive manner or not.
1422: * @return A new <code>String</code> containing the stripping result.
1423: * @since 1.0
1424: */
1425: public static String stripFromFront(String source,
1426: String stringToStrip, boolean matchCase) {
1427: if (null == source) {
1428: return null;
1429: }
1430:
1431: if (null == stringToStrip) {
1432: return source;
1433: }
1434:
1435: int strip_length = stringToStrip.length();
1436: int new_index = 0;
1437: int last_index = 0;
1438:
1439: String source_lookup_reference = null;
1440: if (!matchCase) {
1441: source_lookup_reference = source.toLowerCase();
1442: stringToStrip = stringToStrip.toLowerCase();
1443: } else {
1444: source_lookup_reference = source;
1445: }
1446:
1447: new_index = source_lookup_reference.indexOf(stringToStrip);
1448: if (0 == new_index) {
1449: do {
1450: last_index = new_index;
1451: new_index = source_lookup_reference.indexOf(
1452: stringToStrip, new_index + strip_length);
1453: } while (new_index != -1
1454: && new_index == last_index + strip_length);
1455:
1456: return source.substring(last_index + strip_length);
1457: } else {
1458: return source;
1459: }
1460: }
1461:
1462: /**
1463: * Removes all occurances of a string from the end of another string in a
1464: * case-sensitive manner.
1465: *
1466: * @param source The string in which the matching will be done.
1467: * @param stringToStrip The string that will be stripped from the end.
1468: * @return A new <code>String</code> containing the stripped result.
1469: * @since 1.0
1470: */
1471: public static String stripFromEnd(String source,
1472: String stringToStrip) {
1473: return stripFromEnd(source, stringToStrip, true);
1474: }
1475:
1476: /**
1477: * Removes all occurances of a string from the end of another string.
1478: *
1479: * @param source The string in which the matching will be done.
1480: * @param stringToStrip The string that will be stripped from the end.
1481: * @param matchCase A <code>boolean</code> indicating if the match is
1482: * going to be performed in a case-sensitive manner or not.
1483: * @return A new <code>String</code> containing the stripped result.
1484: * @since 1.0
1485: */
1486: public static String stripFromEnd(String source,
1487: String stringToStrip, boolean matchCase) {
1488: if (null == source) {
1489: return null;
1490: }
1491:
1492: if (null == stringToStrip) {
1493: return source;
1494: }
1495:
1496: int strip_length = stringToStrip.length();
1497: int new_index = 0;
1498: int last_index = 0;
1499:
1500: String source_lookup_reference = null;
1501: if (!matchCase) {
1502: source_lookup_reference = source.toLowerCase();
1503: stringToStrip = stringToStrip.toLowerCase();
1504: } else {
1505: source_lookup_reference = source;
1506: }
1507:
1508: new_index = source_lookup_reference.lastIndexOf(stringToStrip);
1509: if (new_index != -1
1510: && source.length() == new_index + strip_length) {
1511: do {
1512: last_index = new_index;
1513: new_index = source_lookup_reference.lastIndexOf(
1514: stringToStrip, last_index - 1);
1515: } while (new_index != -1
1516: && new_index == last_index - strip_length);
1517:
1518: return source.substring(0, last_index);
1519: } else {
1520: return source;
1521: }
1522: }
1523:
1524: /**
1525: * Searches for a string within a specified string in a case-sensitive
1526: * manner and replaces every match with another string.
1527: *
1528: * @param source The string in which the matching parts will be replaced.
1529: * @param stringToReplace The string that will be searched for.
1530: * @param replacementString The string that will replace each matching
1531: * part.
1532: * @return A new <code>String</code> object containing the replacement
1533: * result.
1534: * @since 1.0
1535: */
1536: public static String replace(String source, String stringToReplace,
1537: String replacementString) {
1538: return replace(source, stringToReplace, replacementString, true);
1539: }
1540:
1541: /**
1542: * Searches for a string within a specified string and replaces every
1543: * match with another string.
1544: *
1545: * @param source The string in which the matching parts will be replaced.
1546: * @param stringToReplace The string that will be searched for.
1547: * @param replacementString The string that will replace each matching
1548: * part.
1549: * @param matchCase A <code>boolean</code> indicating if the match is
1550: * going to be performed in a case-sensitive manner or not.
1551: * @return A new <code>String</code> object containing the replacement
1552: * result.
1553: * @since 1.0
1554: */
1555: public static String replace(String source, String stringToReplace,
1556: String replacementString, boolean matchCase) {
1557: if (null == source) {
1558: return null;
1559: }
1560:
1561: if (null == stringToReplace) {
1562: return source;
1563: }
1564:
1565: if (null == replacementString) {
1566: return source;
1567: }
1568:
1569: Iterator<String> string_parts = split(source, stringToReplace,
1570: matchCase).iterator();
1571: StringBuilder new_string = new StringBuilder();
1572:
1573: while (string_parts.hasNext()) {
1574: String string_part = string_parts.next();
1575: new_string.append(string_part);
1576: if (string_parts.hasNext()) {
1577: new_string.append(replacementString);
1578: }
1579: }
1580:
1581: return new_string.toString();
1582: }
1583:
1584: /**
1585: * Creates a new string that contains the provided string a number of
1586: * times.
1587: *
1588: * @param source The string that will be repeated.
1589: * @param count The number of times that the string will be repeated.
1590: * @return A new <code>String</code> object containing the repeated
1591: * concatenation result.
1592: * @since 1.0
1593: */
1594: public static String repeat(String source, int count) {
1595: if (null == source) {
1596: return null;
1597: }
1598:
1599: StringBuilder new_string = new StringBuilder();
1600: while (count > 0) {
1601: new_string.append(source);
1602: count--;
1603: }
1604:
1605: return new_string.toString();
1606: }
1607:
1608: /**
1609: * Creates a new array of <code>String</code> objects, containing the
1610: * elements of a supplied <code>Iterator</code>.
1611: *
1612: * @param iterator The iterator containing the elements to create the
1613: * array with.
1614: * @return The new <code>String</code> array.
1615: * @since 1.0
1616: */
1617: public static String[] toStringArray(Iterator<String> iterator) {
1618: if (null == iterator) {
1619: return new String[0];
1620: }
1621:
1622: ArrayList<String> strings = new ArrayList<String>();
1623:
1624: while (iterator.hasNext()) {
1625: strings.add(iterator.next());
1626: }
1627:
1628: String[] string_array = new String[strings.size()];
1629: strings.toArray(string_array);
1630:
1631: return string_array;
1632: }
1633:
1634: /**
1635: * Creates a new <code>ArrayList</code>, containing the elements of a
1636: * supplied array of <code>String</code> objects.
1637: *
1638: * @param stringArray The array of <code>String</code> objects that have
1639: * to be converted.
1640: * @return The new <code>ArrayList</code> with the elements of the
1641: * <code>String</code> array.
1642: * @since 1.0
1643: */
1644: public static ArrayList<String> toArrayList(String[] stringArray) {
1645: ArrayList<String> strings = new ArrayList<String>();
1646:
1647: if (null == stringArray) {
1648: return strings;
1649: }
1650:
1651: for (String element : stringArray) {
1652: strings.add(element);
1653: }
1654:
1655: return strings;
1656: }
1657:
1658: /**
1659: * Creates a new <code>String</code> object, containing the elements of a
1660: * supplied <code>Collection</code> of <code>String</code> objects joined
1661: * by a given seperator.
1662: *
1663: * @param collection The <code>Collection</code> containing the elements
1664: * to join.
1665: * @param seperator The seperator used to join the string elements.
1666: * @return A new <code>String</code> with the join result.
1667: * @since 1.0
1668: */
1669: public static String join(Collection collection, String seperator) {
1670: if (null == collection) {
1671: return null;
1672: }
1673:
1674: if (null == seperator) {
1675: seperator = "";
1676: }
1677:
1678: if (0 == collection.size()) {
1679: return "";
1680: } else {
1681: StringBuilder result = new StringBuilder();
1682: for (Object element : collection) {
1683: result.append(String.valueOf(element));
1684: result.append(seperator);
1685: }
1686:
1687: result.setLength(result.length() - seperator.length());
1688: return result.toString();
1689: }
1690: }
1691:
1692: /**
1693: * Creates a new <code>String</code> object, containing the elements of a
1694: * supplied array, joined by a given seperator.
1695: *
1696: * @param array The object array containing the elements to join.
1697: * @param seperator The seperator used to join the string elements.
1698: * @return A new <code>String</code> with the join result.
1699: * @since 1.0
1700: */
1701: public static String join(Object[] array, String seperator) {
1702: return join(array, seperator, null, false);
1703: }
1704:
1705: /**
1706: * Creates a new <code>String</code> object, containing the elements of a
1707: * supplied array, joined by a given seperator.
1708: *
1709: * @param array The object array containing the elements to join.
1710: * @param seperator The seperator used to join the string elements.
1711: * @param delimiter The delimiter used to surround the string elements.
1712: * @return A new <code>String</code> with the join result.
1713: * @since 1.0
1714: */
1715: public static String join(Object[] array, String seperator,
1716: String delimiter) {
1717: return join(array, seperator, delimiter, false);
1718: }
1719:
1720: /**
1721: * Creates a new <code>String</code> object, containing the elements of a
1722: * supplied array, joined by a given seperator.
1723: *
1724: * @param array The object array containing the elements to join.
1725: * @param seperator The seperator used to join the string elements.
1726: * @param delimiter The delimiter used to surround the string elements.
1727: * @param encodeStrings Indicates whether the characters of the string
1728: * representation of the Array values should be encoded.
1729: * @return A new <code>String</code> with the join result.
1730: * @since 1.0
1731: */
1732: public static String join(Object[] array, String seperator,
1733: String delimiter, boolean encodeStrings) {
1734: if (null == array) {
1735: return null;
1736: }
1737:
1738: if (null == seperator) {
1739: seperator = "";
1740: }
1741:
1742: if (null == delimiter) {
1743: delimiter = "";
1744: }
1745:
1746: if (0 == array.length) {
1747: return "";
1748: } else {
1749: int current_index = 0;
1750: String array_value = null;
1751: StringBuilder result = new StringBuilder();
1752: while (current_index < array.length - 1) {
1753: if (null == array[current_index]) {
1754: result.append("null");
1755: } else {
1756: array_value = String.valueOf(array[current_index]);
1757: if (encodeStrings) {
1758: array_value = encodeString(array_value);
1759: }
1760: result.append(delimiter);
1761: result.append(array_value);
1762: result.append(delimiter);
1763: }
1764: result.append(seperator);
1765: current_index++;
1766: }
1767:
1768: if (null == array[current_index]) {
1769: result.append("null");
1770: } else {
1771: array_value = String.valueOf(array[current_index]);
1772: if (encodeStrings) {
1773: array_value = encodeString(array_value);
1774: }
1775: result.append(delimiter);
1776: result.append(array_value);
1777: result.append(delimiter);
1778: }
1779: return result.toString();
1780: }
1781: }
1782:
1783: /**
1784: * Creates a new <code>String</code> object, containing the elements of a
1785: * supplied array, joined by a given seperator.
1786: *
1787: * @param array The boolean array containing the values to join.
1788: * @param seperator The seperator used to join the string elements.
1789: * @return A new <code>String</code> with the join result.
1790: * @since 1.0
1791: */
1792: public static String join(boolean[] array, String seperator) {
1793: if (null == array) {
1794: return null;
1795: }
1796:
1797: if (null == seperator) {
1798: seperator = "";
1799: }
1800:
1801: if (0 == array.length) {
1802: return "";
1803: } else {
1804: int current_index = 0;
1805: String result = "";
1806: while (current_index < array.length - 1) {
1807: result = result + array[current_index] + seperator;
1808: current_index++;
1809: }
1810:
1811: result = result + array[current_index];
1812: return result;
1813: }
1814: }
1815:
1816: /**
1817: * Creates a new <code>String</code> object, containing the elements of a
1818: * supplied array, joined by a given seperator.
1819: *
1820: * @param array The byte array containing the values to join.
1821: * @param seperator The seperator used to join the string elements.
1822: * @return A new <code>String</code> with the join result.
1823: * @since 1.0
1824: */
1825: public static String join(byte[] array, String seperator) {
1826: if (null == array) {
1827: return null;
1828: }
1829:
1830: if (null == seperator) {
1831: seperator = "";
1832: }
1833:
1834: if (0 == array.length) {
1835: return "";
1836: } else {
1837: int current_index = 0;
1838: String result = "";
1839: while (current_index < array.length - 1) {
1840: result = result + array[current_index] + seperator;
1841: current_index++;
1842: }
1843:
1844: result = result + array[current_index];
1845: return result;
1846: }
1847: }
1848:
1849: /**
1850: * Creates a new <code>String</code> object, containing the elements of a
1851: * supplied array, joined by a given seperator.
1852: *
1853: * @param array The double array containing the values to join.
1854: * @param seperator The seperator used to join the string elements.
1855: * @return A new <code>String</code> with the join result.
1856: * @since 1.0
1857: */
1858: public static String join(double[] array, String seperator) {
1859: if (null == array) {
1860: return null;
1861: }
1862:
1863: if (null == seperator) {
1864: seperator = "";
1865: }
1866:
1867: if (0 == array.length) {
1868: return "";
1869: } else {
1870: int current_index = 0;
1871: String result = "";
1872: while (current_index < array.length - 1) {
1873: result = result + array[current_index] + seperator;
1874: current_index++;
1875: }
1876:
1877: result = result + array[current_index];
1878: return result;
1879: }
1880: }
1881:
1882: /**
1883: * Creates a new <code>String</code> object, containing the elements of a
1884: * supplied array, joined by a given seperator.
1885: *
1886: * @param array The float array containing the values to join.
1887: * @param seperator The seperator used to join the string elements.
1888: * @return A new <code>String</code> with the join result.
1889: * @since 1.0
1890: */
1891: public static String join(float[] array, String seperator) {
1892: if (null == array) {
1893: return null;
1894: }
1895:
1896: if (null == seperator) {
1897: seperator = "";
1898: }
1899:
1900: if (0 == array.length) {
1901: return "";
1902: } else {
1903: int current_index = 0;
1904: String result = "";
1905: while (current_index < array.length - 1) {
1906: result = result + array[current_index] + seperator;
1907: current_index++;
1908: }
1909:
1910: result = result + array[current_index];
1911: return result;
1912: }
1913: }
1914:
1915: /**
1916: * Creates a new <code>String</code> object, containing the elements of a
1917: * supplied array, joined by a given seperator.
1918: *
1919: * @param array The integer array containing the values to join.
1920: * @param seperator The seperator used to join the string elements.
1921: * @return A new <code>String</code> with the join result.
1922: * @since 1.0
1923: */
1924: public static String join(int[] array, String seperator) {
1925: if (null == array) {
1926: return null;
1927: }
1928:
1929: if (null == seperator) {
1930: seperator = "";
1931: }
1932:
1933: if (0 == array.length) {
1934: return "";
1935: } else {
1936: int current_index = 0;
1937: String result = "";
1938: while (current_index < array.length - 1) {
1939: result = result + array[current_index] + seperator;
1940: current_index++;
1941: }
1942:
1943: result = result + array[current_index];
1944: return result;
1945: }
1946: }
1947:
1948: /**
1949: * Creates a new <code>String</code> object, containing the elements of a
1950: * supplied array, joined by a given seperator.
1951: *
1952: * @param array The long array containing the values to join.
1953: * @param seperator The seperator used to join the string elements.
1954: * @return A new <code>String</code> with the join result.
1955: * @since 1.0
1956: */
1957: public static String join(long[] array, String seperator) {
1958: if (null == array) {
1959: return null;
1960: }
1961:
1962: if (null == seperator) {
1963: seperator = "";
1964: }
1965:
1966: if (0 == array.length) {
1967: return "";
1968: } else {
1969: int current_index = 0;
1970: String result = "";
1971: while (current_index < array.length - 1) {
1972: result = result + array[current_index] + seperator;
1973: current_index++;
1974: }
1975:
1976: result = result + array[current_index];
1977: return result;
1978: }
1979: }
1980:
1981: /**
1982: * Creates a new <code>String</code> object, containing the elements of a
1983: * supplied array, joined by a given seperator.
1984: *
1985: * @param array The short array containing the values to join.
1986: * @param seperator The seperator used to join the string elements.
1987: * @return A new <code>String</code> with the join result.
1988: * @since 1.0
1989: */
1990: public static String join(short[] array, String seperator) {
1991: if (null == array) {
1992: return null;
1993: }
1994:
1995: if (null == seperator) {
1996: seperator = "";
1997: }
1998:
1999: if (0 == array.length) {
2000: return "";
2001: } else {
2002: int current_index = 0;
2003: String result = "";
2004: while (current_index < array.length - 1) {
2005: result = result + array[current_index] + seperator;
2006: current_index++;
2007: }
2008:
2009: result = result + array[current_index];
2010: return result;
2011: }
2012: }
2013:
2014: /**
2015: * Creates a new <code>String</code> object, containing the elements of a
2016: * supplied array, joined by a given seperator.
2017: *
2018: * @param array The char array containing the values to join.
2019: * @param seperator The seperator used to join the string elements.
2020: * @return A new <code>String</code> with the join result.
2021: * @since 1.0
2022: */
2023: public static String join(char[] array, String seperator) {
2024: return join(array, seperator, null);
2025: }
2026:
2027: /**
2028: * Creates a new <code>String</code> object, containing the elements of a
2029: * supplied array, joined by a given seperator.
2030: *
2031: * @param array The char array containing the values to join.
2032: * @param seperator The seperator used to join the string elements.
2033: * @param delimiter The delimiter used to surround the string elements.
2034: * @return A new <code>String</code> with the join result.
2035: * @since 1.0
2036: */
2037: public static String join(char[] array, String seperator,
2038: String delimiter) {
2039: if (null == array) {
2040: return null;
2041: }
2042:
2043: if (null == seperator) {
2044: seperator = "";
2045: }
2046:
2047: if (null == delimiter) {
2048: delimiter = "";
2049: }
2050:
2051: if (0 == array.length) {
2052: return "";
2053: } else {
2054: int current_index = 0;
2055: StringBuilder result = new StringBuilder();
2056: while (current_index < array.length - 1) {
2057: result.append(delimiter);
2058: result.append(array[current_index]);
2059: result.append(delimiter);
2060: result.append(seperator);
2061: current_index++;
2062: }
2063:
2064: result.append(delimiter);
2065: result.append(String.valueOf(array[current_index]));
2066: result.append(delimiter);
2067: return result.toString();
2068: }
2069: }
2070:
2071: /**
2072: * Returns an array that contains all the occurances of a substring in a
2073: * string in the correct order. The search will be performed in a
2074: * case-sensitive manner.
2075: *
2076: * @param source The <code>String</code> object that will be searched in.
2077: * @param substring The string whose occurances will we counted.
2078: * @return An <code>int[]</code> array containing the indices of the
2079: * substring.
2080: * @since 1.0
2081: */
2082: public static int[] indicesOf(String source, String substring) {
2083: return indicesOf(source, substring, true);
2084: }
2085:
2086: /**
2087: * Returns an array that contains all the occurances of a substring in a
2088: * string in the correct order.
2089: *
2090: * @param source The <code>String</code> object that will be searched in.
2091: * @param substring The string whose occurances will we counted.
2092: * @param matchCase A <code>boolean</code> indicating if the match is
2093: * going to be performed in a case-sensitive manner or not.
2094: * @return An <code>int[]</code> array containing the indices of the
2095: * substring.
2096: * @since 1.0
2097: */
2098: public static int[] indicesOf(String source, String substring,
2099: boolean matchCase) {
2100: if (null == source || null == substring) {
2101: return new int[0];
2102: }
2103:
2104: String source_lookup_reference = null;
2105: if (!matchCase) {
2106: source_lookup_reference = source.toLowerCase();
2107: substring = substring.toLowerCase();
2108: } else {
2109: source_lookup_reference = source;
2110: }
2111:
2112: int current_index = 0;
2113: int substring_index = 0;
2114: int count = count(source_lookup_reference, substring);
2115: int[] indices = new int[count];
2116: int counter = 0;
2117:
2118: while (current_index < source.length() - 1) {
2119: substring_index = source_lookup_reference.indexOf(
2120: substring, current_index);
2121:
2122: if (-1 == substring_index) {
2123: break;
2124: } else {
2125: current_index = substring_index + substring.length();
2126: indices[counter] = substring_index;
2127: counter++;
2128: }
2129: }
2130:
2131: return indices;
2132: }
2133:
2134: /**
2135: * Matches a collection of regular expressions against a string.
2136: *
2137: * @param value The <code>String</code> that will be checked.
2138: * @param regexps The collection of regular expressions against which the
2139: * match will be performed.
2140: * @return The <code>Matcher</code> instance that corresponds to the
2141: * <code>String</code> that returned a successful match; or
2142: * <p><code>null</code> if no match could be found.
2143: * @since 1.0
2144: */
2145: public static Matcher getMatchingRegexp(String value,
2146: Collection<Pattern> regexps) {
2147: if (value != null && value.length() > 0 && regexps != null
2148: && regexps.size() > 0) {
2149: Matcher matcher = null;
2150: for (Pattern regexp : regexps) {
2151: matcher = regexp.matcher(value);
2152: if (matcher.matches()) {
2153: return matcher;
2154: }
2155: }
2156: }
2157:
2158: return null;
2159: }
2160:
2161: /**
2162: * Matches a collection of strings against a regular expression.
2163: *
2164: * @param values The <code>Collection</code> of <code>String</code>
2165: * objects that will be checked.
2166: * @param regexp The regular expression <code>Pattern</code> against which
2167: * the matches will be performed.
2168: * @return The <code>Matcher</code> instance that corresponds to the
2169: * <code>String</code> that returned a successful match; or
2170: * <p><code>null</code> if no match could be found.
2171: * @since 1.0
2172: */
2173: public static Matcher getRegexpMatch(Collection<String> values,
2174: Pattern regexp) {
2175: if (values != null && values.size() > 0 && regexp != null) {
2176: Matcher matcher = null;
2177: for (String value : values) {
2178: matcher = regexp.matcher(value);
2179: if (matcher.matches()) {
2180: return matcher;
2181: }
2182: }
2183: }
2184:
2185: return null;
2186: }
2187:
2188: /**
2189: * Checks if the name filters through an including and an excluding
2190: * regular expression.
2191: *
2192: * @param name The <code>String</code> that will be filtered.
2193: * @param included The regular expressions that needs to succeed
2194: * @param excluded The regular expressions that needs to fail
2195: * @return <code>true</code> if the name filtered through correctly; or
2196: * <p><code>false</code> otherwise.
2197: * @since 1.0
2198: */
2199: public static boolean filter(String name, Pattern included,
2200: Pattern excluded) {
2201: Pattern[] included_array = null;
2202: if (included != null) {
2203: included_array = new Pattern[] { included };
2204: }
2205:
2206: Pattern[] excluded_array = null;
2207: if (excluded != null) {
2208: excluded_array = new Pattern[] { excluded };
2209: }
2210:
2211: return filter(name, included_array, excluded_array);
2212: }
2213:
2214: /**
2215: * Checks if the name filters through a series of including and excluding
2216: * regular expressions.
2217: *
2218: * @param name The <code>String</code> that will be filtered.
2219: * @param included An array of regular expressions that need to succeed
2220: * @param excluded An array of regular expressions that need to fail
2221: * @return <code>true</code> if the name filtered through correctly; or
2222: * <p><code>false</code> otherwise.
2223: * @since 1.0
2224: */
2225: public static boolean filter(String name, Pattern[] included,
2226: Pattern[] excluded) {
2227: if (null == name) {
2228: return false;
2229: }
2230:
2231: boolean accepted = false;
2232:
2233: // retain only the includes
2234: if (null == included) {
2235: accepted = true;
2236: } else {
2237: for (Pattern pattern : included) {
2238: if (pattern != null && pattern.matcher(name).matches()) {
2239: accepted = true;
2240: break;
2241: }
2242: }
2243: }
2244:
2245: // remove the excludes
2246: if (accepted && excluded != null) {
2247: for (Pattern pattern : excluded) {
2248: if (pattern != null && pattern.matcher(name).matches()) {
2249: accepted = false;
2250: break;
2251: }
2252: }
2253: }
2254:
2255: return accepted;
2256: }
2257:
2258: /**
2259: * Ensure that the first character of the provided string is upper case.
2260: *
2261: * @param source The <code>String</code> to capitalize.
2262: * @return The capitalized <code>String</code>.
2263: * @since 1.0
2264: */
2265: public static String capitalize(String source) {
2266: if (source == null || source.length() == 0) {
2267: return source;
2268: }
2269:
2270: if (source.length() > 1
2271: && Character.isUpperCase(source.charAt(0))) {
2272: return source;
2273: }
2274:
2275: char chars[] = source.toCharArray();
2276: chars[0] = Character.toUpperCase(chars[0]);
2277: return new String(chars);
2278: }
2279:
2280: /**
2281: * Ensure that the first character of the provided string lower case.
2282: *
2283: * @param source The <code>String</code> to uncapitalize.
2284: * @return The uncapitalized <code>String</code>.
2285: * @since 1.5
2286: */
2287: public static String uncapitalize(String source) {
2288: if (source == null || source.length() == 0) {
2289: return source;
2290: }
2291:
2292: if (source.length() > 1
2293: && Character.isLowerCase(source.charAt(0))) {
2294: return source;
2295: }
2296:
2297: char chars[] = source.toCharArray();
2298: chars[0] = Character.toLowerCase(chars[0]);
2299: return new String(chars);
2300: }
2301:
2302: private static String convertUrl(String source, Pattern pattern,
2303: boolean shorten, boolean sanitize, boolean no_follow) {
2304: int max_length = RifeConfig.Tools.getMaxVisualUrlLength();
2305:
2306: String result = source;
2307:
2308: Matcher url_matcher = pattern.matcher(source);
2309: boolean found = url_matcher.find();
2310: if (found) {
2311: String visual_url = null;
2312: String actual_url = null;
2313: int last = 0;
2314: StringBuilder sb = new StringBuilder();
2315: do {
2316: actual_url = url_matcher.group(1);
2317: if (url_matcher.groupCount() > 1) {
2318: visual_url = url_matcher.group(2);
2319: } else {
2320: visual_url = actual_url;
2321: }
2322:
2323: if (sanitize) {
2324: // defang javascript
2325: actual_url = StringUtils.replace(actual_url,
2326: "javascript:", "");
2327:
2328: // fill in http:// for URLs that don't begin with /
2329: if ((actual_url.indexOf("://") == -1)
2330: && (!actual_url.startsWith("/"))) {
2331: actual_url = "http://" + actual_url;
2332: }
2333: }
2334:
2335: if (pattern.equals(BBCODE_BAREURL)) {
2336: sb.append(source.substring(last, url_matcher
2337: .start(1)));
2338: } else {
2339: sb.append(source.substring(last, url_matcher
2340: .start(0)));
2341: }
2342: sb.append("<a href=\"");
2343: sb.append(actual_url);
2344: sb.append("\"");
2345: if (actual_url.startsWith("http://")
2346: || actual_url.startsWith("https://")) {
2347: sb.append(" target=\"_blank\"");
2348: }
2349: if (no_follow) {
2350: sb.append(" rel=\"nofollow\"");
2351: }
2352: sb.append(">");
2353: if (visual_url.length() <= max_length || !shorten) {
2354: sb.append(visual_url);
2355: } else {
2356: String ellipsis = "...";
2357: int query_index = visual_url.indexOf("?");
2358:
2359: // hack query string off
2360: // keep '?'
2361: if (query_index != -1) {
2362: visual_url = visual_url.substring(0,
2363: query_index + 1)
2364: + ellipsis;
2365: }
2366:
2367: if (visual_url.length() >= max_length) {
2368: int last_slash = visual_url.lastIndexOf("/");
2369: int start_slash = visual_url.indexOf("/",
2370: visual_url.indexOf("://") + 3);
2371:
2372: if (last_slash != start_slash) {
2373: visual_url = visual_url.substring(0,
2374: start_slash + 1)
2375: + ellipsis
2376: + visual_url.substring(last_slash);
2377: }
2378: }
2379:
2380: sb.append(visual_url);
2381: }
2382: sb.append("</a>");
2383:
2384: if (pattern.equals(BBCODE_BAREURL)) {
2385: last = url_matcher.end(1);
2386: } else {
2387: last = url_matcher.end(0);
2388: }
2389:
2390: found = url_matcher.find();
2391: } while (found);
2392:
2393: sb.append(source.substring(last));
2394: result = sb.toString();
2395: }
2396:
2397: return result;
2398: }
2399:
2400: /**
2401: * Converts a BBCode marked-up text to regular html.
2402: *
2403: * @param source The text with BBCode tags.
2404: * @return A <code>String</code> with the corresponding HTML code
2405: * @since 1.0
2406: */
2407: public static String convertBbcode(String source) {
2408: if (null == source) {
2409: return null;
2410: }
2411:
2412: return convertBbcode(source, (BbcodeOption[]) null);
2413: }
2414:
2415: /**
2416: * Converts a BBCode marked-up text to regular html.
2417: *
2418: * @param source The text with BBCode tags.
2419: * @return A <code>String</code> with the corresponding HTML code
2420: * @since 1.0
2421: */
2422: public static String convertBbcode(final String source,
2423: BbcodeOption... options) {
2424: if (null == source) {
2425: return null;
2426: }
2427:
2428: boolean shorten = false;
2429: boolean sanitize = false;
2430: boolean convert_bare = false;
2431: boolean no_follow_links = false;
2432: if (options != null) {
2433: for (BbcodeOption option : options) {
2434: if (option.equals(StringUtils.SHORTEN_URL)) {
2435: shorten = true;
2436: } else if (option.equals(StringUtils.SANITIZE_URL)) {
2437: sanitize = true;
2438: } else if (option.equals(StringUtils.CONVERT_BARE_URLS)) {
2439: convert_bare = true;
2440: } else if (option.equals(StringUtils.NO_FOLLOW_LINKS)) {
2441: no_follow_links = true;
2442: }
2443: }
2444: }
2445:
2446: String sourcecopy = source;
2447: StringBuilder result = new StringBuilder(source.length());
2448:
2449: int startindex;
2450: int endIndex;
2451: int nextCodeIndex;
2452: while (-1 != (startindex = sourcecopy.indexOf("[code]"))) {
2453: // handle parsed
2454: String parsed = sourcecopy.substring(0, startindex);
2455: endIndex = sourcecopy.indexOf("[/code]") + 7; // 7 == the sizeof "[/code]"
2456: nextCodeIndex = sourcecopy
2457: .indexOf("[code]", startindex + 6); // 6 == the sizeof "[code]"
2458:
2459: if (endIndex < 0) {
2460: // not ended... set to end of string
2461: endIndex = sourcecopy.length() - 1;
2462: }
2463:
2464: if (nextCodeIndex < endIndex && nextCodeIndex > 0) {
2465: // nested [code] tags
2466:
2467: /* must end before the next [code]
2468: * this will leave a dangling [/code] but the HTML is valid
2469: */
2470: StringBuilder sourcecopycopy = new StringBuilder();
2471: sourcecopycopy.append(
2472: sourcecopy.substring(0, nextCodeIndex)).append(
2473: "[/code]").append(
2474: sourcecopy.substring(nextCodeIndex));
2475: sourcecopy = sourcecopycopy.toString();
2476:
2477: endIndex = sourcecopy.indexOf("[/code]") + 7;
2478: }
2479:
2480: if (startindex > endIndex) {
2481: // dangling [/code]
2482: endIndex = sourcecopy.indexOf("[/code]", endIndex + 7) + 7; // 7 == the sizeof "[/code]"
2483: if (endIndex < 0) {
2484: endIndex = sourcecopy.length() - 1;
2485: }
2486: }
2487:
2488: String code = sourcecopy.substring(startindex, endIndex);
2489:
2490: parsed = parseBBCode(parsed, shorten, sanitize,
2491: convert_bare, no_follow_links);
2492:
2493: // handle raw
2494: code = StringUtils.replace(code, "[code]",
2495: "<div class=\"codebody\"><pre>", false);
2496: code = StringUtils.replace(code, "[/code]", "</pre></div>",
2497: false);
2498:
2499: result.append(parsed).append(code);
2500:
2501: sourcecopy = sourcecopy.substring(endIndex);
2502: }
2503:
2504: result.append(parseBBCode(sourcecopy, shorten, sanitize,
2505: convert_bare, no_follow_links));
2506:
2507: return result.toString();
2508: }
2509:
2510: private static String parseBBCode(String source, boolean shorten,
2511: boolean sanitize, boolean convert_bare, boolean no_follow) {
2512: String result = source;
2513:
2514: result = StringUtils.replace(result, "[b]", "<b>", false);
2515: result = StringUtils.replace(result, "[/b]", "</b>", false);
2516: result = StringUtils.replace(result, "[u]", "<u>", false);
2517: result = StringUtils.replace(result, "[/u]", "</u>", false);
2518: result = StringUtils.replace(result, "[i]", "<i>", false);
2519: result = StringUtils.replace(result, "[/i]", "</i>", false);
2520: result = StringUtils.replace(result, "[pre]", "<pre>", false);
2521: result = StringUtils.replace(result, "[/pre]", "</pre>", false);
2522:
2523: String resultCopy = result;
2524: String resultLowerCopy = result.toLowerCase();
2525: StringBuilder buffer = new StringBuilder();
2526: int startIndex;
2527: int endIndex;
2528: while (-1 != (startIndex = resultLowerCopy.indexOf("[*]"))) {
2529: int begin = resultLowerCopy.indexOf("[list]",
2530: startIndex + 3);
2531: int end = resultLowerCopy
2532: .indexOf("[/list]", startIndex + 3);
2533: int next = resultLowerCopy.indexOf("[*]", startIndex + 3); // 3 == sizeof [*]
2534:
2535: if (begin == -1) {
2536: begin = Integer.MAX_VALUE;
2537: }
2538:
2539: if (end == -1) {
2540: end = Integer.MAX_VALUE;
2541: }
2542:
2543: if (next == -1) {
2544: next = Integer.MAX_VALUE;
2545: }
2546:
2547: if (next < begin && next < end) {
2548: endIndex = next;
2549: } else if (begin < next && begin < end) {
2550: endIndex = begin;
2551: } else if (end < next && end < begin) {
2552: endIndex = end;
2553: } else {
2554: endIndex = resultLowerCopy.length();
2555: }
2556:
2557: buffer.append(resultCopy.substring(0, startIndex)).append(
2558: "<li>").append(
2559: resultCopy.substring(startIndex + 3, endIndex)) // 3 == sizeof [*]
2560: .append("</li>");
2561:
2562: resultCopy = resultCopy.substring(endIndex);
2563: resultLowerCopy = resultLowerCopy.substring(endIndex);
2564: }
2565: buffer.append(resultCopy.substring(0));
2566:
2567: result = buffer.toString();
2568:
2569: result = StringUtils.replace(result, "[list]", "<ul>", false);
2570: result = StringUtils.replace(result, "[/list]", "</ul>", false);
2571:
2572: Matcher color_matcher = BBCODE_COLOR.matcher(result);
2573: result = color_matcher.replaceAll("<font color=\"$1\">");
2574: result = StringUtils.replace(result, "[/color]", "</font>",
2575: false);
2576:
2577: Matcher size_matcher = BBCODE_SIZE.matcher(result);
2578: result = size_matcher.replaceAll("<font size=\"$1\">");
2579: result = StringUtils.replace(result, "[/size]", "</font>",
2580: false);
2581:
2582: result = convertUrl(result, BBCODE_URL_SHORT, shorten,
2583: sanitize, no_follow);
2584: result = convertUrl(result, BBCODE_URL_LONG, shorten, sanitize,
2585: no_follow);
2586:
2587: if (convert_bare) {
2588: result = convertUrl(result, BBCODE_BAREURL, shorten,
2589: sanitize, no_follow);
2590: }
2591:
2592: Matcher img_matcher = BBCODE_IMG.matcher(result);
2593: result = img_matcher
2594: .replaceAll("<div class=\"bbcode_img\"><img src=\"$1\" border=\"0\" alt=\"\" /></div>");
2595:
2596: Matcher quote_matcher_long = BBCODE_QUOTE_LONG.matcher(result);
2597: result = quote_matcher_long
2598: .replaceAll("<div class=\"quoteaccount\">$1:</div><div class=\"quotebody\">");
2599: result = StringUtils.replace(result, "[quote]",
2600: "<div class=\"quotebody\">", false);
2601: result = StringUtils.replace(result, "[/quote]", "</div>",
2602: false);
2603:
2604: result = StringUtils.replace(result, "\r\n", "<br />\r");
2605: result = StringUtils.replace(result, "\n", "<br />\n");
2606: result = StringUtils.replace(result, "\r", "\r\n");
2607:
2608: // remove the BR that could be added due to code formatting ppl
2609: // use to format lists
2610: result = StringUtils
2611: .replace(result, "ul><br />\r\n", "ul>\r\n");
2612: result = StringUtils.replace(result, "ul><br />\n", "ul>\n");
2613:
2614: return result;
2615: }
2616:
2617: /**
2618: * Converts a <code>String</code> to a <code>boolean</code> value.
2619: *
2620: * @param value The <code>String</code> to convert.
2621: * @return The corresponding <code>boolean</code> value.
2622: * @since 1.0
2623: */
2624: public static boolean convertToBoolean(String value) {
2625: if (null == value) {
2626: return false;
2627: }
2628:
2629: if (value.equals("1") || value.equalsIgnoreCase("t")
2630: || value.equalsIgnoreCase("true")
2631: || value.equalsIgnoreCase("y")
2632: || value.equalsIgnoreCase("yes")
2633: || value.equalsIgnoreCase("on")) {
2634: return true;
2635: }
2636:
2637: return false;
2638: }
2639:
2640: /**
2641: * Converts all tabs on a line to spaces according to the provided tab
2642: * width.
2643: *
2644: * @param line The line whose tabs have to be converted.
2645: * @param tabWidth The tab width.
2646: * @return A new <code>String</code> object containing the line with the
2647: * replaced tabs.
2648: * @since 1.0
2649: */
2650: public static String convertTabsToSpaces(String line, int tabWidth) {
2651: StringBuilder result = new StringBuilder();
2652: int tab_index = -1;
2653: int last_tab_index = 0;
2654: int added_chars = 0;
2655: int tab_size;
2656: while ((tab_index = line.indexOf("\t", last_tab_index)) != -1) {
2657: tab_size = tabWidth
2658: - ((tab_index + added_chars) % tabWidth);
2659: if (0 == tab_size) {
2660: tab_size = tabWidth;
2661: }
2662: added_chars += tab_size - 1;
2663: result.append(line.substring(last_tab_index, tab_index));
2664: result.append(StringUtils.repeat(" ", tab_size));
2665: last_tab_index = tab_index + 1;
2666: }
2667: if (0 == last_tab_index) {
2668: return line;
2669: } else {
2670: result.append(line.substring(last_tab_index));
2671: }
2672:
2673: return result.toString();
2674: }
2675:
2676: /**
2677: * Ensures that all whitespace is removed from a <code>String</code>.
2678: * <p>It also works with a <code>null</code> argument.
2679: *
2680: * @param source The <code>String</code> to trim.
2681: * @return The trimmed <code>String</code>.
2682: * @since 1.0
2683: */
2684: public static String trim(String source) {
2685: if (source == null || source.length() == 0) {
2686: return source;
2687: }
2688:
2689: return source.trim();
2690: }
2691:
2692: /**
2693: * Calculates the {@link DocumentPosition} of a character index in a
2694: * document.
2695: *
2696: * @param document a <code>String</code> with the document where the
2697: * position should be looked up in
2698: * @param characterIndex the index of the character
2699: * @return the resulting <code>DocumentPosition</code> instance; or
2700: * <p><code>null</code> if the <code>characterIndex</code> was invalid or
2701: * if the <code>document</code> was null
2702: * @since 1.0
2703: */
2704: public static DocumentPosition getDocumentPosition(String document,
2705: int characterIndex) {
2706: if (null == document || characterIndex < 0
2707: || characterIndex > document.length()) {
2708: return null;
2709: }
2710:
2711: int line = 0;
2712: int column;
2713:
2714: String[] linebreaks = new String[] { "\r\n", "\n", "\r" };
2715: int last_linebreak_index = 0;
2716: int next_linebreak_index = document.length();
2717: int match = -1;
2718: do {
2719: line++;
2720:
2721: for (String linebreak : linebreaks) {
2722: match = document.indexOf(linebreak,
2723: last_linebreak_index);
2724: if (match != -1) {
2725: if (match >= characterIndex) {
2726: next_linebreak_index = match;
2727: match = -1;
2728: break;
2729: }
2730:
2731: last_linebreak_index = match + linebreak.length();
2732: break;
2733: }
2734: }
2735: } while (match != -1);
2736:
2737: column = characterIndex - last_linebreak_index + 1;
2738:
2739: return new DocumentPosition(document.substring(
2740: last_linebreak_index, next_linebreak_index), line,
2741: column);
2742: }
2743:
2744: /**
2745: * Reformats a string where lines that are longer than <tt>width</tt>
2746: * are split apart at the earliest wordbreak or at maxLength, whichever is
2747: * sooner. If the width specified is less than 5 or greater than the input
2748: * Strings length the string will be returned as is.
2749: * <p>
2750: * Please note that this method can be lossy - trailing spaces on wrapped
2751: * lines may be trimmed.
2752: *
2753: * @param input the String to reformat.
2754: * @param width the maximum length of any one line.
2755: * @return a new String with reformatted as needed.
2756: */
2757: public static String wordWrap(String input, int width, Locale locale) {
2758: // handle invalid input
2759: if (input == null) {
2760: return "";
2761: } else if (width < 5) {
2762: return input;
2763: } else if (width >= input.length()) {
2764: return input;
2765: }
2766:
2767: // default locale
2768: if (locale == null) {
2769: locale = Locale.US;
2770: }
2771:
2772: StringBuilder buffer = new StringBuilder(input.length());
2773: int current_index = 0;
2774: int delimiter_index = 0;
2775: String seperator = "\n";
2776: String line;
2777:
2778: // go over the input string and jump from line to line
2779: while (current_index <= input.length()) {
2780: // look for the next linebreak
2781: delimiter_index = input.indexOf(seperator, current_index);
2782:
2783: // get the line that corresponds to it
2784: if (-1 == delimiter_index) {
2785: line = new String(input.substring(current_index, input
2786: .length()));
2787: current_index = input.length() + 1;
2788: } else {
2789: line = new String(input.substring(current_index,
2790: delimiter_index));
2791: current_index = delimiter_index + seperator.length();
2792: }
2793:
2794: // handle the wrapping of the line
2795: BreakIterator breaks = BreakIterator
2796: .getLineInstance(locale);
2797: breaks.setText(line);
2798:
2799: int line_start = 0;
2800: int start = breaks.first();
2801: int end = breaks.next();
2802: while (end != BreakIterator.DONE) {
2803: // check if the width has been exceeded
2804: if (end - 1 - line_start >= width) {
2805: boolean break_line = true;
2806:
2807: // first check if the last characters were spaces,
2808: // if they were and by removing them the width is not
2809: // exceeded, just continue
2810: if (Character.isWhitespace(line.charAt(end - 1))) {
2811: for (int j = end - 1; j >= 0; j--) {
2812: if (!Character.isWhitespace(line.charAt(j))) {
2813: if (j - line_start < width) {
2814: break_line = false;
2815: }
2816:
2817: break;
2818: }
2819: }
2820: }
2821:
2822: if (break_line) {
2823: String line_breaked = line.substring(
2824: line_start, start);
2825: // this can happen with trailing whitespace
2826: if (line_breaked.length() > width) {
2827: line_breaked = line_breaked.substring(0,
2828: width);
2829: }
2830: buffer.append(line_breaked);
2831:
2832: buffer.append("\n");
2833:
2834: line_start = start;
2835: }
2836: }
2837:
2838: start = end;
2839: end = breaks.next();
2840: }
2841:
2842: if (line_start < line.length()) {
2843: buffer.append(line.substring(line_start));
2844: }
2845:
2846: if (delimiter_index != -1) {
2847: buffer.append("\n");
2848: }
2849: }
2850:
2851: return buffer.toString();
2852: }
2853:
2854: public static class BbcodeOption extends EnumClass<String> {
2855: protected BbcodeOption(String identifier) {
2856: super(identifier);
2857: }
2858: }
2859: }
|