0001: /*
0002: * Static String formatting and query routines.
0003: * Copyright (C) 2001-2005 Stephen Ostermiller
0004: * http://ostermiller.org/contact.pl?regarding=Java+Utilities
0005: *
0006: * This program is free software; you can redistribute it and/or modify
0007: * it under the terms of the GNU General Public License as published by
0008: * the Free Software Foundation; either version 2 of the License, or
0009: * (at your option) any later version.
0010: *
0011: * This program is distributed in the hope that it will be useful,
0012: * but WITHOUT ANY WARRANTY; without even the implied warranty of
0013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
0014: * GNU General Public License for more details.
0015: *
0016: * See COPYING.TXT for details.
0017: */
0018:
0019: package com.Ostermiller.util;
0020:
0021: import java.util.HashMap;
0022: import java.util.regex.Pattern;
0023:
0024: /**
0025: * Utilities for String formatting, manipulation, and queries.
0026: * More information about this class is available from <a target="_top" href=
0027: * "http://ostermiller.org/utils/StringHelper.html">ostermiller.org</a>.
0028: *
0029: * @author Stephen Ostermiller http://ostermiller.org/contact.pl?regarding=Java+Utilities
0030: * @since ostermillerutils 1.00.00
0031: */
0032: public class StringHelper {
0033:
0034: /**
0035: * Pad the beginning of the given String with spaces until
0036: * the String is of the given length.
0037: * <p>
0038: * If a String is longer than the desired length,
0039: * it will not be truncated, however no padding
0040: * will be added.
0041: *
0042: * @param s String to be padded.
0043: * @param length desired length of result.
0044: * @return padded String.
0045: * @throws NullPointerException if s is null.
0046: *
0047: * @since ostermillerutils 1.00.00
0048: */
0049: public static String prepad(String s, int length) {
0050: return prepad(s, length, ' ');
0051: }
0052:
0053: /**
0054: * Pre-pend the given character to the String until
0055: * the result is the desired length.
0056: * <p>
0057: * If a String is longer than the desired length,
0058: * it will not be truncated, however no padding
0059: * will be added.
0060: *
0061: * @param s String to be padded.
0062: * @param length desired length of result.
0063: * @param c padding character.
0064: * @return padded String.
0065: * @throws NullPointerException if s is null.
0066: *
0067: * @since ostermillerutils 1.00.00
0068: */
0069: public static String prepad(String s, int length, char c) {
0070: int needed = length - s.length();
0071: if (needed <= 0) {
0072: return s;
0073: }
0074: char padding[] = new char[needed];
0075: java.util.Arrays.fill(padding, c);
0076: StringBuffer sb = new StringBuffer(length);
0077: sb.append(padding);
0078: sb.append(s);
0079: return sb.toString();
0080: }
0081:
0082: /**
0083: * Pad the end of the given String with spaces until
0084: * the String is of the given length.
0085: * <p>
0086: * If a String is longer than the desired length,
0087: * it will not be truncated, however no padding
0088: * will be added.
0089: *
0090: * @param s String to be padded.
0091: * @param length desired length of result.
0092: * @return padded String.
0093: * @throws NullPointerException if s is null.
0094: *
0095: * @since ostermillerutils 1.00.00
0096: */
0097: public static String postpad(String s, int length) {
0098: return postpad(s, length, ' ');
0099: }
0100:
0101: /**
0102: * Append the given character to the String until
0103: * the result is the desired length.
0104: * <p>
0105: * If a String is longer than the desired length,
0106: * it will not be truncated, however no padding
0107: * will be added.
0108: *
0109: * @param s String to be padded.
0110: * @param length desired length of result.
0111: * @param c padding character.
0112: * @return padded String.
0113: * @throws NullPointerException if s is null.
0114: *
0115: * @since ostermillerutils 1.00.00
0116: */
0117: public static String postpad(String s, int length, char c) {
0118: int needed = length - s.length();
0119: if (needed <= 0) {
0120: return s;
0121: }
0122: char padding[] = new char[needed];
0123: java.util.Arrays.fill(padding, c);
0124: StringBuffer sb = new StringBuffer(length);
0125: sb.append(s);
0126: sb.append(padding);
0127: return sb.toString();
0128: }
0129:
0130: /**
0131: * Pad the beginning and end of the given String with spaces until
0132: * the String is of the given length. The result is that the original
0133: * String is centered in the middle of the new string.
0134: * <p>
0135: * If the number of characters to pad is even, then the padding
0136: * will be split evenly between the beginning and end, otherwise,
0137: * the extra character will be added to the end.
0138: * <p>
0139: * If a String is longer than the desired length,
0140: * it will not be truncated, however no padding
0141: * will be added.
0142: *
0143: * @param s String to be padded.
0144: * @param length desired length of result.
0145: * @return padded String.
0146: * @throws NullPointerException if s is null.
0147: *
0148: * @since ostermillerutils 1.00.00
0149: */
0150: public static String midpad(String s, int length) {
0151: return midpad(s, length, ' ');
0152: }
0153:
0154: /**
0155: * Pad the beginning and end of the given String with the given character
0156: * until the result is the desired length. The result is that the original
0157: * String is centered in the middle of the new string.
0158: * <p>
0159: * If the number of characters to pad is even, then the padding
0160: * will be split evenly between the beginning and end, otherwise,
0161: * the extra character will be added to the end.
0162: * <p>
0163: * If a String is longer than the desired length,
0164: * it will not be truncated, however no padding
0165: * will be added.
0166: *
0167: * @param s String to be padded.
0168: * @param length desired length of result.
0169: * @param c padding character.
0170: * @return padded String.
0171: * @throws NullPointerException if s is null.
0172: *
0173: * @since ostermillerutils 1.00.00
0174: */
0175: public static String midpad(String s, int length, char c) {
0176: int needed = length - s.length();
0177: if (needed <= 0) {
0178: return s;
0179: }
0180: int beginning = needed / 2;
0181: int end = beginning + needed % 2;
0182: char prepadding[] = new char[beginning];
0183: java.util.Arrays.fill(prepadding, c);
0184: char postpadding[] = new char[end];
0185: java.util.Arrays.fill(postpadding, c);
0186: StringBuffer sb = new StringBuffer(length);
0187: sb.append(prepadding);
0188: sb.append(s);
0189: sb.append(postpadding);
0190: return sb.toString();
0191: }
0192:
0193: /**
0194: * Split the given String into tokens.
0195: * <P>
0196: * This method is meant to be similar to the split
0197: * function in other programming languages but it does
0198: * not use regular expressions. Rather the String is
0199: * split on a single String literal.
0200: * <P>
0201: * Unlike java.util.StringTokenizer which accepts
0202: * multiple character tokens as delimiters, the delimiter
0203: * here is a single String literal.
0204: * <P>
0205: * Each null token is returned as an empty String.
0206: * Delimiters are never returned as tokens.
0207: * <P>
0208: * If there is no delimiter because it is either empty or
0209: * null, the only element in the result is the original String.
0210: * <P>
0211: * StringHelper.split("1-2-3", "-");<br>
0212: * result: {"1","2","3"}<br>
0213: * StringHelper.split("-1--2-", "-");<br>
0214: * result: {"","1","","2",""}<br>
0215: * StringHelper.split("123", "");<br>
0216: * result: {"123"}<br>
0217: * StringHelper.split("1-2---3----4", "--");<br>
0218: * result: {"1-2","-3","","4"}<br>
0219: *
0220: * @param s String to be split.
0221: * @param delimiter String literal on which to split.
0222: * @return an array of tokens.
0223: * @throws NullPointerException if s is null.
0224: *
0225: * @since ostermillerutils 1.00.00
0226: */
0227: public static String[] split(String s, String delimiter) {
0228: int delimiterLength;
0229: // the next statement has the side effect of throwing a null pointer
0230: // exception if s is null.
0231: int stringLength = s.length();
0232: if (delimiter == null
0233: || (delimiterLength = delimiter.length()) == 0) {
0234: // it is not inherently clear what to do if there is no delimiter
0235: // On one hand it would make sense to return each character because
0236: // the null String can be found between each pair of characters in
0237: // a String. However, it can be found many times there and we don'
0238: // want to be returning multiple null tokens.
0239: // returning the whole String will be defined as the correct behavior
0240: // in this instance.
0241: return new String[] { s };
0242: }
0243:
0244: // a two pass solution is used because a one pass solution would
0245: // require the possible resizing and copying of memory structures
0246: // In the worst case it would have to be resized n times with each
0247: // resize having a O(n) copy leading to an O(n^2) algorithm.
0248:
0249: int count;
0250: int start;
0251: int end;
0252:
0253: // Scan s and count the tokens.
0254: count = 0;
0255: start = 0;
0256: while ((end = s.indexOf(delimiter, start)) != -1) {
0257: count++;
0258: start = end + delimiterLength;
0259: }
0260: count++;
0261:
0262: // allocate an array to return the tokens,
0263: // we now know how big it should be
0264: String[] result = new String[count];
0265:
0266: // Scan s again, but this time pick out the tokens
0267: count = 0;
0268: start = 0;
0269: while ((end = s.indexOf(delimiter, start)) != -1) {
0270: result[count] = (s.substring(start, end));
0271: count++;
0272: start = end + delimiterLength;
0273: }
0274: end = stringLength;
0275: result[count] = s.substring(start, end);
0276:
0277: return (result);
0278: }
0279:
0280: /**
0281: * Split the given String into tokens. Delimiters will
0282: * be returned as tokens.
0283: * <P>
0284: * This method is meant to be similar to the split
0285: * function in other programming languages but it does
0286: * not use regular expressions. Rather the String is
0287: * split on a single String literal.
0288: * <P>
0289: * Unlike java.util.StringTokenizer which accepts
0290: * multiple character tokens as delimiters, the delimiter
0291: * here is a single String literal.
0292: * <P>
0293: * Each null token is returned as an empty String.
0294: * Delimiters are never returned as tokens.
0295: * <P>
0296: * If there is no delimiter because it is either empty or
0297: * null, the only element in the result is the original String.
0298: * <P>
0299: * StringHelper.split("1-2-3", "-");<br>
0300: * result: {"1","-","2","-","3"}<br>
0301: * StringHelper.split("-1--2-", "-");<br>
0302: * result: {"","-","1","-","","-","2","-",""}<br>
0303: * StringHelper.split("123", "");<br>
0304: * result: {"123"}<br>
0305: * StringHelper.split("1-2--3---4----5", "--");<br>
0306: * result: {"1-2","--","3","--","-4","--","","--","5"}<br>
0307: *
0308: * @param s String to be split.
0309: * @param delimiter String literal on which to split.
0310: * @return an array of tokens.
0311: * @throws NullPointerException if s is null.
0312: *
0313: * @since ostermillerutils 1.05.00
0314: */
0315: public static String[] splitIncludeDelimiters(String s,
0316: String delimiter) {
0317: int delimiterLength;
0318: // the next statement has the side effect of throwing a null pointer
0319: // exception if s is null.
0320: int stringLength = s.length();
0321: if (delimiter == null
0322: || (delimiterLength = delimiter.length()) == 0) {
0323: // it is not inherently clear what to do if there is no delimiter
0324: // On one hand it would make sense to return each character because
0325: // the null String can be found between each pair of characters in
0326: // a String. However, it can be found many times there and we don'
0327: // want to be returning multiple null tokens.
0328: // returning the whole String will be defined as the correct behavior
0329: // in this instance.
0330: return new String[] { s };
0331: }
0332:
0333: // a two pass solution is used because a one pass solution would
0334: // require the possible resizing and copying of memory structures
0335: // In the worst case it would have to be resized n times with each
0336: // resize having a O(n) copy leading to an O(n^2) algorithm.
0337:
0338: int count;
0339: int start;
0340: int end;
0341:
0342: // Scan s and count the tokens.
0343: count = 0;
0344: start = 0;
0345: while ((end = s.indexOf(delimiter, start)) != -1) {
0346: count += 2;
0347: start = end + delimiterLength;
0348: }
0349: count++;
0350:
0351: // allocate an array to return the tokens,
0352: // we now know how big it should be
0353: String[] result = new String[count];
0354:
0355: // Scan s again, but this time pick out the tokens
0356: count = 0;
0357: start = 0;
0358: while ((end = s.indexOf(delimiter, start)) != -1) {
0359: result[count] = (s.substring(start, end));
0360: count++;
0361: result[count] = delimiter;
0362: count++;
0363: start = end + delimiterLength;
0364: }
0365: end = stringLength;
0366: result[count] = s.substring(start, end);
0367:
0368: return (result);
0369: }
0370:
0371: /**
0372: * Join all the elements of a string array into a single
0373: * String.
0374: * <p>
0375: * If the given array empty an empty string
0376: * will be returned. Null elements of the array are allowed
0377: * and will be treated like empty Strings.
0378: *
0379: * @param array Array to be joined into a string.
0380: * @return Concatenation of all the elements of the given array.
0381: * @throws NullPointerException if array is null.
0382: *
0383: * @since ostermillerutils 1.05.00
0384: */
0385: public static String join(String[] array) {
0386: return join(array, "");
0387: }
0388:
0389: /**
0390: * Join all the elements of a string array into a single
0391: * String.
0392: * <p>
0393: * If the given array empty an empty string
0394: * will be returned. Null elements of the array are allowed
0395: * and will be treated like empty Strings.
0396: *
0397: * @param array Array to be joined into a string.
0398: * @param delimiter String to place between array elements.
0399: * @return Concatenation of all the elements of the given array with the the delimiter in between.
0400: * @throws NullPointerException if array or delimiter is null.
0401: *
0402: * @since ostermillerutils 1.05.00
0403: */
0404: public static String join(String[] array, String delimiter) {
0405: // Cache the length of the delimiter
0406: // has the side effect of throwing a NullPointerException if
0407: // the delimiter is null.
0408: int delimiterLength = delimiter.length();
0409:
0410: // Nothing in the array return empty string
0411: // has the side effect of throwing a NullPointerException if
0412: // the array is null.
0413: if (array.length == 0)
0414: return "";
0415:
0416: // Only one thing in the array, return it.
0417: if (array.length == 1) {
0418: if (array[0] == null)
0419: return "";
0420: return array[0];
0421: }
0422:
0423: // Make a pass through and determine the size
0424: // of the resulting string.
0425: int length = 0;
0426: for (int i = 0; i < array.length; i++) {
0427: if (array[i] != null)
0428: length += array[i].length();
0429: if (i < array.length - 1)
0430: length += delimiterLength;
0431: }
0432:
0433: // Make a second pass through and concatenate everything
0434: // into a string buffer.
0435: StringBuffer result = new StringBuffer(length);
0436: for (int i = 0; i < array.length; i++) {
0437: if (array[i] != null)
0438: result.append(array[i]);
0439: if (i < array.length - 1)
0440: result.append(delimiter);
0441: }
0442:
0443: return result.toString();
0444: }
0445:
0446: /**
0447: * Replace occurrences of a substring.
0448: *
0449: * StringHelper.replace("1-2-3", "-", "|");<br>
0450: * result: "1|2|3"<br>
0451: * StringHelper.replace("-1--2-", "-", "|");<br>
0452: * result: "|1||2|"<br>
0453: * StringHelper.replace("123", "", "|");<br>
0454: * result: "123"<br>
0455: * StringHelper.replace("1-2---3----4", "--", "|");<br>
0456: * result: "1-2|-3||4"<br>
0457: * StringHelper.replace("1-2---3----4", "--", "---");<br>
0458: * result: "1-2----3------4"<br>
0459: *
0460: * @param s String to be modified.
0461: * @param find String to find.
0462: * @param replace String to replace.
0463: * @return a string with all the occurrences of the string to find replaced.
0464: * @throws NullPointerException if s is null.
0465: *
0466: * @since ostermillerutils 1.00.00
0467: */
0468: public static String replace(String s, String find, String replace) {
0469: int findLength;
0470: // the next statement has the side effect of throwing a null pointer
0471: // exception if s is null.
0472: int stringLength = s.length();
0473: if (find == null || (findLength = find.length()) == 0) {
0474: // If there is nothing to find, we won't try and find it.
0475: return s;
0476: }
0477: if (replace == null) {
0478: // a null string and an empty string are the same
0479: // for replacement purposes.
0480: replace = "";
0481: }
0482: int replaceLength = replace.length();
0483:
0484: // We need to figure out how long our resulting string will be.
0485: // This is required because without it, the possible resizing
0486: // and copying of memory structures could lead to an unacceptable runtime.
0487: // In the worst case it would have to be resized n times with each
0488: // resize having a O(n) copy leading to an O(n^2) algorithm.
0489: int length;
0490: if (findLength == replaceLength) {
0491: // special case in which we don't need to count the replacements
0492: // because the count falls out of the length formula.
0493: length = stringLength;
0494: } else {
0495: int count;
0496: int start;
0497: int end;
0498:
0499: // Scan s and count the number of times we find our target.
0500: count = 0;
0501: start = 0;
0502: while ((end = s.indexOf(find, start)) != -1) {
0503: count++;
0504: start = end + findLength;
0505: }
0506: if (count == 0) {
0507: // special case in which on first pass, we find there is nothing
0508: // to be replaced. No need to do a second pass or create a string buffer.
0509: return s;
0510: }
0511: length = stringLength
0512: - (count * (findLength - replaceLength));
0513: }
0514:
0515: int start = 0;
0516: int end = s.indexOf(find, start);
0517: if (end == -1) {
0518: // nothing was found in the string to replace.
0519: // we can get this if the find and replace strings
0520: // are the same length because we didn't check before.
0521: // in this case, we will return the original string
0522: return s;
0523: }
0524: // it looks like we actually have something to replace
0525: // *sigh* allocate memory for it.
0526: StringBuffer sb = new StringBuffer(length);
0527:
0528: // Scan s and do the replacements
0529: while (end != -1) {
0530: sb.append(s.substring(start, end));
0531: sb.append(replace);
0532: start = end + findLength;
0533: end = s.indexOf(find, start);
0534: }
0535: end = stringLength;
0536: sb.append(s.substring(start, end));
0537:
0538: return (sb.toString());
0539: }
0540:
0541: /**
0542: * Replaces characters that may be confused by a HTML
0543: * parser with their equivalent character entity references.
0544: * <p>
0545: * Any data that will appear as text on a web page should
0546: * be be escaped. This is especially important for data
0547: * that comes from untrusted sources such as Internet users.
0548: * A common mistake in CGI programming is to ask a user for
0549: * data and then put that data on a web page. For example:<pre>
0550: * Server: What is your name?
0551: * User: <b>Joe<b>
0552: * Server: Hello <b>Joe</b>, Welcome</pre>
0553: * If the name is put on the page without checking that it doesn't
0554: * contain HTML code or without sanitizing that HTML code, the user
0555: * could reformat the page, insert scripts, and control the the
0556: * content on your web server.
0557: * <p>
0558: * This method will replace HTML characters such as > with their
0559: * HTML entity reference (&gt;) so that the html parser will
0560: * be sure to interpret them as plain text rather than HTML or script.
0561: * <p>
0562: * This method should be used for both data to be displayed in text
0563: * in the html document, and data put in form elements. For example:<br>
0564: * <code><html><body><i>This in not a &lt;tag&gt;
0565: * in HTML</i></body></html></code><br>
0566: * and<br>
0567: * <code><form><input type="hidden" name="date" value="<i>This data could
0568: * be &quot;malicious&quot;</i>"></form></code><br>
0569: * In the second example, the form data would be properly be resubmitted
0570: * to your CGI script in the URLEncoded format:<br>
0571: * <code><i>This data could be %22malicious%22</i></code>
0572: *
0573: * @param s String to be escaped
0574: * @return escaped String
0575: * @throws NullPointerException if s is null.
0576: *
0577: * @since ostermillerutils 1.00.00
0578: */
0579: public static String escapeHTML(String s) {
0580: int length = s.length();
0581: int newLength = length;
0582: boolean someCharacterEscaped = false;
0583: // first check for characters that might
0584: // be dangerous and calculate a length
0585: // of the string that has escapes.
0586: for (int i = 0; i < length; i++) {
0587: char c = s.charAt(i);
0588: int cint = 0xffff & c;
0589: if (cint < 32) {
0590: switch (c) {
0591: case '\r':
0592: case '\n':
0593: case '\t':
0594: case '\f': {
0595: // Leave whitespace untouched
0596: }
0597: break;
0598: default: {
0599: newLength -= 1;
0600: someCharacterEscaped = true;
0601: }
0602: }
0603: } else {
0604: switch (c) {
0605: case '\"': {
0606: newLength += 5;
0607: someCharacterEscaped = true;
0608: }
0609: break;
0610: case '&':
0611: case '\'': {
0612: newLength += 4;
0613: someCharacterEscaped = true;
0614: }
0615: break;
0616: case '<':
0617: case '>': {
0618: newLength += 3;
0619: someCharacterEscaped = true;
0620: }
0621: break;
0622: }
0623: }
0624: }
0625: if (!someCharacterEscaped) {
0626: // nothing to escape in the string
0627: return s;
0628: }
0629: StringBuffer sb = new StringBuffer(newLength);
0630: for (int i = 0; i < length; i++) {
0631: char c = s.charAt(i);
0632: int cint = 0xffff & c;
0633: if (cint < 32) {
0634: switch (c) {
0635: case '\r':
0636: case '\n':
0637: case '\t':
0638: case '\f': {
0639: sb.append(c);
0640: }
0641: break;
0642: default: {
0643: // Remove this character
0644: }
0645: }
0646: } else {
0647: switch (c) {
0648: case '\"': {
0649: sb.append(""");
0650: }
0651: break;
0652: case '\'': {
0653: sb.append("'");
0654: }
0655: break;
0656: case '&': {
0657: sb.append("&");
0658: }
0659: break;
0660: case '<': {
0661: sb.append("<");
0662: }
0663: break;
0664: case '>': {
0665: sb.append(">");
0666: }
0667: break;
0668: default: {
0669: sb.append(c);
0670: }
0671: }
0672: }
0673: }
0674: return sb.toString();
0675: }
0676:
0677: /**
0678: * Replaces characters that may be confused by an SQL
0679: * parser with their equivalent escape characters.
0680: * <p>
0681: * Any data that will be put in an SQL query should
0682: * be be escaped. This is especially important for data
0683: * that comes from untrusted sources such as Internet users.
0684: * <p>
0685: * For example if you had the following SQL query:<br>
0686: * <code>"SELECT * FROM addresses WHERE name='" + name + "' AND private='N'"</code><br>
0687: * Without this function a user could give <code>" OR 1=1 OR ''='"</code>
0688: * as their name causing the query to be:<br>
0689: * <code>"SELECT * FROM addresses WHERE name='' OR 1=1 OR ''='' AND private='N'"</code><br>
0690: * which will give all addresses, including private ones.<br>
0691: * Correct usage would be:<br>
0692: * <code>"SELECT * FROM addresses WHERE name='" + StringHelper.escapeSQL(name) + "' AND private='N'"</code><br>
0693: * <p>
0694: * Another way to avoid this problem is to use a PreparedStatement
0695: * with appropriate place holders.
0696: *
0697: * @param s String to be escaped
0698: * @return escaped String
0699: * @throws NullPointerException if s is null.
0700: *
0701: * @since ostermillerutils 1.00.00
0702: */
0703: public static String escapeSQL(String s) {
0704: int length = s.length();
0705: int newLength = length;
0706: // first check for characters that might
0707: // be dangerous and calculate a length
0708: // of the string that has escapes.
0709: for (int i = 0; i < length; i++) {
0710: char c = s.charAt(i);
0711: switch (c) {
0712: case '\\':
0713: case '\"':
0714: case '\'':
0715: case '\0': {
0716: newLength += 1;
0717: }
0718: break;
0719: }
0720: }
0721: if (length == newLength) {
0722: // nothing to escape in the string
0723: return s;
0724: }
0725: StringBuffer sb = new StringBuffer(newLength);
0726: for (int i = 0; i < length; i++) {
0727: char c = s.charAt(i);
0728: switch (c) {
0729: case '\\': {
0730: sb.append("\\\\");
0731: }
0732: break;
0733: case '\"': {
0734: sb.append("\\\"");
0735: }
0736: break;
0737: case '\'': {
0738: sb.append("\\\'");
0739: }
0740: break;
0741: case '\0': {
0742: sb.append("\\0");
0743: }
0744: break;
0745: default: {
0746: sb.append(c);
0747: }
0748: }
0749: }
0750: return sb.toString();
0751: }
0752:
0753: /**
0754: * Replaces characters that are not allowed in a Java style
0755: * string literal with their escape characters. Specifically
0756: * quote ("), single quote ('), new line (\n), carriage return (\r),
0757: * and backslash (\), and tab (\t) are escaped.
0758: *
0759: * @param s String to be escaped
0760: * @return escaped String
0761: * @throws NullPointerException if s is null.
0762: *
0763: * @since ostermillerutils 1.00.00
0764: */
0765: public static String escapeJavaLiteral(String s) {
0766: int length = s.length();
0767: int newLength = length;
0768: // first check for characters that might
0769: // be dangerous and calculate a length
0770: // of the string that has escapes.
0771: for (int i = 0; i < length; i++) {
0772: char c = s.charAt(i);
0773: switch (c) {
0774: case '\"':
0775: case '\'':
0776: case '\n':
0777: case '\r':
0778: case '\t':
0779: case '\\': {
0780: newLength += 1;
0781: }
0782: break;
0783: }
0784: }
0785: if (length == newLength) {
0786: // nothing to escape in the string
0787: return s;
0788: }
0789: StringBuffer sb = new StringBuffer(newLength);
0790: for (int i = 0; i < length; i++) {
0791: char c = s.charAt(i);
0792: switch (c) {
0793: case '\"': {
0794: sb.append("\\\"");
0795: }
0796: break;
0797: case '\'': {
0798: sb.append("\\\'");
0799: }
0800: break;
0801: case '\n': {
0802: sb.append("\\n");
0803: }
0804: break;
0805: case '\r': {
0806: sb.append("\\r");
0807: }
0808: break;
0809: case '\t': {
0810: sb.append("\\t");
0811: }
0812: break;
0813: case '\\': {
0814: sb.append("\\\\");
0815: }
0816: break;
0817: default: {
0818: sb.append(c);
0819: }
0820: }
0821: }
0822: return sb.toString();
0823: }
0824:
0825: /**
0826: * Trim any of the characters contained in the second
0827: * string from the beginning and end of the first.
0828: *
0829: * @param s String to be trimmed.
0830: * @param c list of characters to trim from s.
0831: * @return trimmed String.
0832: * @throws NullPointerException if s is null.
0833: *
0834: * @since ostermillerutils 1.00.00
0835: */
0836: public static String trim(String s, String c) {
0837: int length = s.length();
0838: if (c == null) {
0839: return s;
0840: }
0841: int cLength = c.length();
0842: if (c.length() == 0) {
0843: return s;
0844: }
0845: int start = 0;
0846: int end = length;
0847: boolean found; // trim-able character found.
0848: int i;
0849: // Start from the beginning and find the
0850: // first non-trim-able character.
0851: found = false;
0852: for (i = 0; !found && i < length; i++) {
0853: char ch = s.charAt(i);
0854: found = true;
0855: for (int j = 0; found && j < cLength; j++) {
0856: if (c.charAt(j) == ch)
0857: found = false;
0858: }
0859: }
0860: // if all characters are trim-able.
0861: if (!found)
0862: return "";
0863: start = i - 1;
0864: // Start from the end and find the
0865: // last non-trim-able character.
0866: found = false;
0867: for (i = length - 1; !found && i >= 0; i--) {
0868: char ch = s.charAt(i);
0869: found = true;
0870: for (int j = 0; found && j < cLength; j++) {
0871: if (c.charAt(j) == ch)
0872: found = false;
0873: }
0874: }
0875: end = i + 2;
0876: return s.substring(start, end);
0877: }
0878:
0879: private static HashMap<String, Integer> htmlEntities = new HashMap<String, Integer>();
0880: static {
0881: htmlEntities.put("n" + "b" + "s" + "p", new Integer(160));
0882: htmlEntities.put("i" + "e" + "x" + "c" + "l", new Integer(161));
0883: htmlEntities.put("cent", new Integer(162));
0884: htmlEntities.put("pound", new Integer(163));
0885: htmlEntities.put("c" + "u" + "r" + "r" + "e" + "n",
0886: new Integer(164));
0887: htmlEntities.put("y" + "e" + "n", new Integer(165));
0888: htmlEntities.put("b" + "r" + "v" + "b" + "a" + "r",
0889: new Integer(166));
0890: htmlEntities.put("sect", new Integer(167));
0891: htmlEntities.put("u" + "m" + "l", new Integer(168));
0892: htmlEntities.put("copy", new Integer(169));
0893: htmlEntities.put("o" + "r" + "d" + "f", new Integer(170));
0894: htmlEntities.put("l" + "a" + "quo", new Integer(171));
0895: htmlEntities.put("not", new Integer(172));
0896: htmlEntities.put("shy", new Integer(173));
0897: htmlEntities.put("r" + "e" + "g", new Integer(174));
0898: htmlEntities.put("m" + "a" + "c" + "r", new Integer(175));
0899: htmlEntities.put("d" + "e" + "g", new Integer(176));
0900: htmlEntities.put("plus" + "m" + "n", new Integer(177));
0901: htmlEntities.put("sup2", new Integer(178));
0902: htmlEntities.put("sup3", new Integer(179));
0903: htmlEntities.put("acute", new Integer(180));
0904: htmlEntities.put("m" + "i" + "c" + "r" + "o", new Integer(181));
0905: htmlEntities.put("par" + "a", new Integer(182));
0906: htmlEntities.put("mid" + "dot", new Integer(183));
0907: htmlEntities.put("c" + "e" + "d" + "i" + "l", new Integer(184));
0908: htmlEntities.put("sup1", new Integer(185));
0909: htmlEntities.put("o" + "r" + "d" + "m", new Integer(186));
0910: htmlEntities.put("r" + "a" + "quo", new Integer(187));
0911: htmlEntities.put("frac14", new Integer(188));
0912: htmlEntities.put("frac12", new Integer(189));
0913: htmlEntities.put("frac34", new Integer(190));
0914: htmlEntities.put("i" + "quest", new Integer(191));
0915: htmlEntities.put("A" + "grave", new Integer(192));
0916: htmlEntities.put("A" + "a" + "cute", new Integer(193));
0917: htmlEntities.put("A" + "c" + "i" + "r" + "c", new Integer(194));
0918: htmlEntities.put("A" + "tilde", new Integer(195));
0919: htmlEntities.put("A" + "u" + "m" + "l", new Integer(196));
0920: htmlEntities.put("A" + "ring", new Integer(197));
0921: htmlEntities.put("A" + "E" + "l" + "i" + "g", new Integer(198));
0922: htmlEntities.put("C" + "c" + "e" + "d" + "i" + "l",
0923: new Integer(199));
0924: htmlEntities.put("E" + "grave", new Integer(200));
0925: htmlEntities.put("E" + "a" + "cute", new Integer(201));
0926: htmlEntities.put("E" + "c" + "i" + "r" + "c", new Integer(202));
0927: htmlEntities.put("E" + "u" + "m" + "l", new Integer(203));
0928: htmlEntities.put("I" + "grave", new Integer(204));
0929: htmlEntities.put("I" + "a" + "cute", new Integer(205));
0930: htmlEntities.put("I" + "c" + "i" + "r" + "c", new Integer(206));
0931: htmlEntities.put("I" + "u" + "m" + "l", new Integer(207));
0932: htmlEntities.put("ETH", new Integer(208));
0933: htmlEntities.put("N" + "tilde", new Integer(209));
0934: htmlEntities.put("O" + "grave", new Integer(210));
0935: htmlEntities.put("O" + "a" + "cute", new Integer(211));
0936: htmlEntities.put("O" + "c" + "i" + "r" + "c", new Integer(212));
0937: htmlEntities.put("O" + "tilde", new Integer(213));
0938: htmlEntities.put("O" + "u" + "" + "m" + "l", new Integer(214));
0939: htmlEntities.put("times", new Integer(215));
0940: htmlEntities.put("O" + "slash", new Integer(216));
0941: htmlEntities.put("U" + "grave", new Integer(217));
0942: htmlEntities.put("U" + "a" + "cute", new Integer(218));
0943: htmlEntities.put("U" + "c" + "i" + "r" + "c", new Integer(219));
0944: htmlEntities.put("U" + "u" + "m" + "l", new Integer(220));
0945: htmlEntities.put("Y" + "a" + "cute", new Integer(221));
0946: htmlEntities.put("THORN", new Integer(222));
0947: htmlEntities.put("s" + "z" + "l" + "i" + "g", new Integer(223));
0948: htmlEntities.put("a" + "grave", new Integer(224));
0949: htmlEntities.put("a" + "a" + "cute", new Integer(225));
0950: htmlEntities.put("a" + "c" + "i" + "r" + "c", new Integer(226));
0951: htmlEntities.put("a" + "tilde", new Integer(227));
0952: htmlEntities.put("a" + "u" + "m" + "l", new Integer(228));
0953: htmlEntities.put("a" + "ring", new Integer(229));
0954: htmlEntities.put("a" + "e" + "l" + "i" + "g", new Integer(230));
0955: htmlEntities.put("c" + "c" + "e" + "d" + "i" + "l",
0956: new Integer(231));
0957: htmlEntities.put("e" + "grave", new Integer(232));
0958: htmlEntities.put("e" + "a" + "cute", new Integer(233));
0959: htmlEntities.put("e" + "c" + "i" + "r" + "c", new Integer(234));
0960: htmlEntities.put("e" + "u" + "m" + "l", new Integer(235));
0961: htmlEntities.put("i" + "grave", new Integer(236));
0962: htmlEntities.put("i" + "a" + "cute", new Integer(237));
0963: htmlEntities.put("i" + "c" + "i" + "r" + "c", new Integer(238));
0964: htmlEntities.put("i" + "u" + "" + "m" + "l", new Integer(239));
0965: htmlEntities.put("e" + "t" + "h", new Integer(240));
0966: htmlEntities.put("n" + "tilde", new Integer(241));
0967: htmlEntities.put("o" + "grave", new Integer(242));
0968: htmlEntities.put("o" + "a" + "cute", new Integer(243));
0969: htmlEntities.put("o" + "c" + "i" + "r" + "c", new Integer(244));
0970: htmlEntities.put("o" + "tilde", new Integer(245));
0971: htmlEntities.put("o" + "u" + "m" + "l", new Integer(246));
0972: htmlEntities.put("divide", new Integer(247));
0973: htmlEntities.put("o" + "slash", new Integer(248));
0974: htmlEntities.put("u" + "grave", new Integer(249));
0975: htmlEntities.put("u" + "a" + "cute", new Integer(250));
0976: htmlEntities.put("u" + "c" + "i" + "r" + "c", new Integer(251));
0977: htmlEntities.put("u" + "u" + "m" + "l", new Integer(252));
0978: htmlEntities.put("y" + "a" + "cute", new Integer(253));
0979: htmlEntities.put("thorn", new Integer(254));
0980: htmlEntities.put("y" + "u" + "m" + "l", new Integer(255));
0981: htmlEntities.put("f" + "no" + "f", new Integer(402));
0982: htmlEntities.put("Alpha", new Integer(913));
0983: htmlEntities.put("Beta", new Integer(914));
0984: htmlEntities.put("Gamma", new Integer(915));
0985: htmlEntities.put("Delta", new Integer(916));
0986: htmlEntities.put("Epsilon", new Integer(917));
0987: htmlEntities.put("Z" + "e" + "t" + "a", new Integer(918));
0988: htmlEntities.put("E" + "t" + "a", new Integer(919));
0989: htmlEntities.put("T" + "h" + "e" + "t" + "a", new Integer(920));
0990: htmlEntities.put("I" + "o" + "t" + "a", new Integer(921));
0991: htmlEntities.put("K" + "a" + "p" + "pa", new Integer(922));
0992: htmlEntities.put("Lambda", new Integer(923));
0993: htmlEntities.put("M" + "u", new Integer(924));
0994: htmlEntities.put("N" + "u", new Integer(925));
0995: htmlEntities.put("Xi", new Integer(926));
0996: htmlEntities.put("O" + "m" + "i" + "c" + "r" + "on",
0997: new Integer(927));
0998: htmlEntities.put("Pi", new Integer(928));
0999: htmlEntities.put("R" + "h" + "o", new Integer(929));
1000: htmlEntities.put("S" + "i" + "g" + "m" + "a", new Integer(931));
1001: htmlEntities.put("Tau", new Integer(932));
1002: htmlEntities.put("Up" + "s" + "i" + "l" + "on",
1003: new Integer(933));
1004: htmlEntities.put("P" + "h" + "i", new Integer(934));
1005: htmlEntities.put("C" + "h" + "i", new Integer(935));
1006: htmlEntities.put("P" + "s" + "i", new Integer(936));
1007: htmlEntities.put("O" + "m" + "e" + "g" + "a", new Integer(937));
1008: htmlEntities.put("alpha", new Integer(945));
1009: htmlEntities.put("beta", new Integer(946));
1010: htmlEntities.put("gamma", new Integer(947));
1011: htmlEntities.put("delta", new Integer(948));
1012: htmlEntities.put("epsilon", new Integer(949));
1013: htmlEntities.put("z" + "e" + "t" + "a", new Integer(950));
1014: htmlEntities.put("e" + "t" + "a", new Integer(951));
1015: htmlEntities.put("the" + "t" + "a", new Integer(952));
1016: htmlEntities.put("i" + "o" + "t" + "a", new Integer(953));
1017: htmlEntities.put("k" + "a" + "p" + "pa", new Integer(954));
1018: htmlEntities.put("lambda", new Integer(955));
1019: htmlEntities.put("m" + "u", new Integer(956));
1020: htmlEntities.put("n" + "u", new Integer(957));
1021: htmlEntities.put("xi", new Integer(958));
1022: htmlEntities.put("o" + "m" + "i" + "" + "c" + "r" + "on",
1023: new Integer(959));
1024: htmlEntities.put("pi", new Integer(960));
1025: htmlEntities.put("r" + "h" + "o", new Integer(961));
1026: htmlEntities.put("s" + "i" + "g" + "m" + "a" + "f",
1027: new Integer(962));
1028: htmlEntities.put("s" + "i" + "g" + "m" + "a", new Integer(963));
1029: htmlEntities.put("tau", new Integer(964));
1030: htmlEntities.put("up" + "s" + "i" + "l" + "on",
1031: new Integer(965));
1032: htmlEntities.put("p" + "h" + "i", new Integer(966));
1033: htmlEntities.put("c" + "h" + "i", new Integer(967));
1034: htmlEntities.put("p" + "s" + "i", new Integer(968));
1035: htmlEntities.put("o" + "m" + "e" + "g" + "a", new Integer(969));
1036: htmlEntities.put("the" + "t" + "a" + "s" + "y" + "m",
1037: new Integer(977));
1038: htmlEntities.put("up" + "s" + "i" + "h", new Integer(978));
1039: htmlEntities.put("pi" + "v", new Integer(982));
1040: htmlEntities.put("bull", new Integer(8226));
1041: htmlEntities.put("hell" + "i" + "p", new Integer(8230));
1042: htmlEntities.put("prime", new Integer(8242));
1043: htmlEntities.put("Prime", new Integer(8243));
1044: htmlEntities.put("o" + "line", new Integer(8254));
1045: htmlEntities.put("f" + "r" + "" + "a" + "s" + "l", new Integer(
1046: 8260));
1047: htmlEntities.put("we" + "i" + "e" + "r" + "p",
1048: new Integer(8472));
1049: htmlEntities.put("image", new Integer(8465));
1050: htmlEntities.put("real", new Integer(8476));
1051: htmlEntities.put("trade", new Integer(8482));
1052: htmlEntities.put("ale" + "f" + "s" + "y" + "m", new Integer(
1053: 8501));
1054: htmlEntities.put("l" + "a" + "r" + "r", new Integer(8592));
1055: htmlEntities.put("u" + "a" + "r" + "r", new Integer(8593));
1056: htmlEntities.put("r" + "a" + "r" + "r", new Integer(8594));
1057: htmlEntities.put("d" + "a" + "r" + "r", new Integer(8595));
1058: htmlEntities.put("ha" + "r" + "r", new Integer(8596));
1059: htmlEntities.put("c" + "r" + "" + "a" + "r" + "r", new Integer(
1060: 8629));
1061: htmlEntities.put("lArr", new Integer(8656));
1062: htmlEntities.put("uArr", new Integer(8657));
1063: htmlEntities.put("rArr", new Integer(8658));
1064: htmlEntities.put("dArr", new Integer(8659));
1065: htmlEntities.put("hArr", new Integer(8660));
1066: htmlEntities.put("for" + "all", new Integer(8704));
1067: htmlEntities.put("part", new Integer(8706));
1068: htmlEntities.put("exist", new Integer(8707));
1069: htmlEntities.put("empty", new Integer(8709));
1070: htmlEntities
1071: .put("n" + "a" + "b" + "l" + "a", new Integer(8711));
1072: htmlEntities.put("is" + "in", new Integer(8712));
1073: htmlEntities.put("not" + "in", new Integer(8713));
1074: htmlEntities.put("n" + "i", new Integer(8715));
1075: htmlEntities.put("p" + "rod", new Integer(8719));
1076: htmlEntities.put("sum", new Integer(8721));
1077: htmlEntities.put("minus", new Integer(8722));
1078: htmlEntities.put("low" + "as" + "t", new Integer(8727));
1079: htmlEntities
1080: .put("r" + "a" + "d" + "i" + "c", new Integer(8730));
1081: htmlEntities.put("prop", new Integer(8733));
1082: htmlEntities.put("in" + "fin", new Integer(8734));
1083: htmlEntities.put("an" + "g", new Integer(8736));
1084: htmlEntities.put("and", new Integer(8743));
1085: htmlEntities.put("or", new Integer(8744));
1086: htmlEntities.put("cap", new Integer(8745));
1087: htmlEntities.put("cup", new Integer(8746));
1088: htmlEntities.put("int", new Integer(8747));
1089: htmlEntities.put("there4", new Integer(8756));
1090: htmlEntities.put("s" + "i" + "m", new Integer(8764));
1091: htmlEntities.put("c" + "on" + "g", new Integer(8773));
1092: htmlEntities
1093: .put("a" + "s" + "y" + "m" + "p", new Integer(8776));
1094: htmlEntities.put("n" + "e", new Integer(8800));
1095: htmlEntities
1096: .put("e" + "q" + "u" + "i" + "v", new Integer(8801));
1097: htmlEntities.put("l" + "e", new Integer(8804));
1098: htmlEntities.put("g" + "e", new Integer(8805));
1099: htmlEntities.put("sub", new Integer(8834));
1100: htmlEntities.put("sup", new Integer(8835));
1101: htmlEntities.put("n" + "sub", new Integer(8836));
1102: htmlEntities.put("sub" + "e", new Integer(8838));
1103: htmlEntities.put("sup" + "e", new Integer(8839));
1104: htmlEntities.put("o" + "plus", new Integer(8853));
1105: htmlEntities.put("o" + "times", new Integer(8855));
1106: htmlEntities.put("per" + "p", new Integer(8869));
1107: htmlEntities.put("s" + "dot", new Integer(8901));
1108: htmlEntities
1109: .put("l" + "c" + "e" + "i" + "l", new Integer(8968));
1110: htmlEntities
1111: .put("r" + "c" + "e" + "i" + "l", new Integer(8969));
1112: htmlEntities.put("l" + "floor", new Integer(8970));
1113: htmlEntities.put("r" + "floor", new Integer(8971));
1114: htmlEntities.put("lang", new Integer(9001));
1115: htmlEntities.put("rang", new Integer(9002));
1116: htmlEntities.put("l" + "o" + "z", new Integer(9674));
1117: htmlEntities.put("spades", new Integer(9824));
1118: htmlEntities.put("clubs", new Integer(9827));
1119: htmlEntities.put("hearts", new Integer(9829));
1120: htmlEntities.put("d" + "i" + "am" + "s", new Integer(9830));
1121: htmlEntities.put("quot", new Integer(34));
1122: htmlEntities.put("amp", new Integer(38));
1123: htmlEntities.put("lt", new Integer(60));
1124: htmlEntities.put("gt", new Integer(62));
1125: htmlEntities.put("OElig", new Integer(338));
1126: htmlEntities.put("o" + "e" + "l" + "i" + "g", new Integer(339));
1127: htmlEntities.put("Scar" + "on", new Integer(352));
1128: htmlEntities.put("scar" + "on", new Integer(353));
1129: htmlEntities.put("Y" + "u" + "m" + "l", new Integer(376));
1130: htmlEntities.put("c" + "i" + "r" + "c", new Integer(710));
1131: htmlEntities.put("tilde", new Integer(732));
1132: htmlEntities.put("e" + "n" + "s" + "p", new Integer(8194));
1133: htmlEntities.put("e" + "m" + "s" + "p", new Integer(8195));
1134: htmlEntities.put("thin" + "s" + "p", new Integer(8201));
1135: htmlEntities.put("z" + "w" + "n" + "j", new Integer(8204));
1136: htmlEntities.put("z" + "w" + "j", new Integer(8205));
1137: htmlEntities.put("l" + "r" + "m", new Integer(8206));
1138: htmlEntities.put("r" + "l" + "m", new Integer(8207));
1139: htmlEntities.put("n" + "dash", new Integer(8211));
1140: htmlEntities.put("m" + "dash", new Integer(8212));
1141: htmlEntities.put("l" + "s" + "quo", new Integer(8216));
1142: htmlEntities.put("r" + "s" + "quo", new Integer(8217));
1143: htmlEntities.put("s" + "b" + "quo", new Integer(8218));
1144: htmlEntities.put("l" + "d" + "quo", new Integer(8220));
1145: htmlEntities.put("r" + "d" + "quo", new Integer(8221));
1146: htmlEntities.put("b" + "d" + "quo", new Integer(8222));
1147: htmlEntities.put("dagger", new Integer(8224));
1148: htmlEntities.put("Dagger", new Integer(8225));
1149: htmlEntities.put("p" + "e" + "r" + "m" + "i" + "l",
1150: new Integer(8240));
1151: htmlEntities.put("l" + "s" + "a" + "quo", new Integer(8249));
1152: htmlEntities.put("r" + "s" + "a" + "quo", new Integer(8250));
1153: htmlEntities.put("euro", new Integer(8364));
1154: }
1155:
1156: /**
1157: * Turn any HTML escape entities in the string into
1158: * characters and return the resulting string.
1159: *
1160: * @param s String to be un-escaped.
1161: * @return un-escaped String.
1162: * @throws NullPointerException if s is null.
1163: *
1164: * @since ostermillerutils 1.00.00
1165: */
1166: public static String unescapeHTML(String s) {
1167: StringBuffer result = new StringBuffer(s.length());
1168: int ampInd = s.indexOf("&");
1169: int lastEnd = 0;
1170: while (ampInd >= 0) {
1171: int nextAmp = s.indexOf("&", ampInd + 1);
1172: int nextSemi = s.indexOf(";", ampInd + 1);
1173: if (nextSemi != -1 && (nextAmp == -1 || nextSemi < nextAmp)) {
1174: int value = -1;
1175: String escape = s.substring(ampInd + 1, nextSemi);
1176: try {
1177: if (escape.startsWith("#")) {
1178: value = Integer.parseInt(escape.substring(1),
1179: 10);
1180: } else {
1181: if (htmlEntities.containsKey(escape)) {
1182: value = htmlEntities.get(escape).intValue();
1183: }
1184: }
1185: } catch (NumberFormatException x) {
1186: // Could not parse the entity,
1187: // output it verbatim
1188: }
1189: result.append(s.substring(lastEnd, ampInd));
1190: lastEnd = nextSemi + 1;
1191: if (value >= 0 && value <= 0xffff) {
1192: result.append((char) value);
1193: } else {
1194: result.append("&").append(escape).append(";");
1195: }
1196: }
1197: ampInd = nextAmp;
1198: }
1199: result.append(s.substring(lastEnd));
1200: return result.toString();
1201: }
1202:
1203: /**
1204: * Escapes characters that have special meaning to
1205: * regular expressions
1206: *
1207: * @param s String to be escaped
1208: * @return escaped String
1209: * @throws NullPointerException if s is null.
1210: *
1211: * @since ostermillerutils 1.02.25
1212: */
1213: public static String escapeRegularExpressionLiteral(String s) {
1214: // According to the documentation in the Pattern class:
1215: //
1216: // The backslash character ('\') serves to introduce escaped constructs,
1217: // as defined in the table above, as well as to quote characters that
1218: // otherwise would be interpreted as un-escaped constructs. Thus the
1219: // expression \\ matches a single backslash and \{ matches a left brace.
1220: //
1221: // It is an error to use a backslash prior to any alphabetic character
1222: // that does not denote an escaped construct; these are reserved for future
1223: // extensions to the regular-expression language. A backslash may be used
1224: // prior to a non-alphabetic character regardless of whether that character
1225: // is part of an un-escaped construct.
1226: //
1227: // As a result, escape everything except [0-9a-zA-Z]
1228:
1229: int length = s.length();
1230: int newLength = length;
1231: // first check for characters that might
1232: // be dangerous and calculate a length
1233: // of the string that has escapes.
1234: for (int i = 0; i < length; i++) {
1235: char c = s.charAt(i);
1236: if (!((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) {
1237: newLength += 1;
1238: }
1239: }
1240: if (length == newLength) {
1241: // nothing to escape in the string
1242: return s;
1243: }
1244: StringBuffer sb = new StringBuffer(newLength);
1245: for (int i = 0; i < length; i++) {
1246: char c = s.charAt(i);
1247: if (!((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) {
1248: sb.append('\\');
1249: }
1250: sb.append(c);
1251: }
1252: return sb.toString();
1253: }
1254:
1255: /**
1256: * Build a regular expression that is each of the terms or'd together.
1257: *
1258: * @param terms a list of search terms.
1259: * @param sb place to build the regular expression.
1260: * @throws IllegalArgumentException if the length of terms is zero.
1261: *
1262: * @since ostermillerutils 1.02.25
1263: */
1264: private static void buildFindAnyPattern(String[] terms,
1265: StringBuffer sb) {
1266: if (terms.length == 0)
1267: throw new IllegalArgumentException(
1268: "There must be at least one term to find.");
1269: sb.append("(?:");
1270: for (int i = 0; i < terms.length; i++) {
1271: if (i > 0)
1272: sb.append("|");
1273: sb.append("(?:");
1274: sb.append(escapeRegularExpressionLiteral(terms[i]));
1275: sb.append(")");
1276: }
1277: sb.append(")");
1278: }
1279:
1280: /**
1281: * Compile a pattern that can will match a string if the string
1282: * contains any of the given terms.
1283: * <p>
1284: * Usage:<br>
1285: * <code>boolean b = getContainsAnyPattern(terms).matcher(s).matches();</code>
1286: * <p>
1287: * If multiple strings are matched against the same set of terms,
1288: * it is more efficient to reuse the pattern returned by this function.
1289: *
1290: * @param terms Array of search strings.
1291: * @return Compiled pattern that can be used to match a string to see if it contains any of the terms.
1292: *
1293: * @since ostermillerutils 1.02.25
1294: */
1295: public static Pattern getContainsAnyPattern(String[] terms) {
1296: StringBuffer sb = new StringBuffer();
1297: sb.append("(?s).*");
1298: buildFindAnyPattern(terms, sb);
1299: sb.append(".*");
1300: return Pattern.compile(sb.toString());
1301: }
1302:
1303: /**
1304: * Compile a pattern that can will match a string if the string
1305: * equals any of the given terms.
1306: * <p>
1307: * Usage:<br>
1308: * <code>boolean b = getEqualsAnyPattern(terms).matcher(s).matches();</code>
1309: * <p>
1310: * If multiple strings are matched against the same set of terms,
1311: * it is more efficient to reuse the pattern returned by this function.
1312: *
1313: * @param terms Array of search strings.
1314: * @return Compiled pattern that can be used to match a string to see if it equals any of the terms.
1315: *
1316: * @since ostermillerutils 1.02.25
1317: */
1318: public static Pattern getEqualsAnyPattern(String[] terms) {
1319: StringBuffer sb = new StringBuffer();
1320: sb.append("(?s)\\A");
1321: buildFindAnyPattern(terms, sb);
1322: sb.append("\\z");
1323: return Pattern.compile(sb.toString());
1324: }
1325:
1326: /**
1327: * Compile a pattern that can will match a string if the string
1328: * starts with any of the given terms.
1329: * <p>
1330: * Usage:<br>
1331: * <code>boolean b = getStartsWithAnyPattern(terms).matcher(s).matches();</code>
1332: * <p>
1333: * If multiple strings are matched against the same set of terms,
1334: * it is more efficient to reuse the pattern returned by this function.
1335: *
1336: * @param terms Array of search strings.
1337: * @return Compiled pattern that can be used to match a string to see if it starts with any of the terms.
1338: *
1339: * @since ostermillerutils 1.02.25
1340: */
1341: public static Pattern getStartsWithAnyPattern(String[] terms) {
1342: StringBuffer sb = new StringBuffer();
1343: sb.append("(?s)\\A");
1344: buildFindAnyPattern(terms, sb);
1345: sb.append(".*");
1346: return Pattern.compile(sb.toString());
1347: }
1348:
1349: /**
1350: * Compile a pattern that can will match a string if the string
1351: * ends with any of the given terms.
1352: * <p>
1353: * Usage:<br>
1354: * <code>boolean b = getEndsWithAnyPattern(terms).matcher(s).matches();</code>
1355: * <p>
1356: * If multiple strings are matched against the same set of terms,
1357: * it is more efficient to reuse the pattern returned by this function.
1358: *
1359: * @param terms Array of search strings.
1360: * @return Compiled pattern that can be used to match a string to see if it ends with any of the terms.
1361: *
1362: * @since ostermillerutils 1.02.25
1363: */
1364: public static Pattern getEndsWithAnyPattern(String[] terms) {
1365: StringBuffer sb = new StringBuffer();
1366: sb.append("(?s).*");
1367: buildFindAnyPattern(terms, sb);
1368: sb.append("\\z");
1369: return Pattern.compile(sb.toString());
1370: }
1371:
1372: /**
1373: * Compile a pattern that can will match a string if the string
1374: * contains any of the given terms.
1375: * <p>
1376: * Case is ignored when matching using Unicode case rules.
1377: * <p>
1378: * Usage:<br>
1379: * <code>boolean b = getContainsAnyPattern(terms).matcher(s).matches();</code>
1380: * <p>
1381: * If multiple strings are matched against the same set of terms,
1382: * it is more efficient to reuse the pattern returned by this function.
1383: *
1384: * @param terms Array of search strings.
1385: * @return Compiled pattern that can be used to match a string to see if it contains any of the terms.
1386: *
1387: * @since ostermillerutils 1.02.25
1388: */
1389: public static Pattern getContainsAnyIgnoreCasePattern(String[] terms) {
1390: StringBuffer sb = new StringBuffer();
1391: sb.append("(?i)(?u)(?s).*");
1392: buildFindAnyPattern(terms, sb);
1393: sb.append(".*");
1394: return Pattern.compile(sb.toString());
1395: }
1396:
1397: /**
1398: * Compile a pattern that can will match a string if the string
1399: * equals any of the given terms.
1400: * <p>
1401: * Case is ignored when matching using Unicode case rules.
1402: * <p>
1403: * Usage:<br>
1404: * <code>boolean b = getEqualsAnyPattern(terms).matcher(s).matches();</code>
1405: * <p>
1406: * If multiple strings are matched against the same set of terms,
1407: * it is more efficient to reuse the pattern returned by this function.
1408: *
1409: * @param terms Array of search strings.
1410: * @return Compiled pattern that can be used to match a string to see if it equals any of the terms.
1411: *
1412: * @since ostermillerutils 1.02.25
1413: */
1414: public static Pattern getEqualsAnyIgnoreCasePattern(String[] terms) {
1415: StringBuffer sb = new StringBuffer();
1416: sb.append("(?i)(?u)(?s)\\A");
1417: buildFindAnyPattern(terms, sb);
1418: sb.append("\\z");
1419: return Pattern.compile(sb.toString());
1420: }
1421:
1422: /**
1423: * Compile a pattern that can will match a string if the string
1424: * starts with any of the given terms.
1425: * <p>
1426: * Case is ignored when matching using Unicode case rules.
1427: * <p>
1428: * Usage:<br>
1429: * <code>boolean b = getStartsWithAnyPattern(terms).matcher(s).matches();</code>
1430: * <p>
1431: * If multiple strings are matched against the same set of terms,
1432: * it is more efficient to reuse the pattern returned by this function.
1433: *
1434: * @param terms Array of search strings.
1435: * @return Compiled pattern that can be used to match a string to see if it starts with any of the terms.
1436: *
1437: * @since ostermillerutils 1.02.25
1438: */
1439: public static Pattern getStartsWithAnyIgnoreCasePattern(
1440: String[] terms) {
1441: StringBuffer sb = new StringBuffer();
1442: sb.append("(?i)(?u)(?s)\\A");
1443: buildFindAnyPattern(terms, sb);
1444: sb.append(".*");
1445: return Pattern.compile(sb.toString());
1446: }
1447:
1448: /**
1449: * Compile a pattern that can will match a string if the string
1450: * ends with any of the given terms.
1451: * <p>
1452: * Case is ignored when matching using Unicode case rules.
1453: * <p>
1454: * Usage:<br>
1455: * <code>boolean b = getEndsWithAnyPattern(terms).matcher(s).matches();</code>
1456: * <p>
1457: * If multiple strings are matched against the same set of terms,
1458: * it is more efficient to reuse the pattern returned by this function.
1459: *
1460: * @param terms Array of search strings.
1461: * @return Compiled pattern that can be used to match a string to see if it ends with any of the terms.
1462: *
1463: * @since ostermillerutils 1.02.25
1464: */
1465: public static Pattern getEndsWithAnyIgnoreCasePattern(String[] terms) {
1466: StringBuffer sb = new StringBuffer();
1467: sb.append("(?i)(?u)(?s).*");
1468: buildFindAnyPattern(terms, sb);
1469: sb.append("\\z");
1470: return Pattern.compile(sb.toString());
1471: }
1472:
1473: /**
1474: * Tests to see if the given string contains any of the given terms.
1475: * <p>
1476: * This implementation is more efficient than the brute force approach
1477: * of testing the string against each of the terms. It instead compiles
1478: * a single regular expression that can test all the terms at once, and
1479: * uses that expression against the string.
1480: * <p>
1481: * This is a convenience method. If multiple strings are tested against
1482: * the same set of terms, it is more efficient not to compile the regular
1483: * expression multiple times.
1484: * @see #getContainsAnyPattern(String[])
1485: *
1486: * @param s String that may contain any of the given terms.
1487: * @param terms list of substrings that may be contained in the given string.
1488: * @return true iff one of the terms is a substring of the given string.
1489: *
1490: * @since ostermillerutils 1.02.25
1491: */
1492: public static boolean containsAny(String s, String[] terms) {
1493: return getContainsAnyPattern(terms).matcher(s).matches();
1494: }
1495:
1496: /**
1497: * Tests to see if the given string equals any of the given terms.
1498: * <p>
1499: * This implementation is more efficient than the brute force approach
1500: * of testing the string against each of the terms. It instead compiles
1501: * a single regular expression that can test all the terms at once, and
1502: * uses that expression against the string.
1503: * <p>
1504: * This is a convenience method. If multiple strings are tested against
1505: * the same set of terms, it is more efficient not to compile the regular
1506: * expression multiple times.
1507: * @see #getEqualsAnyPattern(String[])
1508: *
1509: * @param s String that may equal any of the given terms.
1510: * @param terms list of strings that may equal the given string.
1511: * @return true iff one of the terms is equal to the given string.
1512: *
1513: * @since ostermillerutils 1.02.25
1514: */
1515: public static boolean equalsAny(String s, String[] terms) {
1516: return getEqualsAnyPattern(terms).matcher(s).matches();
1517: }
1518:
1519: /**
1520: * Tests to see if the given string starts with any of the given terms.
1521: * <p>
1522: * This implementation is more efficient than the brute force approach
1523: * of testing the string against each of the terms. It instead compiles
1524: * a single regular expression that can test all the terms at once, and
1525: * uses that expression against the string.
1526: * <p>
1527: * This is a convenience method. If multiple strings are tested against
1528: * the same set of terms, it is more efficient not to compile the regular
1529: * expression multiple times.
1530: * @see #getStartsWithAnyPattern(String[])
1531: *
1532: * @param s String that may start with any of the given terms.
1533: * @param terms list of strings that may start with the given string.
1534: * @return true iff the given string starts with one of the given terms.
1535: *
1536: * @since ostermillerutils 1.02.25
1537: */
1538: public static boolean startsWithAny(String s, String[] terms) {
1539: return getStartsWithAnyPattern(terms).matcher(s).matches();
1540: }
1541:
1542: /**
1543: * Tests to see if the given string ends with any of the given terms.
1544: * <p>
1545: * This implementation is more efficient than the brute force approach
1546: * of testing the string against each of the terms. It instead compiles
1547: * a single regular expression that can test all the terms at once, and
1548: * uses that expression against the string.
1549: * <p>
1550: * This is a convenience method. If multiple strings are tested against
1551: * the same set of terms, it is more efficient not to compile the regular
1552: * expression multiple times.
1553: * @see #getEndsWithAnyPattern(String[])
1554: *
1555: * @param s String that may end with any of the given terms.
1556: * @param terms list of strings that may end with the given string.
1557: * @return true iff the given string ends with one of the given terms.
1558: *
1559: * @since ostermillerutils 1.02.25
1560: */
1561: public static boolean endsWithAny(String s, String[] terms) {
1562: return getEndsWithAnyPattern(terms).matcher(s).matches();
1563: }
1564:
1565: /**
1566: * Tests to see if the given string contains any of the given terms.
1567: * <p>
1568: * Case is ignored when matching using Unicode case rules.
1569: * <p>
1570: * This implementation is more efficient than the brute force approach
1571: * of testing the string against each of the terms. It instead compiles
1572: * a single regular expression that can test all the terms at once, and
1573: * uses that expression against the string.
1574: * <p>
1575: * This is a convenience method. If multiple strings are tested against
1576: * the same set of terms, it is more efficient not to compile the regular
1577: * expression multiple times.
1578: * @see #getContainsAnyIgnoreCasePattern(String[])
1579: *
1580: * @param s String that may contain any of the given terms.
1581: * @param terms list of substrings that may be contained in the given string.
1582: * @return true iff one of the terms is a substring of the given string.
1583: *
1584: * @since ostermillerutils 1.02.25
1585: */
1586: public static boolean containsAnyIgnoreCase(String s, String[] terms) {
1587: return getContainsAnyIgnoreCasePattern(terms).matcher(s)
1588: .matches();
1589: }
1590:
1591: /**
1592: * Tests to see if the given string equals any of the given terms.
1593: * <p>
1594: * Case is ignored when matching using Unicode case rules.
1595: * <p>
1596: * This implementation is more efficient than the brute force approach
1597: * of testing the string against each of the terms. It instead compiles
1598: * a single regular expression that can test all the terms at once, and
1599: * uses that expression against the string.
1600: * <p>
1601: * This is a convenience method. If multiple strings are tested against
1602: * the same set of terms, it is more efficient not to compile the regular
1603: * expression multiple times.
1604: * @see #getEqualsAnyIgnoreCasePattern(String[])
1605: *
1606: * @param s String that may equal any of the given terms.
1607: * @param terms list of strings that may equal the given string.
1608: * @return true iff one of the terms is equal to the given string.
1609: *
1610: * @since ostermillerutils 1.02.25
1611: */
1612: public static boolean equalsAnyIgnoreCase(String s, String[] terms) {
1613: return getEqualsAnyIgnoreCasePattern(terms).matcher(s)
1614: .matches();
1615: }
1616:
1617: /**
1618: * Tests to see if the given string starts with any of the given terms.
1619: * <p>
1620: * Case is ignored when matching using Unicode case rules.
1621: * <p>
1622: * This implementation is more efficient than the brute force approach
1623: * of testing the string against each of the terms. It instead compiles
1624: * a single regular expression that can test all the terms at once, and
1625: * uses that expression against the string.
1626: * <p>
1627: * This is a convenience method. If multiple strings are tested against
1628: * the same set of terms, it is more efficient not to compile the regular
1629: * expression multiple times.
1630: * @see #getStartsWithAnyIgnoreCasePattern(String[])
1631: *
1632: * @param s String that may start with any of the given terms.
1633: * @param terms list of strings that may start with the given string.
1634: * @return true iff the given string starts with one of the given terms.
1635: *
1636: * @since ostermillerutils 1.02.25
1637: */
1638: public static boolean startsWithAnyIgnoreCase(String s,
1639: String[] terms) {
1640: return getStartsWithAnyIgnoreCasePattern(terms).matcher(s)
1641: .matches();
1642: }
1643:
1644: /**
1645: * Tests to see if the given string ends with any of the given terms.
1646: * <p>
1647: * Case is ignored when matching using Unicode case rules.
1648: * <p>
1649: * This implementation is more efficient than the brute force approach
1650: * of testing the string against each of the terms. It instead compiles
1651: * a single regular expression that can test all the terms at once, and
1652: * uses that expression against the string.
1653: * <p>
1654: * This is a convenience method. If multiple strings are tested against
1655: * the same set of terms, it is more efficient not to compile the regular
1656: * expression multiple times.
1657: * @see #getEndsWithAnyIgnoreCasePattern(String[])
1658: *
1659: * @param s String that may end with any of the given terms.
1660: * @param terms list of strings that may end with the given string.
1661: * @return true iff the given string ends with one of the given terms.
1662: *
1663: * @since ostermillerutils 1.02.25
1664: */
1665: public static boolean endsWithAnyIgnoreCase(String s, String[] terms) {
1666: return getEndsWithAnyIgnoreCasePattern(terms).matcher(s)
1667: .matches();
1668: }
1669: }
|