001: /**
002: * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
003: */package net.sourceforge.pmd.util;
004:
005: import java.util.ArrayList;
006: import java.util.Iterator;
007: import java.util.List;
008:
/**
 * Static helpers for common String manipulations: literal replacement,
 * HTML/XML escaping, splitting, joining, padding and lenient comparison.
 */
public class StringUtil {

    /** Shared zero-length result returned by the split methods for null/empty input. */
    public static final String[] EMPTY_STRINGS = new String[0];

    /**
     * When the system property <code>net.sourceforge.pmd.supportUTF8</code> is
     * "yes", characters above '~' are written verbatim instead of being escaped.
     */
    private static final boolean SUPPORTS_UTF8 = System.getProperty(
            "net.sourceforge.pmd.supportUTF8", "no").equals("yes");

    /** First character code that is escaped numerically (the one right after '~'). */
    private static final int FIRST_ENTITY = '~' + 1;

    /** Last character code that has a precomputed decimal reference in {@link #ENTITIES}. */
    private static final int LAST_ENTITY = 255;

    /** Precomputed decimal character references for codes FIRST_ENTITY..LAST_ENTITY. */
    private static final String[] ENTITIES;

    /** One or more whitespace characters; compiled once for {@link #isSame}. */
    private static final java.util.regex.Pattern WHITESPACE_RUN =
            java.util.regex.Pattern.compile("\\s+");

    static {
        ENTITIES = new String[LAST_ENTITY - FIRST_ENTITY + 1];
        for (int code = FIRST_ENTITY; code <= LAST_ENTITY; code++) {
            ENTITIES[code - FIRST_ENTITY] = "&#" + code + ';';
        }
    }

    /**
     * Replaces every occurrence of a character with a replacement string.
     *
     * @param original  the text to search; must not be null
     * @param oldChar   the character to replace
     * @param newString the replacement; null is treated as the empty string
     * @return the text with all occurrences replaced
     */
    public static String replaceString(String original, char oldChar,
            String newString) {
        String fixedNew = newString == null ? "" : newString;
        return original.replace(String.valueOf(oldChar), fixedNew);
    }

    /**
     * Replaces every (non-overlapping, left to right) literal occurrence of
     * a substring with a replacement string.
     *
     * @param original  the text to search; must not be null
     * @param oldString the literal text to replace; if null or empty the
     *                  original is returned unchanged
     * @param newString the replacement; null is treated as the empty string
     * @return the text with all occurrences replaced
     */
    public static String replaceString(String original,
            String oldString, String newString) {
        // An empty search string matches at every position without ever
        // advancing, so there is nothing meaningful to replace.
        if (oldString == null || oldString.length() == 0) {
            return original;
        }
        String fixedNew = newString == null ? "" : newString;
        return original.replace(oldString, fixedNew);
    }

    /**
     * Appends to a StringBuffer the String src where non-ASCII and
     * XML special chars are escaped.
     *
     * @param buf The destination XML stream
     * @param src The String to append to the stream
     */
    public static void appendXmlEscaped(StringBuffer buf, String src) {
        appendXmlEscaped(buf, src, SUPPORTS_UTF8);
    }

    /**
     * Escapes the characters '&amp;', '&lt;' and '&gt;' so the text can be
     * embedded in HTML. The ampersand is handled first so that the entities
     * produced for the other two characters are not escaped a second time.
     *
     * @param string the text to encode; must not be null
     * @return the encoded text
     */
    public static String htmlEncode(String string) {
        String encoded = replaceString(string, '&', "&amp;");
        encoded = replaceString(encoded, '<', "&lt;");
        return replaceString(encoded, '>', "&gt;");
    }

    // TODO - unify the method above with the one below

    /**
     * Appends src to buf, escaping the XML special characters and, unless
     * supportUTF8 is set, every character above '~' as a numeric character
     * reference.
     *
     * @param buf         the destination buffer
     * @param src         the text to append
     * @param supportUTF8 if true, characters above '~' are appended verbatim
     */
    private static void appendXmlEscaped(StringBuffer buf, String src,
            boolean supportUTF8) {
        int i = 0;
        while (i < src.length()) {
            // Walk by code point so that a surrogate pair becomes a single
            // reference; a reference to a lone surrogate is not legal XML.
            int codePoint = src.codePointAt(i);
            i += Character.charCount(codePoint);

            if (codePoint > '~') {
                if (supportUTF8) {
                    buf.appendCodePoint(codePoint);
                } else if (codePoint <= LAST_ENTITY) {
                    buf.append(ENTITIES[codePoint - FIRST_ENTITY]);
                } else {
                    // Hexadecimal character reference: &#x....;
                    buf.append("&#x").append(Integer.toHexString(codePoint))
                            .append(';');
                }
                continue;
            }

            switch (codePoint) {
            case '&':
                buf.append("&amp;");
                break;
            case '"':
                buf.append("&quot;");
                break;
            case '<':
                buf.append("&lt;");
                break;
            case '>':
                buf.append("&gt;");
                break;
            default:
                buf.append((char) codePoint);
                break;
            }
        }
    }

    /**
     * Splits the source at every occurrence of the delimiter character.
     * Empty segments (adjacent, leading or trailing delimiters) are kept as
     * empty strings, so the result always has one more element than there
     * are delimiters in the source.
     *
     * @param source    the text to split
     * @param delimiter the character to split at
     * @return the segments; {@link #EMPTY_STRINGS} if source is null or empty
     */
    public static String[] substringsOf(String source, char delimiter) {

        if (source == null || source.length() == 0) {
            return EMPTY_STRINGS;
        }

        int length = source.length();
        int delimiterCount = 0;
        for (int i = 0; i < length; i++) {
            if (source.charAt(i) == delimiter) {
                delimiterCount++;
            }
        }

        if (delimiterCount == 0) {
            return new String[] { source };
        }

        String[] results = new String[delimiterCount + 1];
        int start = 0;
        for (int i = 0; i < results.length; i++) {
            int end = source.indexOf(delimiter, start);
            if (end < 0) {
                end = length; // last segment runs to the end of the source
            }
            results[i] = source.substring(start, end);
            start = end + 1;
        }

        return results;
    }

    /**
     * Splits the string at every literal occurrence of the separator.
     * Empty segments are kept as empty strings.
     *
     * @param str       the text to split
     * @param separator the literal separator; if null or empty the text is
     *                  returned as a single element
     * @return the segments; {@link #EMPTY_STRINGS} if str is null or empty
     */
    public static String[] substringsOf(String str, String separator) {

        if (str == null || str.length() == 0) {
            return EMPTY_STRINGS;
        }

        // An empty separator matches everywhere without advancing the scan
        // position, so it cannot be used to split.
        if (separator == null || separator.length() == 0) {
            return new String[] { str };
        }

        int index = str.indexOf(separator);
        if (index == -1) {
            return new String[] { str };
        }

        List<String> parts = new ArrayList<String>();
        int currPos = 0;
        int step = separator.length();
        while (index != -1) {
            parts.add(str.substring(currPos, index));
            currPos = index + step;
            index = str.indexOf(separator, currPos);
        }
        parts.add(str.substring(currPos));
        return parts.toArray(new String[parts.size()]);
    }

    /**
     * Copies the elements returned by the iterator onto the string buffer
     * each delimited by the separator. Nothing is appended if the iterator
     * has no elements.
     *
     * @param sb        the destination buffer
     * @param iter      supplies the elements to append
     * @param separator the text placed between consecutive elements
     */
    public static void asStringOn(StringBuffer sb, Iterator<?> iter,
            String separator) {

        if (!iter.hasNext()) {
            return;
        }

        sb.append(iter.next());

        while (iter.hasNext()) {
            sb.append(separator);
            sb.append(iter.next());
        }
    }

    /**
     * Return the length of the shortest string in the array.
     * If any one of them is null, or the array itself is null or empty,
     * then it returns 0.
     *
     * @param strings the strings to inspect
     * @return the length of the shortest string, or 0 as described above
     */
    public static int lengthOfShortestIn(String[] strings) {

        if (strings == null || strings.length == 0) {
            return 0;
        }

        int minLength = Integer.MAX_VALUE;
        for (String candidate : strings) {
            if (candidate == null) {
                return 0;
            }
            minLength = Math.min(minLength, candidate.length());
        }
        return minLength;
    }

    /**
     * Determine the maximum number of common leading whitespace characters
     * the strings share in the same sequence. Useful for determining how
     * many leading characters can be removed to shift all the text in the
     * strings to the left without misaligning them.
     *
     * @param strings the strings to inspect
     * @return the number of identical leading whitespace characters; 0 if
     *         the array is null or empty or contains a null or empty string
     */
    public static int maxCommonLeadingWhitespaceForAll(String[] strings) {

        int shortest = lengthOfShortestIn(strings);
        if (shortest == 0) {
            return 0;
        }

        for (int pos = 0; pos < shortest; pos++) {
            char expected = strings[0].charAt(pos);
            if (!Character.isWhitespace(expected)) {
                return pos;
            }
            // The first string supplied the reference character, so only
            // the remaining ones need to be compared against it.
            for (int i = 1; i < strings.length; i++) {
                if (strings[i].charAt(pos) != expected) {
                    return pos;
                }
            }
        }

        return shortest;
    }

    /**
     * Trims off the leading characters off the strings up to the trimDepth
     * specified. Returns the same array instance if trimDepth = 0.
     *
     * @param strings   the strings to trim; no element may be null or
     *                  shorter than trimDepth
     * @param trimDepth the number of leading characters to remove
     * @return the trimmed strings
     */
    public static String[] trimStartOn(String[] strings, int trimDepth) {

        if (trimDepth == 0) {
            return strings;
        }

        String[] results = new String[strings.length];
        for (int i = 0; i < strings.length; i++) {
            results[i] = strings[i].substring(trimDepth);
        }
        return results;
    }

    /**
     * Left pads a string with spaces.
     *
     * @param s      The String to pad
     * @param length The desired minimum length of the resulting padded String
     * @return The resulting left padded String; s itself if it is already
     *         at least that long
     */
    public static String lpad(String s, int length) {
        int missing = length - s.length();
        if (missing <= 0) {
            return s;
        }
        char[] padding = new char[missing];
        java.util.Arrays.fill(padding, ' ');
        return new StringBuilder(length).append(padding).append(s).toString();
    }

    /**
     * Are the two String values the same.
     * The Strings can be optionally trimmed before checking.
     * The Strings can be optionally compared ignoring case.
     * The Strings can have embedded whitespace standardized before comparing.
     * Two null values are treated as equal.
     *
     * @param s1 The first String.
     * @param s2 The second String.
     * @param trim Indicates if the Strings should be trimmed before comparison.
     * @param ignoreCase Indicates if the case of the Strings should ignored during comparison.
     * @param standardizeWhitespace Indicates if the embedded whitespace should be standardized before comparison.
     * @return <code>true</code> if the Strings are the same, <code>false</code> otherwise.
     */
    public static boolean isSame(String s1, String s2, boolean trim,
            boolean ignoreCase, boolean standardizeWhitespace) {
        // Reference check: covers two nulls as well as the same instance.
        if (s1 == s2) {
            return true;
        }
        if (s1 == null || s2 == null) {
            return false;
        }

        String left = s1;
        String right = s2;
        if (trim) {
            left = left.trim();
            right = right.trim();
        }
        if (standardizeWhitespace) {
            // Replace each run of whitespace with a single space character.
            left = WHITESPACE_RUN.matcher(left).replaceAll(" ");
            right = WHITESPACE_RUN.matcher(right).replaceAll(" ");
        }
        return ignoreCase ? left.equalsIgnoreCase(right) : left.equals(right);
    }
}
|