001: package jimm.util;
002:
003: import java.awt.FontMetrics;
004: import java.util.*;
005:
006: /**
 * Globally available static utility methods, mostly for string manipulation.
008: *
009: * @author Jim Menard, <a href="mailto:jimm@io.com">jimm@io.com</a>
010: */
011: public class StringUtils {
012:
013: protected static final int DEFAULT_MAX_MESSAGE_WIDTH = 78;
014:
015: /**
016: * Returns a list of substrings created by splitting the given string at
017: * the given delimiter. The return value will be <code>null</code> if the
018: * string is <code>null</code>, else it will be a non-empty list of strings.
019: * If <var>delim</var> is <code>null</code> or is not found in the string,
020: * the list will contain one element: the original string.
021: * <p>
022: * This isn't the same thing as using a tokenizer. <var>delim</var> is
023: * a literal string, not a set of characters any of which may be a
024: * delimiter.
025: *
026: * @param str the string we're splitting
027: * @param delim the delimter string
028: */
029: public static List split(String str, String delim) {
030: if (str == null)
031: return null;
032:
033: ArrayList list = new ArrayList();
034:
035: if (delim == null) {
036: list.add(str);
037: return list;
038: }
039:
040: int subStart, afterDelim = 0;
041: int delimLength = delim.length();
042: while ((subStart = str.indexOf(delim, afterDelim)) != -1) {
043: list.add(str.substring(afterDelim, subStart));
044: afterDelim = subStart + delimLength;
045: }
046: if (afterDelim <= str.length())
047: list.add(str.substring(afterDelim));
048:
049: return list;
050: }
051:
052: /**
053: * Returns a string consisting of all members of a collection separated
054: * by the specified string. The <code>toString</code> method of each
055: * collection member is called to convert it to a string.
056: *
057: * @param c a collection of objects
058: * @param joinWith the string that will separate each member of the collection
059: */
060: public static String join(Collection c, String joinWith) {
061: if (c == null)
062: return "";
063:
064: StringBuffer buf = new StringBuffer();
065: boolean first = true;
066: for (Iterator iter = c.iterator(); iter.hasNext();) {
067: if (first)
068: first = false;
069: else if (joinWith != null)
070: buf.append(joinWith);
071: buf.append(iter.next().toString());
072: }
073: return buf.toString();
074: }
075:
076: /**
077: * Returns an array of strings, one for each line in the string. Lines end
078: * with any of cr, lf, or cr lf. A line ending at the end of the string
079: * will not output a further, empty string.
080: * <p>
081: * This code assumes <var>str</var> is not <code>null</code>.
082: *
083: * @param str the string to split
084: * @return a non-empty list of strings
085: */
086: public static List splitIntoLines(String str) {
087: ArrayList strings = new ArrayList();
088:
089: int len = str.length();
090: if (len == 0) {
091: strings.add("");
092: return strings;
093: }
094:
095: int lineStart = 0;
096:
097: for (int i = 0; i < len; ++i) {
098: char c = str.charAt(i);
099: if (c == '\r') {
100: int newlineLength = 1;
101: if ((i + 1) < len && str.charAt(i + 1) == '\n')
102: newlineLength = 2;
103: strings.add(str.substring(lineStart, i));
104: lineStart = i + newlineLength;
105: if (newlineLength == 2) // skip \n next time through loop
106: ++i;
107: } else if (c == '\n') {
108: strings.add(str.substring(lineStart, i));
109: lineStart = i + 1;
110: }
111: }
112: if (lineStart < len)
113: strings.add(str.substring(lineStart));
114:
115: return strings;
116: }
117:
118: /**
119: * Appends a string to a string buffer, adding extra newlines so the message
120: * is not too wide. Max width is not guaranteed; if there is no space in a
121: * line before <code>DEFAULT_MAX_MESSAGE_WIDTH</code> then the next one after
122: * it will be used insetead. Each line will be trimmed before and after it's
123: * added, so some whitespace may be goofed up. This is used for error message
124: * wrapping, so it's not critical that whitespace be preserved.
125: * <p>
126: * TODO Looks for space, not all whitespace. This should probably change.
127: *
128: * @param buf the string buffer
129: * @param str the string
130: */
131: public static void splitUp(StringBuffer buf, String str) {
132: splitUp(buf, str, DEFAULT_MAX_MESSAGE_WIDTH);
133: }
134:
135: /**
136: * Appends a string to a string buffer, adding extra newlines so the
137: * message is not too wide. Max width is not guaranteed; if there is no space
138: * in a line before <var>maxWidth</var> then the next one after it will be
139: * used instead. Each line will be trimmed before and after it's added,
140: * so some whitespace may be goofed up. This is used for error message
141: * wrapping, so it's not critical that whitespace be preserved.
142: * <p>
143: * TODO Looks for space, not all whitespace. This should probably change.
144: *
145: * @param buf the string buffer
146: * @param str the string
147: * @param maxWidth maximum number of chars in each line
148: */
149: public static void splitUp(StringBuffer buf, String str,
150: int maxWidth) {
151: if (str == null)
152: return;
153:
154: str = str.trim();
155: while (str.length() >= maxWidth) {
156: int pos = str.lastIndexOf(' ', maxWidth);
157: if (pos == -1) { // No spaces before; look for first one after
158: pos = str.indexOf(' ', maxWidth);
159: if (pos == -1)
160: break;
161: }
162: buf.append(str.substring(0, pos).trim());
163: buf.append("\n");
164: str = str.substring(pos + 1).trim();
165: }
166: buf.append(str);
167: }
168:
169: /**
170: * Returns an array of strings, one for each line in the string after it
171: * has been wrapped to fit lines of <var>maxWidth</var>. Lines end
172: * with any of cr, lf, or cr lf. A line ending at the end of the string
173: * will not output a further, empty string.
174: * <p>
175: * This code assumes <var>str</var> is not <code>null</code>.
176: *
177: * @param str the string to split
178: * @param fm needed for string width calculations
179: * @param maxWidth the max line width, in points
180: * @return a non-empty list of strings
181: */
182: public static List wrap(String str, FontMetrics fm, int maxWidth) {
183: List lines = splitIntoLines(str);
184: if (lines.size() == 0)
185: return lines;
186:
187: ArrayList strings = new ArrayList();
188: for (Iterator iter = lines.iterator(); iter.hasNext();)
189: wrapLineInto((String) iter.next(), strings, fm, maxWidth);
190: return strings;
191: }
192:
193: /**
194: * Given a line of text and font metrics information, wrap the line and
195: * add the new line(s) to <var>list</var>.
196: *
197: * @param line a line of text
198: * @param list an output list of strings
199: * @param fm font metrics
200: * @param maxWidth maximum width of the line(s)
201: */
202: public static void wrapLineInto(String line, List list,
203: FontMetrics fm, int maxWidth) {
204: int len = line.length();
205: int width;
206: while (len > 0 && (width = fm.stringWidth(line)) > maxWidth) {
207: // Guess where to split the line. Look for the next space before
208: // or after the guess.
209: int guess = len * maxWidth / width;
210: String before = line.substring(0, guess).trim();
211:
212: width = fm.stringWidth(before);
213: int pos;
214: if (width > maxWidth) // Too long
215: pos = findBreakBefore(line, guess);
216: else { // Too short or possibly just right
217: pos = findBreakAfter(line, guess);
218: if (pos != -1) { // Make sure this doesn't make us too long
219: before = line.substring(0, pos).trim();
220: if (fm.stringWidth(before) > maxWidth)
221: pos = findBreakBefore(line, guess);
222: }
223: }
224: if (pos == -1)
225: pos = guess; // Split in the middle of the word
226:
227: list.add(line.substring(0, pos).trim());
228: line = line.substring(pos).trim();
229: len = line.length();
230: }
231: if (len > 0)
232: list.add(line);
233: }
234:
235: /**
236: * Returns the index of the first whitespace character or '-' in
237: * <var>line</var> that is at or before <var>start</var>. Returns -1 if no
238: * such character is found.
239: *
240: * @param line a string
241: * @param start where to star looking
242: */
243: public static int findBreakBefore(String line, int start) {
244: for (int i = start; i >= 0; --i) {
245: char c = line.charAt(i);
246: if (Character.isWhitespace(c) || c == '-')
247: return i;
248: }
249: return -1;
250: }
251:
252: /**
253: * Returns the index of the first whitespace character or '-' in
254: * <var>line</var> that is at or after <var>start</var>. Returns -1 if no
255: * such character is found.
256: *
257: * @param line a string
258: * @param start where to star looking
259: */
260: public static int findBreakAfter(String line, int start) {
261: int len = line.length();
262: for (int i = start; i < len; ++i) {
263: char c = line.charAt(i);
264: if (Character.isWhitespace(c) || c == '-')
265: return i;
266: }
267: return -1;
268: }
269:
270: /**
271: * Returns a string with HTML special characters replaced by their entity
272: * equivalents.
273: *
274: * @param str the string to escape
275: * @return a new string without HTML special characters
276: */
277: public static String escapeHTML(String str) {
278: if (str == null || str.length() == 0)
279: return "";
280:
281: StringBuffer buf = new StringBuffer();
282: int len = str.length();
283: for (int i = 0; i < len; ++i) {
284: char c = str.charAt(i);
285: switch (c) {
286: case '&':
287: buf.append("&");
288: break;
289: case '<':
290: buf.append("<");
291: break;
292: case '>':
293: buf.append(">");
294: break;
295: case '"':
296: buf.append(""");
297: break;
298: case '\'':
299: buf.append("'");
300: break;
301: default:
302: buf.append(c);
303: break;
304: }
305: }
306: return buf.toString();
307: }
308:
309: /**
310: * Returns a new string where all newlines ("\n", "\r",
311: * or "\r\n") have been replaced by "\n" plus XHTML
312: * break tags ("\n<br />").
313: * <p>
314: * We don't call <code>splitIntoLines</code> because that method does not
315: * tell us if the string ended with a newline or not.
316: *
317: * @param str any string
318: * @return a new string with all newlines replaced by
319: * "\n<br />"
320: */
321: public static String newlinesToXHTMLBreaks(String str) {
322: if (str == null || str.length() == 0)
323: return "";
324:
325: StringBuffer buf = new StringBuffer();
326: int len = str.length();
327: for (int i = 0; i < len; ++i) {
328: char c = str.charAt(i);
329: switch (c) {
330: case '\n':
331: buf.append("\n<br />");
332: break;
333: case '\r':
334: if (i + 1 < len && str.charAt(i + 1) == '\n') // Look for '\n'
335: ++i;
336: buf.append("\n<br />");
337: break;
338: default:
339: buf.append(c);
340: break;
341: }
342: }
343: return buf.toString();
344: }
345:
346: /**
347: * Returns a string with XML special characters replaced by their entity
348: * equivalents.
349: *
350: * @param str the string to escape
351: * @return a new string without XML special characters
352: */
353: public static String escapeXML(String str) {
354: return escapeHTML(str);
355: }
356:
357: /**
358: * Returns a string with XML entities replaced by their normal characters.
359: *
360: * @param str the string to un-escape
361: * @return a new normal string
362: */
363: public static String unescapeXML(String str) {
364: if (str == null || str.length() == 0)
365: return "";
366:
367: StringBuffer buf = new StringBuffer();
368: int len = str.length();
369: for (int i = 0; i < len; ++i) {
370: char c = str.charAt(i);
371: if (c == '&') {
372: int pos = str.indexOf(";", i);
373: if (pos == -1) { // Really evil
374: buf.append('&');
375: } else if (str.charAt(i + 1) == '#') {
376: int val = Integer.parseInt(str
377: .substring(i + 2, pos), 16);
378: buf.append((char) val);
379: i = pos;
380: } else {
381: String substr = str.substring(i, pos + 1);
382: if (substr.equals("&"))
383: buf.append('&');
384: else if (substr.equals("<"))
385: buf.append('<');
386: else if (substr.equals(">"))
387: buf.append('>');
388: else if (substr.equals("""))
389: buf.append('"');
390: else if (substr.equals("'"))
391: buf.append('\'');
392: else
393: // ????
394: buf.append(substr);
395: i = pos;
396: }
397: } else {
398: buf.append(c);
399: }
400: }
401: return buf.toString();
402: }
403:
404: /**
405: * Returns a new string with all strings delimited by <var>start</var> and
406: * <var>end</var> replaced by whatever is generated by the
407: * <code>Replacer</code> <var>r</var>. The delimiters themselves are
408: * not part of the returned string.
409: * <p>
410: * If the <code>Replacer</code> ever returns <code>null</code>, we return
411: * <code>null</code>.
412: *
413: * @param start the delimiter start (for example, "{@")
414: * @param end the delimiter end (for example, "}")
415: * @param r the replacer; takes the text between <var>start</var> and
416: * <var>end</var> and returns the replacement text
417: * @param s the string we're munging
418: * @return a new string munged by the replacer, or <code>null</code> if
419: * the replacer ever returns <code>null</code>
420: */
421: public static String replaceDelimited(String start, String end,
422: Replacer r, String s) {
423: return replaceDelimited(null, start, end, r, s);
424: }
425:
426: /**
427: * Returns a new string with all strings delimited by <var>start</var> and
428: * <var>end</var> (but not immediately preceeded by <var>exceptAfter</var>)
429: * replaced by whatever is generated by the <code>Replacer</code>
430: * <var>r</var>. The delimiters themselves are not part of the returned
431: * string.
432: * <p>
433: * If the <code>Replacer</code> ever returns <code>null</code>, we return
434: * <code>null</code>.
435: *
436: * @param exceptAfter ignore <var>start</var> if it appears immediately
437: * after this string; may be <code>null</code>
438: * @param start the delimiter start (for example, "{@")
439: * @param end the delimiter end (for example, "}")
440: * @param r the replacer; takes the text between <var>start</var> and
441: * <var>end</var> and returns the replacement text
442: * @param s the string we're munging
443: * @return a new string munged by the replacer, or <code>null</code> if
444: * the replacer ever returns <code>null</code>
445: */
446: public static String replaceDelimited(String exceptAfter,
447: String start, String end, Replacer r, String s) {
448: if (s == null)
449: return null;
450:
451: int startLength, endLength;
452: if (start == null || end == null
453: || (startLength = start.length()) == 0
454: || (endLength = end.length()) == 0)
455: return s;
456:
457: int exceptAfterLength = exceptAfter == null ? 0 : exceptAfter
458: .length();
459:
460: String str = new String(s); // We're gonna munge the string, so copy it
461: int pos, pos2;
462: int searchFrom = 0;
463: while ((pos = str.indexOf(start, searchFrom)) != -1) {
464: // Skip this one if it is immediately preceeded by exceptAfter.
465: if (exceptAfterLength > 0) {
466: int lookFrom = pos - exceptAfterLength;
467: if (lookFrom >= 0
468: && str.indexOf(exceptAfter, lookFrom) == lookFrom) {
469: searchFrom = pos + 1;
470: continue;
471: }
472: }
473:
474: pos2 = str.indexOf(end, pos + startLength);
475: if (pos2 != -1) {
476: Object val = r.replace(str.substring(pos + startLength,
477: pos2));
478: if (val == null)
479: return null;
480: String valAsString = val.toString();
481: str = str.substring(0, pos) + valAsString
482: + str.substring(pos2 + endLength);
483: searchFrom = pos + valAsString.length();
484: } else
485: // Didn't find end delimiter; stop right here
486: break;
487: }
488: return str;
489: }
490:
491: /**
492: * Returns <var>str</var> with leading and trailing spaces trimmed or, if
493: * <var>str</var> is <code>null</code>, returns <code>null</code>.
494: *
495: * @return str trimmed or <code>null</code>
496: */
497: public static String nullOrTrimmed(String str) {
498: return str == null ? str : str.trim();
499: }
500:
501: }
|