001: /*
002: * Copyright (c) 1998-2002 Carnegie Mellon University. All rights
003: * reserved.
004: *
005: * Redistribution and use in source and binary forms, with or without
006: * modification, are permitted provided that the following conditions
007: * are met:
008: *
009: * 1. Redistributions of source code must retain the above copyright
010: * notice, this list of conditions and the following disclaimer.
011: *
012: * 2. Redistributions in binary form must reproduce the above copyright
013: * notice, this list of conditions and the following disclaimer in
014: * the documentation and/or other materials provided with the
015: * distribution.
016: *
017: * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
018: * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
019: * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
020: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
021: * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027: * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028: *
029: */
030:
031: package rcm.util;
032:
033: import java.util.StringTokenizer;
034:
035: /**
036: * String utility routines.
037: */
public abstract class Str {

    /** {@link #parseNumber} scanner state: only integer digits seen so far. */
    private static final int INT = 0;

    /** {@link #parseNumber} scanner state: a decimal point has been seen. */
    private static final int FRAC = 1;

    /** {@link #parseNumber} scanner state: an exponent marker ('e' or 'E') has been seen. */
    private static final int EXP = 2;

    /**
     * Find first occurrence of any of a set of characters.
     * @param subject String in which to search
     * @param chars Characters to search for
     * @return index of first occurrence in subject of a character from chars,
     * or -1 if no match.
     */
    public static int indexOfAnyChar(String subject, String chars) {
        return indexOfAnyChar(subject, chars, 0);
    }

    /**
     * Find first occurrence of any of a set of characters, starting
     * at a specified index.
     * @param subject String in which to search
     * @param chars Characters to search for
     * @param start Starting offset to search from; a negative value is
     * treated as 0, as String.indexOf does
     * @return index of first occurrence (at or after start) in subject of a
     * character from chars, or -1 if no match.
     */
    public static int indexOfAnyChar(String subject, String chars, int start) {
        // Clamp so a negative offset searches from the beginning instead of
        // failing inside charAt().
        for (int i = Math.max(start, 0); i < subject.length(); ++i) {
            if (chars.indexOf(subject.charAt(i)) != -1) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Replace all occurrences of a string.
     * @param subject String in which to search
     * @param original String to search for in subject
     * @param replacement String to substitute
     * @return subject with all occurrences of original replaced by replacement.
     * If original is empty there is nothing to search for, and subject is
     * returned unchanged.
     */
    public static String replace(String subject, String original, String replacement) {
        if (original.length() == 0) {
            // indexOf("", p) always succeeds at p, so the loop below would
            // never advance and would append replacement forever.
            return subject;
        }

        StringBuffer output = new StringBuffer();
        int p = 0; // start of the text not yet copied to output
        int i;
        while ((i = subject.indexOf(original, p)) != -1) {
            output.append(subject.substring(p, i));
            output.append(replacement);
            p = i + original.length();
        }
        if (p < subject.length()) {
            output.append(subject.substring(p));
        }
        return output.toString();
    }

    /**
     * Escapes metacharacters in a string.
     * @param subject String in which metacharacters are to be escaped
     * @param escapeChar the escape character (e.g., \)
     * @param metachars the metacharacters that should be escaped
     * @return subject with escapeChar inserted before every character found in metachars
     */
    public static String escape(String subject, char escapeChar, String metachars) {
        // Each metacharacter is its own escape code.
        return escape(subject, metachars, escapeChar, metachars);
    }

    /**
     * Escapes characters in a string.
     * @param subject String in which metacharacters are to be escaped
     * @param chars Characters that need to be escaped (e.g. "\b\t\r\n\\")
     * @param escapeChar the escape character (e.g., '\\')
     * @param metachars escape code letters corresponding to each letter in chars (e.g. "btrn\\").
     * <B>Must have metachars.length() == chars.length().</B>
     * @return subject where every occurrence of c in chars is replaced
     * by escapeChar followed by the character corresponding to c in metachars.
     */
    public static String escape(String subject, String chars, char escapeChar,
            String metachars) {
        StringBuffer output = new StringBuffer();

        int p = 0; // start of the text not yet copied to output
        int i;
        while ((i = indexOfAnyChar(subject, chars, p)) != -1) {
            output.append(subject.substring(p, i));

            char c = subject.charAt(i);         // character that needs escaping
            int k = chars.indexOf(c);
            char metac = metachars.charAt(k);   // its corresponding metachar
            output.append(escapeChar);
            output.append(metac);

            p = i + 1;
        }
        if (p < subject.length()) {
            output.append(subject.substring(p));
        }
        return output.toString();
    }

    /**
     * Translate escape sequences (e.g. \r, \n) to characters.
     * @param subject String in which escape sequences are to be translated
     * @param escapeChar the escape character (e.g., \)
     * @param metachars letters representing escape codes (typically "btrn\\")
     * @param chars characters corresponding to metachars (typically "\b\t\r\n\\").
     * <B>Must have chars.length() == metachars.length().</B>
     * @param keepUntranslatedEscapes Controls behavior on unknown escape sequences
     * (see below).
     * @return subject where every escapeChar followed by c in metachars
     * is replaced by the character corresponding to c in chars. If an escape
     * sequence is untranslatable (because escapeChar is followed by some character c
     * not in metachars), then the escapeChar is kept if keepUntranslatedEscapes is true,
     * otherwise the escapeChar is deleted. (The character c is always kept.)
     * An escapeChar that is the very last character of subject has nothing
     * to escape and is copied through unchanged.
     */
    public static String unescape(String subject, char escapeChar, String metachars,
            String chars, boolean keepUntranslatedEscapes) {
        StringBuffer output = new StringBuffer();

        int p = 0; // start of the text not yet copied to output
        int i;
        int len = subject.length();
        while ((i = subject.indexOf(escapeChar, p)) != -1) {
            output.append(subject.substring(p, i));
            if (i + 1 == len) {
                // Dangling escape at end of input. Everything before it has
                // just been copied, so resume from the escape itself; leaving
                // p untouched would copy that text a second time below.
                p = i;
                break;
            }

            char metac = subject.charAt(i + 1); // metachar to replace
            int k = metachars.indexOf(metac);
            if (k == -1) {
                // untranslatable sequence
                if (keepUntranslatedEscapes) {
                    output.append(escapeChar);
                }
                output.append(metac);
            } else {
                output.append(chars.charAt(k)); // its corresponding true char
            }

            p = i + 2; // skip over both escapeChar & metac
        }

        if (p < len) {
            output.append(subject.substring(p));
        }
        return output.toString();
    }

    /**
     * Parse a number from a string. Finds the first recognizable base-10 number
     * (integer or floating point) in the string and returns it as a Number.
     * Uses American English conventions (i.e., '.' as decimal point and ','
     * as thousands separator, which is ignored).
     * <p>
     * The result is an Integer when neither a decimal point nor an exponent
     * marker was scanned, a Long when such an integer does not fit in an int,
     * and a Float otherwise. A '.' or 'e' directly following the digits marks
     * the number as floating point even if no further digits follow.
     * @param s String to parse
     * @return first recognizable number
     * @exception NumberFormatException if no recognizable number is found
     */
    public static Number parseNumber(String s) throws NumberFormatException {
        final int len = s.length();
        for (int i = 0; i < len; ++i) {
            if (!Character.isDigit(s.charAt(i))) {
                continue;
            }

            // Found the first digit; the number may begin slightly earlier
            // with a decimal point and/or a minus sign.
            int start = i;
            int end = ++i;   // exclusive end; only advanced past digits
            int state = INT;

            if (start > 0 && s.charAt(start - 1) == '.') {
                --start;
                state = FRAC;
            }
            if (start > 0 && s.charAt(start - 1) == '-') {
                --start;
            }

            foundEnd: while (i < len) {
                switch (s.charAt(i)) {
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                    end = ++i;
                    break;
                case '.':
                    if (state != INT) {
                        break foundEnd; // second '.', or '.' inside an exponent
                    }
                    state = FRAC;
                    ++i;
                    break;
                case ',': // ignore commas
                    ++i;
                    break;
                case 'e':
                case 'E':
                    if (state == EXP) {
                        // A number has at most one exponent; scanning on
                        // would yield an unparsable substring such as "1e5e3".
                        break foundEnd;
                    }
                    state = EXP;
                    ++i;
                    if (i < len) {
                        char sign = s.charAt(i);
                        if (sign == '+' || sign == '-') {
                            ++i;
                        }
                    }
                    break;
                default:
                    break foundEnd;
                }
            }

            String num = replace(s.substring(start, end), ",", "");
            try {
                if (state == INT) {
                    try {
                        return Integer.valueOf(num);
                    } catch (NumberFormatException tooLargeForInt) {
                        // All digits, so the only way to fail is overflow.
                        return Long.valueOf(num);
                    }
                }
                return Float.valueOf(num);
            } catch (NumberFormatException e) {
                // The scanner accepted something the JDK parsers reject.
                throw new RuntimeException("internal error: " + e, e);
            }
        }
        throw new NumberFormatException(s);
    }

    /**
     * Generate a string by concatenating n copies of another string.
     * @param s String to repeat
     * @param n number of times to repeat s
     * @return s concatenated with itself n times (the empty string if n
     * is zero or negative)
     */
    public static String repeat(String s, int n) {
        StringBuffer out = new StringBuffer();
        for (int k = 0; k < n; ++k) {
            out.append(s);
        }
        return out.toString();
    }

    /**
     * Compress whitespace.
     * @param s String to compress
     * @return string with leading and trailing whitespace removed, and
     * internal runs of whitespace replaced by a single space character
     */
    public static String compressWhitespace(String s) {
        StringBuffer output = new StringBuffer();
        int wordStart = 0;
        boolean inSpace = true; // true so that leading whitespace is skipped
        for (int i = 0, len = s.length(); i < len; ++i) {
            if (Character.isWhitespace(s.charAt(i))) {
                if (!inSpace) {
                    // End of a word: copy it. The separating space is added
                    // only when another word follows, so trailing whitespace
                    // never leaves a space behind.
                    output.append(s.substring(wordStart, i));
                    inSpace = true;
                }
            } else if (inSpace) {
                if (output.length() > 0) {
                    output.append(' ');
                }
                wordStart = i;
                inSpace = false;
            }
        }
        if (!inSpace) {
            output.append(s.substring(wordStart));
        }
        return output.toString();
    }

    /**
     * Test if string contains only whitespace.
     * @param s String to test
     * @return true iff all characters in s satisfy Character.isWhitespace().
     * If s is empty, returns true.
     */
    public static boolean isWhitespace(String s) {
        for (int i = 0, n = s.length(); i < n; ++i) {
            if (!Character.isWhitespace(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Concatenate an array of strings.
     * @param list Array of strings to concatenate
     * @param sep Separator to insert between each string
     * @return string consisting of list[0] + sep + list[1] + sep + ... + sep + list[list.length-1]
     */
    public static String join(String[] list, String sep) {
        StringBuffer result = new StringBuffer();
        for (int i = 0; i < list.length; ++i) {
            if (i > 0) {
                result.append(sep);
            }
            result.append(list[i]);
        }
        return result.toString();
    }

    /**
     * Abbreviate a string.
     * @param s String to abbreviate
     * @param max Maximum length of returned string; must be at least 5
     * @return s with whitespace compressed (see compressWhitespace) and, if
     * the result is still longer than max, enough characters removed from
     * the middle (replaced by "...") to make its length at most max
     */
    public static String abbreviate(String s, int max) {
        s = compressWhitespace(s);
        if (s.length() <= max) {
            return s; // already fits, nothing to cut
        }
        int budget = Math.max(max - 3, 2); // reserve room for "..."
        int half = budget / 2;
        return s.substring(0, half) + "..." + s.substring(s.length() - half);
    }

    /**
     * Abbreviate a multi-line string.
     * @param s String to abbreviate
     * @param maxLines Max number of lines in returned string; must be at least 3
     * @param message Message to replace removed lines with; should end with
     * \n, but may be multiple lines. Occurrences of %d are replaced with
     * the number of lines removed.
     * @return s if it has at most maxLines lines, otherwise s with enough
     * whole lines removed from the middle (replaced by message) to bring it
     * within the limit
     */
    public static String abbreviateLines(String s, int maxLines, String message) {
        int nLines = countLines(s);
        if (nLines <= maxLines) {
            return s; // already fits, nothing to cut
        }
        int budget = Math.max(maxLines - 1, 2); // take out one line for the message
        int half = budget / 2;
        String removedCount = String.valueOf(nLines - half * 2);
        return s.substring(0, nthLine(s, half))
                + replace(message, "%d", removedCount)
                + s.substring(nthLine(s, -half));
    }

    /**
     * Count the lines of a string.
     * @param s String to examine
     * @return one more than the number of '\n' characters in s
     */
    static int countLines(String s) {
        int n = 1;
        int i = -1;
        while ((i = s.indexOf('\n', i + 1)) != -1) {
            ++n;
        }
        return n;
    }

    /**
     * Find the offset at which a line starts.
     * @param s String to examine
     * @param n Line number: n &gt;= 0 counts '\n' characters forward from the
     * start of s, n &lt; 0 counts them backward from the end
     * @return offset just past the n-th '\n' found in the given direction,
     * or 0 if s contains fewer than |n| of them (or n is 0)
     */
    static int nthLine(String s, int n) {
        if (n >= 0) {
            int i = -1;
            while (n > 0 && (i = s.indexOf('\n', i + 1)) != -1) {
                --n;
            }
            return i + 1;
        } else {
            int i = s.length();
            while (n < 0 && (i = s.lastIndexOf('\n', i - 1)) != -1) {
                ++n;
            }
            return i + 1;
        }
    }

    /**
     * Split string around a substring match and return prefix.
     * @param s String to split
     * @param pat Substring to search for in s
     * @return Prefix of s ending just before the first occurrence
     * of pat. If pat is not found in s, returns s itself.
     */
    public static String before(String s, String pat) {
        int i = s.indexOf(pat);
        return (i >= 0) ? s.substring(0, i) : s;
    }

    /**
     * Split string around a substring match and return suffix.
     * @param s String to split
     * @param pat Substring to search for in s
     * @return Suffix of s starting just after the first occurrence
     * of pat. If pat is not found in s, returns "".
     */
    public static String after(String s, String pat) {
        int i = s.indexOf(pat);
        return (i >= 0) ? s.substring(i + pat.length()) : "";
    }

    /**
     * Like String.startsWith, but case-insensitive.
     * @param s String to test
     * @param prefix Prefix to look for
     * @return true iff s begins with prefix, ignoring case
     */
    public static boolean startsWithIgnoreCase(String s, String prefix) {
        // regionMatches returns false by itself when prefix is longer than s.
        return s.regionMatches(true, 0, prefix, 0, prefix.length());
    }

    /**
     * Like String.endsWith, but case-insensitive.
     * @param s String to test
     * @param suffix Suffix to look for
     * @return true iff s ends with suffix, ignoring case
     */
    public static boolean endsWithIgnoreCase(String s, String suffix) {
        int suffixLen = suffix.length();
        // A negative offset (suffix longer than s) makes regionMatches return false.
        return s.regionMatches(true, s.length() - suffixLen, suffix, 0, suffixLen);
    }

    /**
     * Expands tabs to spaces.
     * @param s String in which to expand tabs
     * @param tabsize Distance between tab stops, in columns; must be positive
     * (a zero tabsize fails with ArithmeticException once a tab is reached)
     * @return s with every tab replaced by the spaces needed to reach the next
     * tab stop. The column count restarts after each '\r' or '\n'. If s
     * contains no tab, s itself is returned.
     */
    public static String untabify(String s, int tabsize) {
        if (s.indexOf('\t') == -1) {
            return s; // no tabs, don't bother
        }

        int col = 0;
        StringBuffer result = new StringBuffer();
        // Delimiters are returned as tokens too, each as a one-character string.
        StringTokenizer tokenizer = new StringTokenizer(s, "\t\r\n", true);
        while (tokenizer.hasMoreTokens()) {
            String tok = tokenizer.nextToken();
            switch (tok.charAt(0)) {
            case '\t': {
                int oldcol = col;
                col = (col / tabsize + 1) * tabsize; // advance to next tab stop
                result.append(Str.repeat(" ", col - oldcol));
                break;
            }
            case '\r':
            case '\n':
                col = 0;
                result.append(tok);
                break;
            default:
                col += tok.length();
                result.append(tok);
                break;
            }
        }

        return result.toString();
    }

    /**
     * Reverse a string.
     * @param s String to reverse
     * @return string containing characters of s in reverse order
     */
    public static String reverse(String s) {
        StringBuffer t = new StringBuffer(s.length());
        for (int i = s.length() - 1; i >= 0; --i) {
            t.append(s.charAt(i));
        }
        return t.toString();
    }

    /**
     * Find longest common prefix of two strings.
     * @param s first string
     * @param t second string
     * @return the longest string that both s and t start with
     */
    public static String longestCommonPrefix(String s, String t) {
        return s.substring(0, longestCommonPrefixLength(s, t));
    }

    /**
     * Find length of longest common prefix of two strings.
     * @param s first string
     * @param t second string
     * @return number of leading characters on which s and t agree
     */
    public static int longestCommonPrefixLength(String s, String t) {
        int m = Math.min(s.length(), t.length());
        for (int k = 0; k < m; ++k) {
            if (s.charAt(k) != t.charAt(k)) {
                return k;
            }
        }
        return m;
    }

    /**
     * Find longest common suffix of two strings.
     * @param s first string
     * @param t second string
     * @return the longest string that both s and t end with
     */
    public static String longestCommonSuffix(String s, String t) {
        return s.substring(s.length() - longestCommonSuffixLength(s, t));
    }

    /**
     * Find length of longest common suffix of two strings.
     * @param s first string
     * @param t second string
     * @return number of trailing characters on which s and t agree
     */
    public static int longestCommonSuffixLength(String s, String t) {
        int sLen = s.length();
        int tLen = t.length();
        int m = Math.min(sLen, tLen);
        int k = 0;
        while (k < m && s.charAt(sLen - 1 - k) == t.charAt(tLen - 1 - k)) {
            ++k;
        }
        return k;
    }

    /**
     * Find longest common prefix of two strings, ignoring case.
     * @param s first string
     * @param t second string
     * @return the leading part of s that matches the start of t when
     * characters are compared via Character.toLowerCase
     */
    public static String longestCommonPrefixIgnoreCase(String s, String t) {
        return s.substring(0, longestCommonPrefixLengthIgnoreCase(s, t));
    }

    /**
     * Find length of longest common prefix of two strings, ignoring case.
     * @param s first string
     * @param t second string
     * @return number of leading characters on which s and t agree when
     * compared via Character.toLowerCase
     */
    public static int longestCommonPrefixLengthIgnoreCase(String s, String t) {
        int m = Math.min(s.length(), t.length());
        for (int k = 0; k < m; ++k) {
            if (Character.toLowerCase(s.charAt(k)) != Character.toLowerCase(t.charAt(k))) {
                return k;
            }
        }
        return m;
    }

    /**
     * Find longest common suffix of two strings, ignoring case.
     * @param s first string
     * @param t second string
     * @return the trailing part of s that matches the end of t when
     * characters are compared via Character.toLowerCase
     */
    public static String longestCommonSuffixIgnoreCase(String s, String t) {
        return s.substring(s.length() - longestCommonSuffixLengthIgnoreCase(s, t));
    }

    /**
     * Find length of longest common suffix of two strings, ignoring case.
     * @param s first string
     * @param t second string
     * @return number of trailing characters on which s and t agree when
     * compared via Character.toLowerCase
     */
    public static int longestCommonSuffixLengthIgnoreCase(String s, String t) {
        int sLen = s.length();
        int tLen = t.length();
        int m = Math.min(sLen, tLen);
        int k = 0;
        while (k < m && Character.toLowerCase(s.charAt(sLen - 1 - k))
                == Character.toLowerCase(t.charAt(tLen - 1 - k))) {
            ++k;
        }
        return k;
    }
}
|