001: /* Strings.java
002:
003: {{IS_NOTE
004:
005: Purpose: String utilities and constants
006: Description:
007: History:
008: 2001/4/17, Tom M. Yeh: Created.
009:
010: }}IS_NOTE
011:
012: Copyright (C) 2001 Potix Corporation. All Rights Reserved.
013:
014: {{IS_RIGHT
015: This program is distributed under GPL Version 2.0 in the hope that
016: it will be useful, but WITHOUT ANY WARRANTY.
017: }}IS_RIGHT
018: */
019: package org.zkoss.lang;
020:
021: import java.util.Date;
022: import java.util.Locale;
023: import java.util.TimeZone;
024: import java.lang.reflect.InvocationTargetException;
025: import java.text.ParseException;
026: import java.math.BigDecimal;
027: import java.math.BigInteger;
028:
029: import org.zkoss.mesg.MCommon;
030: import org.zkoss.text.DateFormats;
031: import org.zkoss.util.Locales;
032: import org.zkoss.util.IllegalSyntaxException;
033:
034: /**
 * String utilities and constants.
036: *
037: * @author tomyeh
038: */
039: public class Strings {
040: /**
041: * Returns true if the string is null or empty.
042: */
043: public static final boolean isEmpty(String s) {
044: return s == null || s.length() == 0;
045: }
046:
047: /**
048: * Returns true if the string is null or empty or pure blank.
049: */
050: public static final boolean isBlank(String s) {
051: return s == null || s.trim().length() == 0;
052: }
053:
054: /** Returns an encoded string buffer, faster and shorter than
055: * Integer.toHexString. It uses numbers and lower-case leters only.
056: * Thus it is a valid variable name if prefix with an alphabet.
057: * At least one character is generated.
058: *
059: * <p>It works even in system that is case-insensitive, such as IE.
060: *
061: * <p>It is useful to generate a string to represent a number.
062: */
063: public static final StringBuffer encode(StringBuffer sb, int val) {
064: if (val < 0) {
065: sb.append('z');
066: val = -val;
067: }
068:
069: do {
070: int v = val & 31;
071: if (v < 10) {
072: sb.append((char) ('0' + v));
073: } else {
074: sb.append((char) (v + ((int) 'a' - 10)));
075: }
076: } while ((val >>>= 5) != 0);
077: return sb;
078: }
079:
080: /** Returns an encoded string buffer, faster and shorter than
081: * Long.toHexString. It uses numbers and lower-case letters only.
082: * Thus it is a valid variable name if prefix with an alphabet.
083: * At least one character is generated.
084: *
085: * <p>It works even in system that is case-insensitive, such as IE.
086: *
087: * <p>It is useful to generate a string to represent a number.
088: */
089: public static final StringBuffer encode(StringBuffer sb, long val) {
090: if (val < 0) {
091: sb.append('z');
092: val = -val;
093: }
094:
095: do {
096: int v = ((int) val) & 31;
097: if (v < 10) {
098: sb.append((char) ('0' + v));
099: } else {
100: sb.append((char) (v + ((int) 'a' - 10)));
101: }
102: } while ((val >>>= 5) != 0);
103: return sb;
104: }
105:
106: /** Returns an encoded string, faster and shorter than
107: * Long.toHexString.
108: */
109: public static final String encode(int val) {
110: return encode(new StringBuffer(12), val).toString();
111: }
112:
113: /** Returns an encoded string, faster and shorter than
114: * Long.toHexString.
115: */
116: public static final String encode(long val) {
117: return encode(new StringBuffer(20), val).toString();
118: }
119:
120: /**
121: * Returns the index that is one of delimiters, or the length if none
122: * of delimiter is found.
123: *
124: * <p>Unlike String.indexOf(String, int), this method returns the first
125: * occurrence of <i>any</i> character in the delimiters.
126: *
127: * <p>This method is optimized to use String.indexOf(char, int)
128: * if it found the length of dilimiter is 1.
129: *
130: * @param src the source string to search
131: * @param from the index to start the search from
132: * @param delimiters the set of characters to search for
133: *
134: * @return the index that is one of delimiters.
135: * If return >= src.length(), it means no such delimiters
136: * @see #lastAnyOf
137: */
138: public static final int anyOf(String src, String delimiters,
139: int from) {
140: switch (delimiters.length()) {
141: case 0:
142: return src.length();
143: case 1:
144: final int j = src.indexOf(delimiters.charAt(0), from);
145: return j >= 0 ? j : src.length();
146: }
147:
148: for (int len = src.length(); from < len
149: && delimiters.indexOf(src.charAt(from)) < 0; ++from)
150: ;
151: return from;
152: }
153:
154: /**
155: * The backward version of {@link #anyOf}.
156: *
157: * <p>This method is optimized to use String.indexOf(char, int)
158: * if it found the length of dilimiter is 1.
159: *
160: * @return the previous index that is one of delimiter.
161: * If it is negative, it means no delimiter in front of
162: * <code>from</code>
163: * @see #anyOf
164: */
165: public static final int lastAnyOf(String src, String delimiters,
166: int from) {
167: switch (delimiters.length()) {
168: case 0:
169: return -1;
170: case 1:
171: return src.lastIndexOf(delimiters.charAt(0), from);
172: }
173:
174: int len = src.length();
175: if (from >= len)
176: from = len - 1;
177: for (; from >= 0 && delimiters.indexOf(src.charAt(from)) < 0; --from)
178: ;
179: return from;
180: }
181:
182: /**
183: * Returns the next index after skipping whitespaces.
184: */
185: public static final int skipWhitespaces(CharSequence src, int from) {
186: for (final int len = src.length(); from < len
187: && Character.isWhitespace(src.charAt(from)); ++from)
188: ;
189: return from;
190: }
191:
192: /**
193: * The backward version of {@link #skipWhitespaces}.
194: *
195: * @return the next index that is not a whitespace.
196: * If it is negative, it means no whitespace in front of it.
197: */
198: public static final int skipWhitespacesBackward(CharSequence src,
199: int from) {
200: final int len = src.length();
201: if (from >= len)
202: from = len - 1;
203: for (; from >= 0 && Character.isWhitespace(src.charAt(from)); --from)
204: ;
205: return from;
206: }
207:
208: /** Returns the next whitespace.
209: */
210: public static final int nextWhitespace(CharSequence src, int from) {
211: for (final int len = src.length(); from < len
212: && !Character.isWhitespace(src.charAt(from)); ++from)
213: ;
214: return from;
215: }
216:
217: /** Escapes (aka, quote) the special characters with backslash.
218: * It prefix a backslash to any characters specfied in the specials
219: * argument.
220: *
221: * <p>Note: specials usually contains '\\'.
222: *
223: * <p>For example, {@link org.zkoss.util.Maps#parse} will un-quote
224: * backspace. Thus, if you want to preserve backslash, you have
225: * invoke escape(s, "\\") before calling Maps.parse().
226: *
227: * @param s the string to process. If null, null is returned.
228: * @param specials a string of characters that shall be escaped/quoted
229: * @see #unescape
230: */
231: public static final String escape(String s, String specials) {
232: if (s == null)
233: return null;
234:
235: StringBuffer sb = null;
236: int j = 0;
237: for (int k, len = s.length(); (k = anyOf(s, specials, j)) < len;) {
238: if (sb == null)
239: sb = new StringBuffer(len + 4);
240:
241: char cc = s.charAt(k);
242: switch (cc) {
243: case '\n':
244: cc = 'n';
245: break;
246: case '\t':
247: cc = 't';
248: break;
249: case '\r':
250: cc = 'r';
251: break;
252: case '\f':
253: cc = 'f';
254: break;
255: }
256: sb.append(s.substring(j, k)).append('\\').append(cc);
257: j = k + 1;
258: }
259: if (sb == null)
260: return s; //nothing changed
261: return sb.append(s.substring(j)).toString();
262: }
263:
264: /** Escapes (aka. quote) the special characters with backslash
265: * and appends it the specified string buffer.
266: */
267: public static final StringBuffer appendEscape(StringBuffer sb,
268: String s, String specials) {
269: if (s == null)
270: return sb;
271:
272: for (int j = 0, len = s.length();;) {
273: final int k = Strings.anyOf(s, specials, j);
274: if (k >= len)
275: return sb.append(s.substring(j));
276:
277: char cc = s.charAt(k);
278: switch (cc) {
279: case '\n':
280: cc = 'n';
281: break;
282: case '\t':
283: cc = 't';
284: break;
285: case '\r':
286: cc = 'r';
287: break;
288: case '\f':
289: cc = 'f';
290: break;
291: }
292: sb.append(s.substring(j, k)).append('\\').append(cc);
293: j = k + 1;
294: }
295: }
296:
297: /** Un-escape the quoted string.
298: * @see #escape
299: * @see #appendEscape
300: */
301: public static final String unescape(String s) {
302: if (s == null)
303: return null;
304: StringBuffer sb = null;
305: int j = 0;
306: for (int k; (k = s.indexOf('\\', j)) >= 0;) {
307: if (sb == null)
308: sb = new StringBuffer(s.length());
309:
310: char cc = s.charAt(k + 1);
311: switch (cc) {
312: case 'n':
313: cc = '\n';
314: break;
315: case 't':
316: cc = '\t';
317: break;
318: case 'r':
319: cc = '\r';
320: break;
321: case 'f':
322: cc = '\f';
323: break;
324: }
325: sb.append(s.substring(j, k)).append(cc);
326: j = k + 2;
327: }
328: if (sb == null)
329: return s; //nothing changed
330: return sb.append(s.substring(j)).toString();
331: }
332:
333: /**
334: * Returns the substring from the <code>from</code> index up to the
335: * <code>until</code> character or end-of-string.
336: * Unlike String.subsring, it converts \f, \n, \t and \r. It doesn't
337: * handle u and x yet.
338: *
339: * @return the result (never null). Result.next is the position of
340: * the <code>until</code> character if found, or
341: * a number larger than length() if no such character.
342: */
343: public static final Result substring(String src, int from,
344: char until) {
345: return substring(src, from, until, true);
346: }
347:
348: /**
349: * Returns the substring from the <code>from</code> index up to the
350: * <code>until</code> character or end-of-string.
351: *
352: * @param escBackslash whether to treat '\\' specially (as escape char)
353: * It doesn't handle u and x yet.
354: * @return the result (never null). Result.next is the position of
355: * the <code>until</code> character if found, or
356: * a number larger than length() if no such character.
357: * You can tell which case it is by examining {@link Result#separator}.
358: */
359: public static final Result substring(String src, int from,
360: char until, boolean escBackslash) {
361: final int len = src.length();
362: final StringBuffer sb = new StringBuffer(len);
363: for (boolean quoted = false; from < len; ++from) {
364: char cc = src.charAt(from);
365: if (quoted) {
366: quoted = false;
367: switch (cc) {
368: case 'f':
369: cc = '\f';
370: break;
371: case 'n':
372: cc = '\n';
373: break;
374: case 'r':
375: cc = '\r';
376: break;
377: case 't':
378: cc = '\t';
379: break;
380: }
381: } else if (cc == until) {
382: break;
383: } else if (escBackslash && cc == '\\') {
384: quoted = true;
385: continue; //skip it
386: }
387: sb.append(cc);
388: }
389: return new Result(from, sb.toString(), from < len ? until
390: : (char) 0);
391: }
392:
393: /** Returns the next token with unescape.
394: * <ul>
395: * <li>It trims whitespaces before and after the token.</li>
396: * <li>It handles both '\'' and '"'. All characters between them are
397: * considered as a token.</li>
398: * <li>If nothing found before end-of-string, null is returned</li>
399: * </ul>
400: *
401: * If a separator is found, it is returned in
402: * {@link Strings.Result#separator}.
403: *
404: * @exception IllegalSyntaxException if the quoted string is unclosed.
405: */
406: public static final Result nextToken(String src, int from,
407: char[] separators) throws IllegalSyntaxException {
408: return nextToken(src, from, separators, true, true);
409: }
410:
411: /** Returns the next token with unescape option.
412: *
413: * <ul>
414: * <li>It trims whitespaces before and after the token.</li>
415: * <li>If quotAsToken is true, all characters between quotations
416: * ('\'' or '"') are considered as a token.</li>
417: * <li>Consider '\\' as the escape char if escBackslash is true.</li>
418: * <li>If nothing found before end-of-string, null is returned</li>
419: * </ul>
420: *
421: * If a separator is found, it is returned in
422: * {@link Strings.Result#separator}.
423: *
424: * @param escBackslash whether to treat '\\' specially (as escape char)
425: * It doesn't handle u and x yet.
426: * @param quotAsToken whether to treat characters inside '\'' or '"'
427: * as a token
428: * @exception IllegalSyntaxException if the quoted string is unclosed.
429: */
430: public static final Result nextToken(String src, int from,
431: char[] separators, boolean escBackslash, boolean quotAsToken)
432: throws IllegalSyntaxException {
433: final int len = src.length();
434: from = skipWhitespaces(src, from);
435: if (from >= len)
436: return null; //end-of-string
437:
438: //1. handle quoted
439: final char cc = src.charAt(from);
440: if (quotAsToken && (cc == '\'' || cc == '"')) {
441: final Result res = substring(src, from + 1, cc,
442: escBackslash);
443: if (res.separator != cc)
444: throw new IllegalSyntaxException(
445: MCommon.QUOTE_UNMATCHED, src);
446:
447: res.next = skipWhitespaces(src, res.next + 1);
448: if (res.next < len
449: && isSeparator(src.charAt(res.next), separators))
450: ++res.next;
451: return res;
452: }
453:
454: //2. handle not-quoted
455: final int j = nextSeparator(src, from, separators,
456: escBackslash, false, quotAsToken);
457: int next = j;
458: if (j < len) {
459: if (quotAsToken) {
460: final char c = src.charAt(j);
461: if (c != '\'' && c != '"')
462: ++next;
463: } else {
464: ++next;
465: }
466: }
467:
468: if (j == from) //nothing but separator
469: return new Result(next, "", src.charAt(j));
470:
471: int k = 1 + skipWhitespacesBackward(src, j - 1);
472: return new Result(next, k > from ? escBackslash ? unescape(src
473: .substring(from, k)) : src.substring(from, k) : "",
474: j < len ? src.charAt(j) : (char) 0);
475: //if the token is nothing but spaces, k < from
476: }
477:
478: /** Returns the next seperator index in the src string.
479: *
480: * @param escQuot whether to escape characters inside quotations
481: * ('\'' or '"'). In other words, ignore separators inside quotations
482: * @param quotAsSeparator whether to consider quotations as one of
483: * the separators
484: * @since 2.4.0
485: */
486: public static int nextSeparator(String src, int from,
487: char[] separators, boolean escBackslash, boolean escQuot,
488: boolean quotAsSeparator) {
489: boolean esc = false;
490: char quot = (char) 0;
491: for (final int len = src.length(); from < len; ++from) {
492: if (esc) {
493: esc = false;
494: continue;
495: }
496:
497: final char cc = src.charAt(from);
498: if (escBackslash && cc == '\\') {
499: esc = true;
500: } else if (quot != (char) 0) {
501: if (cc == quot)
502: quot = (char) 0;
503: } else if (escQuot && (cc == '\'' || cc == '"')) {
504: quot = cc;
505: } else if ((quotAsSeparator && (cc == '\'' || cc == '"'))
506: || isSeparator(cc, separators)) {
507: return from;
508: }
509: }
510: return from;
511: }
512:
513: private static final boolean isSeparator(char cc, char[] separators) {
514: for (int j = 0; j < separators.length; ++j) {
515: if (cc == separators[j]
516: || (separators[j] == ' ' && Character
517: .isWhitespace(cc)))
518: return true;
519: }
520: return false;
521: }
522:
523: /** The result of {@link #substring}.
524: */
525: public static class Result {
526: /** The next index. */
527: public int next;
528: /** The converted string. */
529: public String token;
530: /** The separator found. If no separator but end-of-line found,
531: * ((char)0) is returned.
532: */
533: public char separator;
534:
535: protected Result(int next, String token, char separator) {
536: this .next = next;
537: this .token = token;
538: this .separator = separator;
539: }
540:
541: protected Result(int next, char separator) {
542: this .next = next;
543: this .separator = separator;
544: }
545:
546: //-- Object --//
547: public String toString() {
548: return "[next=" + next + ", token=" + token + " separator="
549: + separator + ']';
550: }
551: }
552: }
|