001: /* Maps.java
002:
003: {{IS_NOTE
004:
005: Purpose: Utilities for Map
006: Description:
007: History:
008: 2001/4/25, Tom M. Yeh: Created.
009:
010: }}IS_NOTE
011:
012: Copyright (C) 2001 Potix Corporation. All Rights Reserved.
013:
014: {{IS_RIGHT
015: This program is distributed under GPL Version 2.0 in the hope that
016: it will be useful, but WITHOUT ANY WARRANTY.
017: }}IS_RIGHT
018: */
019: package org.zkoss.util;
020:
021: import java.util.List;
022: import java.util.LinkedList;
023: import java.util.Map;
024: import java.util.HashMap;
025: import java.util.Iterator;
026: import java.io.IOException;
027: import java.io.InputStream;
028: import java.io.BufferedReader;
029: import java.io.InputStreamReader;
030: import java.io.PushbackInputStream;
031:
032: import org.zkoss.lang.D;
033: import org.zkoss.lang.Strings;
034: import org.zkoss.mesg.MCommon;
035: import org.zkoss.util.logging.Log;
036:
037: /**
038: * Utilities for process Map.
039: *
040: * @author tomyeh
041: */
042: public class Maps {
043: private static final Log log = Log.lookup(Maps.class);
044:
045: /**
046: * Reads a property list (key and element pairs) from the input stream,
047: * by specifying the charset.
048: * <p>Like java.util.Properties, it translates \\u, \n, \r, \t and \f.
049: * However, it enhanced Properties as follows.
050: *
051: * <ul>
052: * <li>It accepts any charset, not just 8859-1.</li>
053: * <li>It uses a different syntax to let value spread over multiple
054: * lines, descrubed below.</li>
055: * <li>Whitespace is trimmed around '=' and at the beginning of
056: * the key and the ending of the value.</li>
057: * <li>Illegal lines are ignored (Properties.load considers it
058: * as a key with an empty value).</li>
059: * <li>Only '=' is accepted as the separator of key and value.</li>
060: * <li>Only '#' is accepted as comment lines.</li>
061: * </ul>
062: *
063: * <p>To spead a value over multiple lines, you could,
064: * unlike java.util.Properties.load, append '{' to the end of a line.
065: * Then, all the following lines are considerred as part of a value,
066: * unless encountering a line containing only one '}'.<br>
067: * Example:
068: * <pre><code>abc = {
069: *line 1
070: *line 2
071: *}
072: *xyz = {
073: *line 1
074: *line 2
075: *}</code></pre>
076: *
077: * <p>Moreover, you could prefix a group of keys with certain prefix:
078: * <pre><code>org.zkoss.some. {
079: * a = aaa
080: * b = bbb
081: *}</code></pre>
082: *
083: * It actually defines two keys: "org.zkoss.some.a" and "org.zkoss.some.b".
084: *
085: * <p>Note: (1) whitespace in the {...} block are all preserved.<br>
086: * (2) if only whitespaces is between '=' and '{', they are ignored.
087: *
088: * @param charset the charset; if null, it detects UTF-16 BOM (0xfe 0xff
089: * or 0xff 0xfe). If no UTF-16 BOM, UTF-8 is always assumed.
090: * Note 1: UTF-8's BOM (0xef 0xbb 0xbf) is optional, so we don't count on it.
091: * Note 2: ISO-8859-1 is not used because we cannot tell its difference
092: * from UTF-8 (while some of our properties files are in UTF-8).
093: *
094: * @param caseInsensitive whether the key used to access the map
095: * is case-insensitive. If true, all keys are converted to lower cases.
096: */
097: public final static void load(Map map, InputStream sm,
098: String charset, boolean caseInsensitive) throws IOException {
099: final PushbackInputStream pis = new PushbackInputStream(sm, 3);
100: if (charset == null || charset.startsWith("UTF")) {
101: final byte[] ahead = new byte[3];
102: int n = pis.read(ahead);
103: if (n >= 2
104: && ((ahead[0] == (byte) 0xfe && ahead[1] == (byte) 0xff) || (ahead[0] == (byte) 0xff && ahead[1] == (byte) 0xfe))) {
105: charset = "UTF-16";
106: //don't eat UTF-16 BOM, since Java use it to know endian
107: } else if (n == 3 && ahead[0] == (byte) 0xef
108: && ahead[1] == (byte) 0xbb
109: && ahead[2] == (byte) 0xbf) {
110: charset = "UTF-8";
111: n = 0; //eat UTF-8 BOM since Java won't handle it
112: } else if (charset == null) {
113: charset = "UTF-8";
114: }
115: if (n > 0)
116: pis.unread(ahead, 0, n);
117: }
118:
119: final BufferedReader in = new BufferedReader(
120: new InputStreamReader(pis, charset));
121:
122: final List prefixes = new LinkedList();
123: String prefix = null;
124: String line;
125: for (int lno = 1; (line = in.readLine()) != null; ++lno) {
126: int len = line.length();
127: if (len == 0)
128: continue;
129:
130: final Strings.Result res = Strings.nextToken(line, 0,
131: new char[] { '=', '{', '}' }, true, false);
132: if (res == null || res.token.startsWith("#"))
133: continue; //nothing found
134: if (res.separator == (char) 0) {
135: if (res.token.length() > 0)
136: log.warning(">>Igored: a key, " + res.token
137: + ", without value, line " + lno);
138: continue;
139: }
140: if (res.separator == '{') {
141: //res.token.lenth() could be zero
142: if (Strings.skipWhitespaces(line, res.next) < len) //non-space following '{'
143: throw new IllegalSyntaxException(
144: "Invalid nest: '{' must be the last character, line "
145: + lno);
146: prefixes.add(new Integer(res.token.length()));
147: prefix = prefix != null ? prefix + res.token
148: : res.token;
149: continue;
150: }
151: if (res.separator == '}') {
152: if (Strings.skipWhitespaces(line, res.next) < len) //non-space following '}'
153: throw new IllegalSyntaxException(
154: "Invalid nesting: '}' must be the last character, line "
155: + lno);
156: if (prefixes.isEmpty())
157: throw new IllegalSyntaxException(
158: "Invalid nesting: '}' does have any preceding '{', line "
159: + lno);
160: final Integer i = (Integer) prefixes.remove(prefixes
161: .size() - 1); //pop
162: prefix = prefixes.isEmpty() ? null : prefix.substring(
163: 0, prefix.length() - i.intValue());
164: continue;
165: }
166: if (res.token.length() == 0) {
167: log.warning(">>Ignored: wihout key, line " + lno);
168: continue;
169: }
170:
171: // assert res.separator == '=': "Wrong separator: "+res.separator;
172: final String val;
173: String key = caseInsensitive ? res.token.toLowerCase()
174: : res.token;
175: int j = Strings.skipWhitespaces(line, res.next);
176: int k = Strings.skipWhitespacesBackward(line, len - 1);
177: if (j == k && line.charAt(k) == '{') { //pack multiple lines
178: final StringBuffer sb = new StringBuffer();
179: for (int lnoFrom = lno;;) {
180: line = in.readLine();
181: ++lno;
182: if (line == null) {
183: log
184: .warning(">>Ignored: invalid multiple-line format: '={' does not have following '}', "
185: + lnoFrom);
186: break;
187: }
188:
189: len = line.length();
190: if (len > 0) {
191: j = Strings.skipWhitespacesBackward(line,
192: len - 1);
193: if (j >= 0 && line.charAt(j) == '}') {
194: if (j > 0)
195: j = 1 + Strings
196: .skipWhitespacesBackward(line,
197: j - 1);
198: if (j == 0) //no non-space before }
199: break;
200: }
201: }
202: if (sb.length() > 0)
203: sb.append('\n');
204: sb.append(line);
205: }
206: val = sb.toString();
207: } else {
208: val = j <= k ? line.substring(j, k + 1) : "";
209: }
210: map.put(prefix != null ? prefix + key : key, val);
211: }
212:
213: if (!prefixes.isEmpty())
214: log.warning(">>Ignored: unclosed nesting '{': "
215: + prefixes.size());
216: }
217:
218: /**
219: * Reads a property list (key and element pairs) from the input stream,
220: * by specifying the charset.
221: */
222: public final static void load(Map map, InputStream sm,
223: String charset) throws IOException {
224: load(map, sm, charset, false);
225: }
226:
227: /** Reads a property list (key and element pairs) from the input stream,
228: * by detecting correct charset.
229: *
230: * @param caseInsensitive whether the key used to access the map
231: * is case-insensitive. If true, all keys are converted to lower cases.
232: */
233: public final static void load(Map map, InputStream sm,
234: boolean caseInsensitive) throws IOException {
235: load(map, sm, null, caseInsensitive);
236: }
237:
238: /** Reads a property list (key and element pairs) from the input stream,
239: * by detecting correct charset.
240: */
241: public final static void load(Map map, InputStream sm)
242: throws IOException {
243: load(map, sm, null, false);
244: }
245:
246: /**
247: * Parses a string into a map.
248: *
249: * <p>For example, if the following string is parsed:<br/>
250: * a12=12,b3,c6=abc=125,x=y
251: *
252: * <p>Then, a map with the following content is returned:<br/>
253: * ("a12", "12"), ("b3", null), ("c6", "abc=125"), ("x", "y")
254: *
255: * <p>If = is omitted, it is considered as a key with the null value.
256: * If you want to consider it as the value, use
257: * {@link #parse(Map, String, char, char, boolean)} instead.
258: * Actually, this is the same as parse(map, src, separator, quote, false);
259: *
260: * <p>Notice: only the first = after separator is meaningful,
261: * so you don't have to escape the following =.
262: *
263: * <p>Beside specifying the quote character, you could use back slash
264: * quote a single character (as Java does).
265: *
266: * @param map the map to put parsed results to; null to create a
267: * new hash map
268: * @param src the string to parse
269: * @param separator the separator, e.g., ' ' or ','.
270: * @param quote the quote character to surrounding value, e.g.,
271: * name = 'value'. If (char)0, no quotation is recognized.
272: * Notice: if value is an expression, it is better to specify (char)0
273: * because expression might contain strings.
274: * @return the map being generated
275: *
276: * @exception IllegalSyntaxException if syntax errors
277: * @see CollectionsX#parse
278: * @see #toString(Map, char, char)
279: */
280: public static final Map parse(Map map, String src, char separator,
281: char quote) throws IllegalSyntaxException {
282: return parse(map, src, separator, quote, false);
283: }
284:
285: /**
286: * Parses a string into a map.
287: *
288: * <p>If = is omitted, whether it is considered as a key with the null
289: * value or a value with the null key depends on
290: * the asValue argument. If true, it is considered as a value with
291: * the null key.
292: *
293: * <p>For example, if the following string is parsed with asValue=false:<br/>
294: * a12=12,b3,c6=abc=125,x=y
295: *
296: * <p>Then, a map with the following content is returned:<br/>
297: * ("a12", "12"), ("b3", null), ("c6", "abc=125"), ("x", "y")
298: *
299: * <p>Notice: only the first = after separator is meaningful,
300: * so you don't have to escape the following =.
301: * <p>Beside specifying the quote character, you could use back slash
302: * quote a single character (as Java does).
303: *
304: * @param map the map to put parsed results to; null to create a
305: * new hash map
306: * @param src the string to parse
307: * @param separator the separator, e.g., ' ' or ','.
308: * @param quote the quote character to surrounding value, e.g.,
309: * name = 'value'. If (char)0, no quotation is recognized.
310: * Notice: if value is an expression, it is better to specify (char)0
311: * because expression might contain strings.
312: * @param asValue whether to consider the substring without = as
313: * a value (with the null key), or as a key (with the null value)
314: * @return the map being generated
315: *
316: * @exception IllegalSyntaxException if syntax errors
317: * @see CollectionsX#parse
318: * @see #toString(Map, char, char)
319: * @since 2.4.0
320: */
321: public static final Map parse(Map map, String src, char separator,
322: char quote, boolean asValue) throws IllegalSyntaxException {
323: if (separator == (char) 0)
324: throw new IllegalArgumentException("Separator cannot be 0");
325: if (map == null)
326: map = new HashMap();
327: if (src == null || src.length() == 0)
328: return map; //nothing to do
329:
330: //prepare delimiters for keys and values.
331: final String delimValue, delimKey;
332: {
333: final StringBuffer delimsb = new StringBuffer()
334: .append(separator);
335: if (quote != (char) 0)
336: delimsb.append(quote);
337: delimValue = delimsb.toString();
338: delimKey = delimsb.append('=').toString();
339: }
340:
341: //parase
342: for (int j = 0, len = src.length();;) {
343: //handle name
344: Token tk = next(src, delimKey, j, true);
345: // if (D.ON && log.finerable()) log.finer("name: "+tk.token+" "+tk.cc);
346: j = tk.next;
347: final String name = tk.token;
348: switch (tk.cc) {
349: case '=':
350: if (name.length() == 0)
351: throw newIllegalSyntaxException(
352: MCommon.UNEXPECTED_CHARACTER, tk.cc, src);
353: ++j; //skip =
354: break;
355: case (char) 0:
356: // assert tk.next >= len;
357: if (name.length() > 0)
358: if (asValue)
359: map.put(null, name);
360: else
361: map.put(name, null);
362: return map;//done
363: default:
364: //If separator is ' ', tk.cc can be anything; see next()
365: if ((separator != ' ' && tk.cc != separator)
366: || name.length() == 0)
367: throw newIllegalSyntaxException(
368: MCommon.UNEXPECTED_CHARACTER, tk.cc, src);
369:
370: if (asValue)
371: map.put(null, name);
372: else
373: map.put(name, null);
374: if (tk.cc == separator)
375: ++j; //skip separator
376: continue;
377: }
378:
379: //handle value
380: tk = next(src, delimValue, j, false);
381: // if (D.ON && log.finerable()) log.finer("value: "+tk.token+" "+tk.cc);
382: j = tk.next;
383: final String value = tk.token;
384: if (quote != (char) 0 && tk.cc == quote) {
385: if (value.length() > 0)
386: throw newIllegalSyntaxException(
387: MCommon.UNEXPECTED_CHARACTER, tk.cc, src);
388:
389: final StringBuffer valsb = new StringBuffer(32);
390: for (;;) {
391: if (++j == len)
392: throw newIllegalSyntaxException(
393: MCommon.EXPECTING_CHARACTER, tk.cc, src);
394:
395: final char cc = src.charAt(j);
396: if (cc == tk.cc)
397: break;
398: valsb.append(cc == '\\' ? escape(src, ++j) : cc);
399: }
400: map.put(name, valsb.toString());
401: ++j; //skip the closing ' or "
402: } else {
403: map.put(name, value);
404: }
405:
406: if (separator != ' ') {
407: //If not ' ', ensure the following is a separator
408: j = Strings.skipWhitespaces(src, j);
409: if (j >= len)
410: return map;
411: if (src.charAt(j) != separator)
412: throw newIllegalSyntaxException(
413: MCommon.EXPECTING_CHARACTER, separator, src);
414: ++j; //skip separator
415: }
416: }
417: }
418:
419: private static final IllegalSyntaxException newIllegalSyntaxException(
420: int code, char cc, String src) {
421: return new IllegalSyntaxException(code, new Object[] {
422: new Character(cc), src });
423: }
424:
425: private static class Token {
426: /** The next position right after token. */
427: private final int next;
428: /** The character before next. */
429: private final char cc;
430: /** The token before next. */
431: private final String token;
432:
433: private Token(int next, char cc, String token) {
434: this .next = next;
435: this .cc = cc;
436: this .token = token;
437: }
438: }
439:
440: private static final Token next(String src, String delimiters,
441: int j, boolean whitespaceAware) {
442: final StringBuffer tksb = new StringBuffer(64);
443: final int len = src.length();
444: j = Strings.skipWhitespaces(src, j);
445: for (; j < len; ++j) {
446: final char cc = src.charAt(j);
447: if (cc == '\\') {
448: tksb.append(escape(src, ++j));
449: } else if (delimiters.indexOf(cc) >= 0) {
450: //note: cc might be a separator which might be a whitespace
451: j = Strings.skipWhitespaces(src, j);
452: break; //done
453: } else if (Character.isWhitespace(cc)) {
454: final int k = Strings.skipWhitespaces(src, j);
455: //done if the following is nothing but whitespace or...
456: if (whitespaceAware || k >= len
457: || delimiters.indexOf(src.charAt(k)) >= 0) {
458: j = k;
459: break; //done
460: }
461: if (j > k - 1) { //more than one whitespaces
462: tksb.append(src.substring(j, k));
463: j = k - 1; //j will increase by one later
464: } else {
465: tksb.append(cc);
466: }
467: } else if (cc == (char) 0) {
468: throw newIllegalSyntaxException(
469: MCommon.UNEXPECTED_CHARACTER, (char) 0, src);
470: } else {
471: tksb.append(cc);
472: }
473: }
474: return new Token(j, j < len ? src.charAt(j) : (char) 0, tksb
475: .toString());
476: }
477:
478: private static final char escape(String src, int j) {
479: if (j >= src.length())
480: throw new IllegalSyntaxException(MCommon.ILLEGAL_CHAR, "\\");
481: final char cc = src.charAt(j);
482: return cc == 'n' ? '\n' : cc == 't' ? '\t' : cc;
483: }
484:
485: /**
486: * Converts a map to a string.
487: *
488: * @param map the map to convert from
489: * @param quote the quotation character; 0 means no quotation surrunding
490: * the value
491: * @param separator the separator between two name=value pairs
492: * @see #parse(Map, String, char, char)
493: */
494: public static final String toString(Map map, char quote,
495: char separator) {
496: return toStringBuffer(new StringBuffer(64), map, quote,
497: separator).toString();
498: }
499:
500: /** Converts a map to string and append to a string buffer.
501: * @see #toString
502: */
503: public static final StringBuffer toStringBuffer(StringBuffer sb,
504: Map map, char quote, char separator) {
505: if (separator == (char) 0)
506: throw new IllegalArgumentException("Separator cannot be 0");
507: if (map.isEmpty())
508: return sb; //nothing to do
509:
510: //prepare characters to escape with backslash.
511: final String escKey, escValue;
512: {
513: final StringBuffer escsb = new StringBuffer(12).append(
514: ",\\'\" \n\t").append(separator);
515: if (quote != (char) 0 && quote != '\'' && quote != '"')
516: escsb.append(quote);
517: escValue = escsb.toString();
518: escKey = escsb.append('=').toString();
519: }
520:
521: //convert one-by-one
522: for (final Iterator it = map.entrySet().iterator(); it
523: .hasNext();) {
524: final Map.Entry me = (Map.Entry) it.next();
525:
526: final Object key = me.getKey();
527: if (key == null)
528: throw new IllegalSyntaxException(
529: "Unable to encode null key: " + map);
530: final String skey = key.toString();
531: if (skey == null || skey.length() == 0)
532: throw new IllegalSyntaxException(
533: MCommon.EMPTY_NOT_ALLOWED, "key");
534: encode(sb, skey, escKey);
535:
536: final Object val = me.getValue();
537: if (val != null) {
538: sb.append('=');
539: if (quote != (char) 0)
540: sb.append(quote);
541: encode(sb, val.toString(), escValue);
542: if (quote != (char) 0)
543: sb.append(quote);
544: }
545: sb.append(separator);
546: }
547: sb.setLength(sb.length() - 1); //remove the last comma
548: return sb;
549: }
550:
551: private static final void encode(StringBuffer sb, String val,
552: String escapes) {
553: for (int j = 0, len = val.length();;) {
554: final int k = Strings.anyOf(val, escapes, j);
555: if (k >= len) {
556: sb.append(val.substring(j));
557: return;
558: }
559: sb.append(val.substring(j, k)).append('\\');
560: final char cc = val.charAt(k);
561: sb.append(cc == '\n' ? 'n' : cc == '\t' ? 't' : cc);
562: j = k + 1;
563: }
564: }
565: }
|