001: /**
002: *******************************************************************************
003: * Copyright (C) 1996-2006, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */package com.ibm.icu.impl;
007:
008: import java.util.Comparator;
009: import java.util.Iterator;
010: import java.util.Set;
011: import java.util.TreeSet;
012:
013: import com.ibm.icu.impl.CollectionUtilities.MultiComparator;
014: import com.ibm.icu.lang.UCharacter;
015: import com.ibm.icu.text.Collator;
016: import com.ibm.icu.text.RuleBasedCollator;
017: import com.ibm.icu.text.Transliterator;
018: import com.ibm.icu.text.UTF16;
019: import com.ibm.icu.text.UnicodeSet;
020: import com.ibm.icu.text.UnicodeSetIterator;
021: import com.ibm.icu.util.ULocale;
022:
023: /** Provides more flexible formatting of UnicodeSet patterns.
024: */
025: public class PrettyPrinter {
026: private static final UnicodeSet patternWhitespace = (UnicodeSet) new UnicodeSet(
027: "[[:Cn:][:Default_Ignorable_Code_Point:][:patternwhitespace:]]")
028: .freeze();
029: private static final UnicodeSet sortAtEnd = (UnicodeSet) new UnicodeSet(
030: "[[:Cn:][:Cs:][:Co:][:Ideographic:]]").freeze();
031:
032: private boolean first = true;
033: private StringBuffer target = new StringBuffer();
034: private int firstCodePoint = -2;
035: private int lastCodePoint = -2;
036: private boolean compressRanges = true;
037: private String lastString = "";
038: private UnicodeSet toQuote = new UnicodeSet(patternWhitespace);
039: private Transliterator quoter = null;
040:
041: private Comparator ordering;
042: private Comparator spaceComp = Collator.getInstance(ULocale.ROOT);
043: {
044: setOrdering(Collator.getInstance(ULocale.ROOT));
045: ((RuleBasedCollator) spaceComp)
046: .setStrength(RuleBasedCollator.PRIMARY);
047: }
048:
049: public Transliterator getQuoter() {
050: return quoter;
051: }
052:
053: public PrettyPrinter setQuoter(Transliterator quoter) {
054: this .quoter = quoter;
055: return this ; // for chaining
056: }
057:
058: public boolean isCompressRanges() {
059: return compressRanges;
060: }
061:
062: /**
063: * @param compressRanges if you want abcde instead of a-e, make this false
064: * @return
065: */
066: public PrettyPrinter setCompressRanges(boolean compressRanges) {
067: this .compressRanges = compressRanges;
068: return this ;
069: }
070:
071: public Comparator getOrdering() {
072: return ordering;
073: }
074:
075: /**
076: * @param ordering the resulting ordering of the list of characters in the pattern
077: * @return
078: */
079: public PrettyPrinter setOrdering(Comparator ordering) {
080: this .ordering = new MultiComparator(new Comparator[] {
081: ordering, new UTF16.StringComparator(true, false, 0) });
082: return this ;
083: }
084:
085: public Comparator getSpaceComparator() {
086: return spaceComp;
087: }
088:
089: /**
090: * @param spaceComp if the comparison returns non-zero, then a space will be inserted between characters
091: * @return this, for chaining
092: */
093: public PrettyPrinter setSpaceComparator(Comparator spaceComp) {
094: this .spaceComp = spaceComp;
095: return this ;
096: }
097:
098: public UnicodeSet getToQuote() {
099: return toQuote;
100: }
101:
102: /**
103: * a UnicodeSet of extra characters to quote with \\uXXXX-style escaping (will automatically quote pattern whitespace)
104: * @param toQuote
105: */
106: public PrettyPrinter setToQuote(UnicodeSet toQuote) {
107: toQuote = (UnicodeSet) toQuote.clone();
108: toQuote.addAll(patternWhitespace);
109: this .toQuote = toQuote;
110: return this ;
111: }
112:
113: /**
114: * Get the pattern for a particular set.
115: * @param uset
116: * @return formatted UnicodeSet
117: */
118: public String toPattern(UnicodeSet uset) {
119: first = true;
120: UnicodeSet putAtEnd = new UnicodeSet(uset).retainAll(sortAtEnd); // remove all the unassigned gorp for now
121: // make sure that comparison separates all strings, even canonically equivalent ones
122: Set orderedStrings = new TreeSet(ordering);
123: for (UnicodeSetIterator it = new UnicodeSetIterator(uset); it
124: .nextRange();) {
125: if (it.codepoint == it.IS_STRING) {
126: orderedStrings.add(it.string);
127: } else {
128: for (int i = it.codepoint; i <= it.codepointEnd; ++i) {
129: if (!putAtEnd.contains(i)) {
130: orderedStrings.add(UTF16.valueOf(i));
131: }
132: }
133: }
134: }
135: target.setLength(0);
136: target.append("[");
137: for (Iterator it = orderedStrings.iterator(); it.hasNext();) {
138: appendUnicodeSetItem((String) it.next());
139: }
140: for (UnicodeSetIterator it = new UnicodeSetIterator(putAtEnd); it
141: .next();) { // add back the unassigned gorp
142: appendUnicodeSetItem(it.codepoint);
143: }
144: flushLast();
145: target.append("]");
146: String sresult = target.toString();
147:
148: // double check the results. This can be removed once we have more tests.
149: // try {
150: // UnicodeSet doubleCheck = new UnicodeSet(sresult);
151: // if (!uset.equals(doubleCheck)) {
152: // throw new IllegalStateException("Failure to round-trip in pretty-print " + uset + " => " + sresult + "\r\n source-result: " + new UnicodeSet(uset).removeAll(doubleCheck) + "\r\n result-source: " + new UnicodeSet(doubleCheck).removeAll(uset));
153: // }
154: // } catch (RuntimeException e) {
155: // throw (RuntimeException) new IllegalStateException("Failure to round-trip in pretty-print " + uset).initCause(e);
156: // }
157: return sresult;
158: }
159:
160: private PrettyPrinter appendUnicodeSetItem(String s) {
161: int cp;
162: if (UTF16.hasMoreCodePointsThan(s, 1)) {
163: flushLast();
164: addSpace(s);
165: target.append("{");
166: for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
167: appendQuoted(cp = UTF16.charAt(s, i));
168: }
169: target.append("}");
170: lastString = s;
171: } else {
172: appendUnicodeSetItem(UTF16.charAt(s, 0));
173: }
174: return this ;
175: }
176:
177: private void appendUnicodeSetItem(int cp) {
178: if (!compressRanges)
179: flushLast();
180: if (cp == lastCodePoint + 1) {
181: lastCodePoint = cp; // continue range
182: } else { // start range
183: flushLast();
184: firstCodePoint = lastCodePoint = cp;
185: }
186: }
187:
188: /**
189: *
190: */
191: private void addSpace(String s) {
192: if (first) {
193: first = false;
194: } else if (spaceComp.compare(s, lastString) != 0) {
195: target.append(' ');
196: } else {
197: int cp = UTF16.charAt(s, 0);
198: int type = UCharacter.getType(cp);
199: if (type == UCharacter.NON_SPACING_MARK
200: || type == UCharacter.ENCLOSING_MARK) {
201: target.append(' ');
202: } else if (type == UCharacter.SURROGATE
203: && cp >= UTF16.TRAIL_SURROGATE_MIN_VALUE) {
204: target.append(' '); // make sure we don't accidentally merge two surrogates
205: }
206: }
207: }
208:
209: private void flushLast() {
210: if (lastCodePoint >= 0) {
211: addSpace(UTF16.valueOf(firstCodePoint));
212: if (firstCodePoint != lastCodePoint) {
213: appendQuoted(firstCodePoint);
214: target.append(firstCodePoint + 1 == lastCodePoint ? ' '
215: : '-');
216: }
217: appendQuoted(lastCodePoint);
218: lastString = UTF16.valueOf(lastCodePoint);
219: firstCodePoint = lastCodePoint = -2;
220: }
221: }
222:
223: PrettyPrinter appendQuoted(int codePoint) {
224: if (toQuote.contains(codePoint)) {
225: if (quoter != null) {
226: target.append(quoter.transliterate(UTF16
227: .valueOf(codePoint)));
228: return this ;
229: }
230: if (codePoint > 0xFFFF) {
231: target.append("\\U");
232: target.append(Utility.hex(codePoint, 8));
233: } else {
234: target.append("\\u");
235: target.append(Utility.hex(codePoint, 4));
236: }
237: return this ;
238: }
239: switch (codePoint) {
240: case '[': // SET_OPEN:
241: case ']': // SET_CLOSE:
242: case '-': // HYPHEN:
243: case '^': // COMPLEMENT:
244: case '&': // INTERSECTION:
245: case '\\': //BACKSLASH:
246: case '{':
247: case '}':
248: case '$':
249: case ':':
250: target.append('\\');
251: break;
252: default:
253: // Escape whitespace
254: if (patternWhitespace.contains(codePoint)) {
255: target.append('\\');
256: }
257: break;
258: }
259: UTF16.append(target, codePoint);
260: return this ;
261: }
262: // Appender append(String s) {
263: // target.append(s);
264: // return this;
265: // }
266: // public String toString() {
267: // return target.toString();
268: // }
269: }
|