001: /*
002: *******************************************************************************
003: * Copyright (C) 1996-2005, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */
007: package com.ibm.icu.dev.test.normalizer;
008:
009: import java.util.Collection;
010: import java.util.Iterator;
011: import java.util.SortedSet;
012: import java.util.TreeSet;
013: import java.util.Set;
014: import com.ibm.icu.dev.test.TestFmwk;
015: import com.ibm.icu.impl.Utility;
016: import com.ibm.icu.lang.UCharacter;
017: import com.ibm.icu.text.CanonicalIterator;
018: import com.ibm.icu.text.Normalizer;
019: import com.ibm.icu.text.UTF16;
020:
021: // TODO: fit into test framework
022:
023: public class TestCanonicalIterator extends TestFmwk {
024:
025: static final boolean SHOW_NAMES = false;
026:
027: public static void main(String[] args) throws Exception {
028: new TestCanonicalIterator().run(args);
029: }
030:
031: static final String testArray[][] = {
032: {
033: "\u00C5d\u0307\u0327",
034: "A\u030Ad\u0307\u0327, A\u030Ad\u0327\u0307, A\u030A\u1E0B\u0327, "
035: + "A\u030A\u1E11\u0307, \u00C5d\u0307\u0327, \u00C5d\u0327\u0307, "
036: + "\u00C5\u1E0B\u0327, \u00C5\u1E11\u0307, \u212Bd\u0307\u0327, "
037: + "\u212Bd\u0327\u0307, \u212B\u1E0B\u0327, \u212B\u1E11\u0307" },
038: { "\u010d\u017E",
039: "c\u030Cz\u030C, c\u030C\u017E, \u010Dz\u030C, \u010D\u017E" },
040: { "x\u0307\u0327",
041: "x\u0307\u0327, x\u0327\u0307, \u1E8B\u0327" }, };
042:
043: public void TestExhaustive() {
044: int counter = 0;
045: int mixedCounter = 0;
046: int lastMixedCounter = -1;
047: CanonicalIterator it = new CanonicalIterator("");
048: /*
049: CanonicalIterator slowIt = new CanonicalIterator("");
050: slowIt.SKIP_ZEROS = false;
051: */
052: //Transliterator name = Transliterator.getInstance("[^\\u0020-\\u007F] name");
053: //Set itSet = new TreeSet();
054: //Set slowItSet = new TreeSet();
055:
056: for (int i = 0; i < 0x10FFFF; ++i) {
057:
058: // skip characters we know don't have decomps
059: int type = UCharacter.getType(i);
060: if (type == Character.UNASSIGNED
061: || type == Character.PRIVATE_USE
062: || type == Character.SURROGATE)
063: continue;
064:
065: if ((++counter % 5000) == 0)
066: logln("Testing " + Utility.hex(i, 0));
067:
068: String s = UTF16.valueOf(i);
069:
070: if (!skipIfBeforeICU(3, 4)) {
071: characterTest(s, i, it);
072: }
073:
074: characterTest(s + "\u0345", i, it);
075: }
076: }
077:
078: public int TestSpeed() {
079: // skip unless verbose
080: if (!isVerbose())
081: return 0;
082:
083: String s = "\uAC01\u0345";
084:
085: CanonicalIterator it = new CanonicalIterator(s);
086: double start, end;
087: int x = 0; // just to keep code from optimizing away.
088: int iterations = 10000;
089: double slowDelta = 0;
090:
091: /*
092: CanonicalIterator slowIt = new CanonicalIterator(s);
093: slowIt.SKIP_ZEROS = false;
094:
095: start = System.currentTimeMillis();
096: for (int i = 0; i < iterations; ++i) {
097: slowIt.setSource(s);
098: while (true) {
099: String item = slowIt.next();
100: if (item == null) break;
101: x += item.length();
102: }
103: }
104: end = System.currentTimeMillis();
105: double slowDelta = (end-start) / iterations;
106: logln("Slow iteration: " + slowDelta);
107: */
108:
109: start = System.currentTimeMillis();
110: for (int i = 0; i < iterations; ++i) {
111: it.setSource(s);
112: while (true) {
113: String item = it.next();
114: if (item == null)
115: break;
116: x += item.length();
117: }
118: }
119: end = System.currentTimeMillis();
120: double fastDelta = (end - start) / iterations;
121: logln("Fast iteration: "
122: + fastDelta
123: + (slowDelta != 0 ? ", " + (fastDelta / slowDelta) : ""));
124:
125: return x;
126: }
127:
128: public void TestBasic() {
129: // This is not interesting anymore as the data is already built
130: // beforehand
131:
132: // check build
133: // UnicodeSet ss = CanonicalIterator.getSafeStart();
134: // logln("Safe Start: " + ss.toPattern(true));
135: // ss = CanonicalIterator.getStarts('a');
136: // expectEqual("Characters with 'a' at the start of their decomposition: ", "", CanonicalIterator.getStarts('a'),
137: // new UnicodeSet("[\u00E0-\u00E5\u0101\u0103\u0105\u01CE\u01DF\u01E1\u01FB"
138: // + "\u0201\u0203\u0227\u1E01\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7]")
139: // );
140:
141: // check permute
142: // NOTE: we use a TreeSet below to sort the output, which is not guaranteed to be sorted!
143:
144: Set results = new TreeSet();
145: CanonicalIterator.permute("ABC", false, results);
146: expectEqual("Simple permutation ", "",
147: collectionToString(results),
148: "ABC, ACB, BAC, BCA, CAB, CBA");
149:
150: // try samples
151: SortedSet set = new TreeSet();
152: for (int i = 0; i < testArray.length; ++i) {
153: //logln("Results for: " + name.transliterate(testArray[i]));
154: CanonicalIterator it = new CanonicalIterator(
155: testArray[i][0]);
156: // int counter = 0;
157: set.clear();
158: String first = null;
159: while (true) {
160: String result = it.next();
161: if (first == null) {
162: first = result;
163: }
164: if (result == null)
165: break;
166: set.add(result); // sort them
167: //logln(++counter + ": " + hex.transliterate(result));
168: //logln(" = " + name.transliterate(result));
169: }
170: expectEqual(i + ": ", testArray[i][0],
171: collectionToString(set), testArray[i][1]);
172: it.reset();
173: if (!it.next().equals(first)) {
174: errln("CanonicalIterator.reset() failed");
175: }
176: if (!it.getSource().equals(
177: Normalizer.normalize(testArray[i][0],
178: Normalizer.NFD))) {
179: errln("CanonicalIterator.getSource() does not return NFD of input source");
180: }
181: }
182: }
183:
184: public void expectEqual(String message, String item, Object a,
185: Object b) {
186: if (!a.equals(b)) {
187: errln("FAIL: " + message + getReadable(item));
188: errln("\t" + getReadable(a));
189: errln("\t" + getReadable(b));
190: } else {
191: logln("Checked: " + message + getReadable(item));
192: logln("\t" + getReadable(a));
193: logln("\t" + getReadable(b));
194: }
195: }
196:
197: //Transliterator name = null;
198: //Transliterator hex = null;
199:
200: public String getReadable(Object obj) {
201: if (obj == null)
202: return "null";
203: String s = obj.toString();
204: if (s.length() == 0)
205: return "";
206: // set up for readable display
207: //if (name == null) name = Transliterator.getInstance("[^\\ -\\u007F] name");
208: //if (hex == null) hex = Transliterator.getInstance("[^\\ -\\u007F] hex");
209: return "[" + (SHOW_NAMES ? hex(s) + "; " : "") + hex(s) + "]";
210: }
211:
212: public void characterTest(String s, int ch, CanonicalIterator it) {
213: int counter = 0;
214: int mixedCounter = 0;
215: int lastMixedCounter = -1;
216: boolean gotDecomp = false;
217: boolean gotComp = false;
218: boolean gotSource = false;
219: String decomp = Normalizer.decompose(s, false);
220: String comp = Normalizer.compose(s, false);
221:
222: // skip characters that don't have either decomp.
223: // need quick test for this!
224: if (s.equals(decomp) && s.equals(comp))
225: return;
226:
227: it.setSource(s);
228:
229: while (true) {
230: String item = it.next();
231: if (item == null)
232: break;
233: if (item.equals(s))
234: gotSource = true;
235: if (item.equals(decomp))
236: gotDecomp = true;
237: if (item.equals(comp))
238: gotComp = true;
239: if ((mixedCounter & 0x7F) == 0
240: && (ch < 0xAD00 || ch > 0xAC00 + 11172)) {
241: if (lastMixedCounter != mixedCounter) {
242: logln("");
243: lastMixedCounter = mixedCounter;
244: }
245: logln("\t" + mixedCounter + "\t" + hex(item)
246: + (item.equals(s) ? "\t(*original*)" : "")
247: + (item.equals(decomp) ? "\t(*decomp*)" : "")
248: + (item.equals(comp) ? "\t(*comp*)" : ""));
249: }
250:
251: }
252:
253: // check that zeros optimization doesn't mess up.
254: /*
255: if (true) {
256: it.reset();
257: itSet.clear();
258: while (true) {
259: String item = it.next();
260: if (item == null) break;
261: itSet.add(item);
262: }
263: slowIt.setSource(s);
264: slowItSet.clear();
265: while (true) {
266: String item = slowIt.next();
267: if (item == null) break;
268: slowItSet.add(item);
269: }
270: if (!itSet.equals(slowItSet)) {
271: errln("Zero optimization failure with " + getReadable(s));
272: }
273: }
274: */
275:
276: mixedCounter++;
277: if (!gotSource || !gotDecomp || !gotComp) {
278: errln("FAIL CanonicalIterator: " + s + " decomp: " + decomp
279: + " comp: " + comp);
280: it.reset();
281: for (String item = it.next(); item != null; item = it
282: .next()) {
283: err(item + " ");
284: }
285: errln("");
286: }
287: }
288:
289: static String collectionToString(Collection col) {
290: StringBuffer result = new StringBuffer();
291: Iterator it = col.iterator();
292: while (it.hasNext()) {
293: if (result.length() != 0)
294: result.append(", ");
295: result.append(it.next().toString());
296: }
297: return result.toString();
298: }
299: }
|