001: /*
002: *******************************************************************************
003: * Copyright (C) 2002-2005, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */
007:
008: /**
009: * Port From: ICU4C v2.1 : collate/CollationIteratorTest
010: * Source File: $ICU4CRoot/source/test/intltest/itercoll.cpp
011: **/package com.ibm.icu.dev.test.collator;
012:
013: import java.util.Locale;
014: import java.util.Arrays;
015: import java.text.CharacterIterator;
016: import java.text.StringCharacterIterator;
017: import com.ibm.icu.dev.test.*;
018: import com.ibm.icu.text.*;
019: import com.ibm.icu.lang.UCharacter;
020:
021: public class CollationIteratorTest extends TestFmwk {
022:
    // Sample sentence used as the iteration text by TestOffset, TestPrevious,
    // TestSetText and the first pass of TestUnicodeChar-independent checks
    // that need ordinary English input.
    String test1 = "What subset of all possible test cases?";
    // Second sample sentence; TestSetText iterates over it first and then
    // re-targets the same iterator at test1.
    String test2 = "has the highest probability of detecting";
025:
026: public static void main(String[] args) throws Exception {
027: new CollationIteratorTest().run(args);
028: // new CollationIteratorTest().TestNormalizedUnicodeChar();
029: }
030:
031: /*
032: * @bug 4157299
033: */
034: public void TestClearBuffers(/* char* par */) {
035: RuleBasedCollator c = null;
036: try {
037: c = new RuleBasedCollator("&a < b < c & ab = d");
038: } catch (Exception e) {
039: warnln("Couldn't create a RuleBasedCollator.");
040: return;
041: }
042:
043: String source = "abcd";
044: CollationElementIterator i = c
045: .getCollationElementIterator(source);
046: int e0 = 0;
047: try {
048: e0 = i.next(); // save the first collation element
049: } catch (Exception e) {
050: errln("call to i.next() failed.");
051: return;
052: }
053:
054: try {
055: i.setOffset(3); // go to the expanding character
056: } catch (Exception e) {
057: errln("call to i.setOffset(3) failed.");
058: return;
059: }
060:
061: try {
062: i.next(); // but only use up half of it
063: } catch (Exception e) {
064: errln("call to i.next() failed.");
065: return;
066: }
067:
068: try {
069: i.setOffset(0); // go back to the beginning
070: } catch (Exception e) {
071: errln("call to i.setOffset(0) failed. ");
072: }
073:
074: int e = 0;
075: try {
076: e = i.next(); // and get this one again
077: } catch (Exception ee) {
078: errln("call to i.next() failed. ");
079: return;
080: }
081:
082: if (e != e0) {
083: errln("got 0x" + Integer.toHexString(e) + ", expected 0x"
084: + Integer.toHexString(e0));
085: }
086: }
087:
088: /** @bug 4108762
089: * Test for getMaxExpansion()
090: */
091: public void TestMaxExpansion(/* char* par */) {
092: int unassigned = 0xEFFFD;
093: String rule = "&a < ab < c/aba < d < z < ch";
094: RuleBasedCollator coll = null;
095: try {
096: coll = new RuleBasedCollator(rule);
097: } catch (Exception e) {
098: warnln("Fail to create RuleBasedCollator");
099: return;
100: }
101: char ch = 0;
102: String str = String.valueOf(ch);
103:
104: CollationElementIterator iter = coll
105: .getCollationElementIterator(str);
106:
107: while (ch < 0xFFFF) {
108: int count = 1;
109: ch++;
110: str = String.valueOf(ch);
111: iter.setText(str);
112: int order = iter.previous();
113:
114: // thai management
115: if (order == 0) {
116: order = iter.previous();
117: }
118:
119: while (iter.previous() != CollationElementIterator.NULLORDER) {
120: count++;
121: }
122:
123: if (iter.getMaxExpansion(order) < count) {
124: errln("Failure at codepoint " + ch
125: + ", maximum expansion count < " + count);
126: }
127: }
128:
129: // testing for exact max expansion
130: ch = 0;
131: while (ch < 0x61) {
132: str = String.valueOf(ch);
133: iter.setText(str);
134: int order = iter.previous();
135:
136: if (iter.getMaxExpansion(order) != 1) {
137: errln("Failure at codepoint 0x"
138: + Integer.toHexString(ch)
139: + " maximum expansion count == 1");
140: }
141: ch++;
142: }
143:
144: ch = 0x63;
145: str = String.valueOf(ch);
146: iter.setText(str);
147: int temporder = iter.previous();
148:
149: if (iter.getMaxExpansion(temporder) != 3) {
150: errln("Failure at codepoint 0x" + Integer.toHexString(ch)
151: + " maximum expansion count == 3");
152: }
153:
154: ch = 0x64;
155: str = String.valueOf(ch);
156: iter.setText(str);
157: temporder = iter.previous();
158:
159: if (iter.getMaxExpansion(temporder) != 1) {
160: errln("Failure at codepoint 0x" + Integer.toHexString(ch)
161: + " maximum expansion count == 1");
162: }
163:
164: str = UCharacter.toString(unassigned);
165: iter.setText(str);
166: temporder = iter.previous();
167:
168: if (iter.getMaxExpansion(temporder) != 2) {
169: errln("Failure at codepoint 0x" + Integer.toHexString(ch)
170: + " maximum expansion count == 2");
171: }
172:
173: // testing jamo
174: ch = 0x1165;
175: str = String.valueOf(ch);
176: iter.setText(str);
177: temporder = iter.previous();
178:
179: if (iter.getMaxExpansion(temporder) > 3) {
180: errln("Failure at codepoint 0x" + Integer.toHexString(ch)
181: + " maximum expansion count < 3");
182: }
183:
184: // testing special jamo &a<\u1165
185: rule = "\u0026\u0071\u003c\u1165\u002f\u0071\u0071\u0071\u0071";
186:
187: try {
188: coll = new RuleBasedCollator(rule);
189: } catch (Exception e) {
190: errln("Fail to create RuleBasedCollator");
191: return;
192: }
193: iter = coll.getCollationElementIterator(str);
194:
195: temporder = iter.previous();
196:
197: if (iter.getMaxExpansion(temporder) != 6) {
198: errln("Failure at codepoint 0x" + Integer.toHexString(ch)
199: + " maximum expansion count == 6");
200: }
201: }
202:
203: /**
204: * Test for getOffset() and setOffset()
205: */
206: public void TestOffset(/* char* par */) {
207: RuleBasedCollator en_us;
208: try {
209: en_us = (RuleBasedCollator) Collator.getInstance(Locale.US);
210: } catch (Exception e) {
211: warnln("ERROR: in creation of collator of ENGLISH locale");
212: return;
213: }
214:
215: CollationElementIterator iter = en_us
216: .getCollationElementIterator(test1);
217: // testing boundaries
218: iter.setOffset(0);
219: if (iter.previous() != CollationElementIterator.NULLORDER) {
220: errln("Error: After setting offset to 0, we should be at the end "
221: + "of the backwards iteration");
222: }
223: iter.setOffset(test1.length());
224: if (iter.next() != CollationElementIterator.NULLORDER) {
225: errln("Error: After setting offset to the end of the string, we "
226: + "should be at the end of the forwards iteration");
227: }
228:
229: // Run all the way through the iterator, then get the offset
230: int[] orders = CollationTest.getOrders(iter);
231: logln("orders.length = " + orders.length);
232:
233: int offset = iter.getOffset();
234:
235: if (offset != test1.length()) {
236: String msg1 = "offset at end != length: ";
237: String msg2 = " vs ";
238: errln(msg1 + offset + msg2 + test1.length());
239: }
240:
241: // Now set the offset back to the beginning and see if it works
242: CollationElementIterator pristine = en_us
243: .getCollationElementIterator(test1);
244:
245: try {
246: iter.setOffset(0);
247: } catch (Exception e) {
248: errln("setOffset failed.");
249: }
250: assertEqual(iter, pristine);
251:
252: // setting offset in the middle of a contraction
253: String contraction = "change";
254: RuleBasedCollator tailored = null;
255: try {
256: tailored = new RuleBasedCollator("& a < ch");
257: } catch (Exception e) {
258: errln("Error: in creation of Spanish collator");
259: }
260: iter = tailored.getCollationElementIterator(contraction);
261: int order[] = CollationTest.getOrders(iter);
262: iter.setOffset(1); // sets offset in the middle of ch
263: int order2[] = CollationTest.getOrders(iter);
264: if (!Arrays.equals(order, order2)) {
265: errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
266: }
267: contraction = "peache";
268: iter = tailored.getCollationElementIterator(contraction);
269: iter.setOffset(3);
270: order = CollationTest.getOrders(iter);
271: iter.setOffset(4); // sets offset in the middle of ch
272: order2 = CollationTest.getOrders(iter);
273: if (!Arrays.equals(order, order2)) {
274: errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
275: }
276: // setting offset in the middle of a surrogate pair
277: String surrogate = "\ud800\udc00str";
278: iter = tailored.getCollationElementIterator(surrogate);
279: order = CollationTest.getOrders(iter);
280: iter.setOffset(1); // sets offset in the middle of surrogate
281: order2 = CollationTest.getOrders(iter);
282: if (!Arrays.equals(order, order2)) {
283: errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
284: }
285: surrogate = "simple\ud800\udc00str";
286: iter = tailored.getCollationElementIterator(surrogate);
287: iter.setOffset(6);
288: order = CollationTest.getOrders(iter);
289: iter.setOffset(7); // sets offset in the middle of surrogate
290: order2 = CollationTest.getOrders(iter);
291: if (!Arrays.equals(order, order2)) {
292: errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
293: }
294: // TODO: try iterating halfway through a messy string.
295: }
296:
297: void assertEqual(CollationElementIterator i1,
298: CollationElementIterator i2) {
299: int c1, c2, count = 0;
300: do {
301: c1 = i1.next();
302: c2 = i2.next();
303: if (c1 != c2) {
304: errln(" " + count + ": strength(0x"
305: + Integer.toHexString(c1) + ") != strength(0x"
306: + Integer.toHexString(c2) + ")");
307: break;
308: }
309: count += 1;
310: } while (c1 != CollationElementIterator.NULLORDER);
311: CollationTest.backAndForth(this , i1);
312: CollationTest.backAndForth(this , i2);
313: }
314:
315: /**
316: * Test for CollationElementIterator.previous()
317: *
318: * @bug 4108758 - Make sure it works with contracting characters
319: *
320: */
321: public void TestPrevious(/* char* par */) {
322: RuleBasedCollator en_us = (RuleBasedCollator) Collator
323: .getInstance(Locale.US);
324: CollationElementIterator iter = en_us
325: .getCollationElementIterator(test1);
326:
327: // A basic test to see if it's working at all
328: CollationTest.backAndForth(this , iter);
329:
330: // Test with a contracting character sequence
331: String source;
332: RuleBasedCollator c1 = null;
333: try {
334: c1 = new RuleBasedCollator(
335: "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
336: } catch (Exception e) {
337: errln("Couldn't create a RuleBasedCollator with a contracting sequence.");
338: return;
339: }
340:
341: source = "abchdcba";
342: iter = c1.getCollationElementIterator(source);
343: CollationTest.backAndForth(this , iter);
344:
345: // Test with an expanding character sequence
346: RuleBasedCollator c2 = null;
347: try {
348: c2 = new RuleBasedCollator("&a < b < c/abd < d");
349: } catch (Exception e) {
350: errln("Couldn't create a RuleBasedCollator with an expanding sequence.");
351: return;
352: }
353:
354: source = "abcd";
355: iter = c2.getCollationElementIterator(source);
356: CollationTest.backAndForth(this , iter);
357:
358: // Now try both
359: RuleBasedCollator c3 = null;
360: try {
361: c3 = new RuleBasedCollator("&a < b < c/aba < d < z < ch");
362: } catch (Exception e) {
363: errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence.");
364: return;
365: }
366:
367: source = "abcdbchdc";
368: iter = c3.getCollationElementIterator(source);
369: CollationTest.backAndForth(this , iter);
370:
371: source = "\u0e41\u0e02\u0e41\u0e02\u0e27abc";
372: Collator c4 = null;
373: try {
374: c4 = Collator.getInstance(new Locale("th", "TH", ""));
375: } catch (Exception e) {
376: errln("Couldn't create a collator");
377: return;
378: }
379:
380: iter = ((RuleBasedCollator) c4)
381: .getCollationElementIterator(source);
382: CollationTest.backAndForth(this , iter);
383:
384: source = "\u0061\u30CF\u3099\u30FC";
385: Collator c5 = null;
386: try {
387: c5 = Collator.getInstance(new Locale("ja", "JP", ""));
388: } catch (Exception e) {
389: errln("Couldn't create Japanese collator\n");
390: }
391: iter = ((RuleBasedCollator) c5)
392: .getCollationElementIterator(source);
393:
394: CollationTest.backAndForth(this , iter);
395: }
396:
397: /**
398: * Test for setText()
399: */
400: public void TestSetText(/* char* par */) {
401: RuleBasedCollator en_us = (RuleBasedCollator) Collator
402: .getInstance(Locale.US);
403: CollationElementIterator iter1 = en_us
404: .getCollationElementIterator(test1);
405: CollationElementIterator iter2 = en_us
406: .getCollationElementIterator(test2);
407:
408: // Run through the second iterator just to exercise it
409: int c = iter2.next();
410: int i = 0;
411:
412: while (++i < 10 && c != CollationElementIterator.NULLORDER) {
413: try {
414: c = iter2.next();
415: } catch (Exception e) {
416: errln("iter2.next() returned an error.");
417: break;
418: }
419: }
420:
421: // Now set it to point to the same string as the first iterator
422: try {
423: iter2.setText(test1);
424: } catch (Exception e) {
425: errln("call to iter2->setText(test1) failed.");
426: return;
427: }
428: assertEqual(iter1, iter2);
429:
430: iter1.reset();
431: //now use the overloaded setText(ChracterIterator&, UErrorCode) function to set the text
432: CharacterIterator chariter = new StringCharacterIterator(test1);
433: try {
434: iter2.setText(chariter);
435: } catch (Exception e) {
436: errln("call to iter2->setText(chariter(test1)) failed.");
437: return;
438: }
439: assertEqual(iter1, iter2);
440:
441: iter1.reset();
442: //now use the overloaded setText(ChracterIterator&, UErrorCode) function to set the text
443: UCharacterIterator uchariter = UCharacterIterator
444: .getInstance(test1);
445: try {
446: iter2.setText(uchariter);
447: } catch (Exception e) {
448: errln("call to iter2->setText(uchariter(test1)) failed.");
449: return;
450: }
451: assertEqual(iter1, iter2);
452: }
453:
454: /**
455: * Test for CollationElementIterator previous and next for the whole set of
456: * unicode characters.
457: */
458: public void TestUnicodeChar() {
459: RuleBasedCollator en_us = (RuleBasedCollator) Collator
460: .getInstance(Locale.US);
461: CollationElementIterator iter;
462: char codepoint;
463: StringBuffer source = new StringBuffer();
464: source.append("\u0e4d\u0e4e\u0e4f");
465: // source.append("\u04e8\u04e9");
466: iter = en_us.getCollationElementIterator(source.toString());
467: // A basic test to see if it's working at all
468: CollationTest.backAndForth(this , iter);
469: for (codepoint = 1; codepoint < 0xFFFE;) {
470: source.delete(0, source.length());
471: while (codepoint % 0xFF != 0) {
472: if (UCharacter.isDefined(codepoint)) {
473: source.append(codepoint);
474: }
475: codepoint++;
476: }
477:
478: if (UCharacter.isDefined(codepoint)) {
479: source.append(codepoint);
480: }
481:
482: if (codepoint != 0xFFFF) {
483: codepoint++;
484: }
485: /*if (codepoint >= 0x04fc) {
486: System.out.println("codepoint " + Integer.toHexString(codepoint));
487: String str = source.substring(230, 232);
488: System.out.println(com.ibm.icu.impl.Utility.escape(str));
489: System.out.println("codepoint " + Integer.toHexString(codepoint)
490: + "length " + str.length());
491: iter = en_us.getCollationElementIterator(str);
492: CollationTest.backAndForth(this, iter);
493: }
494: */
495: iter = en_us.getCollationElementIterator(source.toString());
496: // A basic test to see if it's working at all
497: CollationTest.backAndForth(this , iter);
498: }
499: }
500:
501: /**
502: * Test for CollationElementIterator previous and next for the whole set of
503: * unicode characters with normalization on.
504: */
505: public void TestNormalizedUnicodeChar() {
506: // thai should have normalization on
507: RuleBasedCollator th_th = null;
508: try {
509: th_th = (RuleBasedCollator) Collator
510: .getInstance(new Locale("th", "TH"));
511: } catch (Exception e) {
512: warnln("Error creating Thai collator");
513: return;
514: }
515: StringBuffer source = new StringBuffer();
516: source.append('\uFDFA');
517: CollationElementIterator iter = th_th
518: .getCollationElementIterator(source.toString());
519: CollationTest.backAndForth(this , iter);
520: for (char codepoint = 0x1; codepoint < 0xfffe;) {
521: source.delete(0, source.length());
522: while (codepoint % 0xFF != 0) {
523: if (UCharacter.isDefined(codepoint)) {
524: source.append(codepoint);
525: }
526: codepoint++;
527: }
528:
529: if (UCharacter.isDefined(codepoint)) {
530: source.append(codepoint);
531: }
532:
533: if (codepoint != 0xFFFF) {
534: codepoint++;
535: }
536:
537: /*if (((int)codepoint) >= 0xfe00) {
538: String str = source.substring(185, 190);
539: System.out.println(com.ibm.icu.impl.Utility.escape(str));
540: System.out.println("codepoint "
541: + Integer.toHexString(codepoint)
542: + "length " + str.length());
543: iter = th_th.getCollationElementIterator(str);
544: CollationTest.backAndForth(this, iter);
545: */
546: iter = th_th.getCollationElementIterator(source.toString());
547: // A basic test to see if it's working at all
548: CollationTest.backAndForth(this , iter);
549: }
550: }
551:
552: /**
553: * Testing the discontiguous contractions
554: */
555: public void TestDiscontiguous() {
556: String rulestr = "&z < AB < X\u0300 < ABC < X\u0300\u0315";
557: String src[] = {
558: "ADB",
559: "ADBC",
560: "A\u0315B",
561: "A\u0315BC",
562: // base character blocked
563: "XD\u0300",
564: "XD\u0300\u0315",
565: // non blocking combining character
566: "X\u0319\u0300",
567: "X\u0319\u0300\u0315",
568: // blocking combining character
569: "X\u0314\u0300",
570: "X\u0314\u0300\u0315",
571: // contraction prefix
572: "ABDC", "AB\u0315C", "X\u0300D\u0315",
573: "X\u0300\u0319\u0315",
574: "X\u0300\u031A\u0315",
575: // ends not with a contraction character
576: "X\u0319\u0300D", "X\u0319\u0300\u0315D",
577: "X\u0300D\u0315D", "X\u0300\u0319\u0315D",
578: "X\u0300\u031A\u0315D" };
579: String tgt[] = {// non blocking combining character
580: "A D B",
581: "A D BC",
582: "A \u0315 B",
583: "A \u0315 BC",
584: // base character blocked
585: "X D \u0300",
586: "X D \u0300\u0315",
587: // non blocking combining character
588: "X\u0300 \u0319",
589: "X\u0300\u0315 \u0319",
590: // blocking combining character
591: "X \u0314 \u0300",
592: "X \u0314 \u0300\u0315",
593: // contraction prefix
594: "AB DC", "AB \u0315 C", "X\u0300 D \u0315",
595: "X\u0300\u0315 \u0319",
596: "X\u0300 \u031A \u0315",
597: // ends not with a contraction character
598: "X\u0300 \u0319D", "X\u0300\u0315 \u0319D",
599: "X\u0300 D\u0315D", "X\u0300\u0315 \u0319D",
600: "X\u0300 \u031A\u0315D" };
601: int count = 0;
602: try {
603: RuleBasedCollator coll = new RuleBasedCollator(rulestr);
604: CollationElementIterator iter = coll
605: .getCollationElementIterator("");
606: CollationElementIterator resultiter = coll
607: .getCollationElementIterator("");
608: while (count < src.length) {
609: iter.setText(src[count]);
610: int s = 0;
611: while (s < tgt[count].length()) {
612: int e = tgt[count].indexOf(' ', s);
613: if (e < 0) {
614: e = tgt[count].length();
615: }
616: String resultstr = tgt[count].substring(s, e);
617: resultiter.setText(resultstr);
618: int ce = resultiter.next();
619: while (ce != CollationElementIterator.NULLORDER) {
620: if (ce != iter.next()) {
621: errln("Discontiguos contraction test mismatch at"
622: + count);
623: return;
624: }
625: ce = resultiter.next();
626: }
627: s = e + 1;
628: }
629: iter.reset();
630: CollationTest.backAndForth(this , iter);
631: count++;
632: }
633: } catch (Exception e) {
634: warnln("Error running discontiguous tests " + e.toString());
635: }
636: }
637:
638: /**
639: * Test the incremental normalization
640: */
641: public void TestNormalization() {
642: String rules = "&a < \u0300\u0315 < A\u0300\u0315 < \u0316\u0315B < \u0316\u0300\u0315";
643: String testdata[] = { "\u1ED9", "o\u0323\u0302",
644: "\u0300\u0315", "\u0315\u0300", "A\u0300\u0315B",
645: "A\u0315\u0300B", "A\u0316\u0315B", "A\u0315\u0316B",
646: "\u0316\u0300\u0315", "\u0315\u0300\u0316",
647: "A\u0316\u0300\u0315B", "A\u0315\u0300\u0316B",
648: "\u0316\u0315\u0300", "A\u0316\u0315\u0300B" };
649: RuleBasedCollator coll = null;
650: try {
651: coll = new RuleBasedCollator(rules);
652: coll.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
653: } catch (Exception e) {
654: warnln("ERROR: in creation of collator using rules "
655: + rules);
656: return;
657: }
658:
659: CollationElementIterator iter = coll
660: .getCollationElementIterator("testing");
661: for (int count = 0; count < testdata.length; count++) {
662: iter.setText(testdata[count]);
663: CollationTest.backAndForth(this, iter);
664: }
665: }
666: }
|