0001: /*
0002: *******************************************************************************
0003: * Copyright (C) 2002-2005, International Business Machines Corporation and *
0004: * others. All Rights Reserved. *
0005: *******************************************************************************
0006: */
0007:
0008: /**
0009: * Port From: ICU4C v2.1 : collate/CollationRegressionTest
0010: * Source File: $ICU4CRoot/source/test/intltest/regcoll.cpp
0011: **/package com.ibm.icu.dev.test.collator;
0012:
0013: import com.ibm.icu.dev.test.*;
0014: import com.ibm.icu.text.*;
0015:
0016: import java.util.Locale;
0017: import java.util.Vector;
0018: import java.text.ParseException;
0019:
0020: public class CollationRegressionTest extends TestFmwk {
0021: public static void main(String[] args) throws Exception {
0022: new CollationRegressionTest().run(args);
0023: }
0024:
0025: // @bug 4048446
0026: //
0027: // CollationElementIterator.reset() doesn't work
0028: //
0029: public void Test4048446() {
0030: final String test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
0031: //final String test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
0032: RuleBasedCollator en_us = (RuleBasedCollator) Collator
0033: .getInstance(Locale.US);
0034: CollationElementIterator i1 = en_us
0035: .getCollationElementIterator(test1);
0036: CollationElementIterator i2 = en_us
0037: .getCollationElementIterator(test1);
0038:
0039: if (i1 == null || i2 == null) {
0040: errln("Could not create CollationElementIterator's");
0041: return;
0042: }
0043:
0044: while (i1.next() != CollationElementIterator.NULLORDER) {
0045: //
0046: }
0047:
0048: i1.reset();
0049: assertEqual(i1, i2);
0050: }
0051:
0052: void assertEqual(CollationElementIterator i1,
0053: CollationElementIterator i2) {
0054: int c1, c2, count = 0;
0055:
0056: do {
0057: c1 = i1.next();
0058: c2 = i2.next();
0059:
0060: if (c1 != c2) {
0061: String msg = "";
0062: String msg1 = " ";
0063:
0064: msg += msg1 + count;
0065: msg += ": strength(0x" + Integer.toHexString(c1);
0066: msg += ") != strength(0x" + Integer.toHexString(c2);
0067: msg += ")";
0068: errln(msg);
0069: break;
0070: }
0071: count += 1;
0072: } while (c1 != CollationElementIterator.NULLORDER);
0073: }
0074:
0075: // @bug 4051866
0076: //
0077: // Collator -> rules -> Collator round-trip broken for expanding characters
0078: //
0079: public void Test4051866() {
0080: String rules = "< o & oe ,o\u3080& oe ,\u1530 ,O& OE ,O\u3080& OE ,\u1520< p ,P";
0081:
0082: // Build a collator containing expanding characters
0083: RuleBasedCollator c1 = null;
0084:
0085: try {
0086: c1 = new RuleBasedCollator(rules);
0087: } catch (Exception e) {
0088: errln("Fail to create RuleBasedCollator with rules:"
0089: + rules);
0090: return;
0091: }
0092:
0093: // Build another using the rules from the first
0094: RuleBasedCollator c2 = null;
0095: try {
0096: c2 = new RuleBasedCollator(c1.getRules());
0097: } catch (Exception e) {
0098: errln("Fail to create RuleBasedCollator with rules:"
0099: + rules);
0100: return;
0101: }
0102:
0103: // Make sure they're the same
0104: if (!(c1.getRules().equals(c2.getRules()))) {
0105: errln("Rules are not equal");
0106: }
0107: }
0108:
0109: // @bug 4053636
0110: //
0111: // Collator thinks "black-bird" == "black"
0112: //
0113: public void Test4053636() {
0114: RuleBasedCollator en_us = (RuleBasedCollator) Collator
0115: .getInstance(Locale.US);
0116: if (en_us.equals("black_bird", "black")) {
0117: errln("black-bird == black");
0118: }
0119: }
0120:
0121: // @bug 4054238
0122: //
0123: // CollationElementIterator will not work correctly if the associated
0124: // Collator object's mode is changed
0125: //
0126: public void Test4054238(/* char* par */) {
0127: final char[] chars3 = { 0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b,
0128: 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c,
0129: 0x00FC, 0x62, 0x63, 0x6b, 0 };
0130: final String test3 = new String(chars3);
0131: RuleBasedCollator c = (RuleBasedCollator) Collator
0132: .getInstance(Locale.US);
0133:
0134: // NOTE: The Java code uses en_us to create the CollationElementIterators
0135: // but I'm pretty sure that's wrong, so I've changed this to use c.
0136: c.setDecomposition(Collator.NO_DECOMPOSITION);
0137: CollationElementIterator i1 = c
0138: .getCollationElementIterator(test3);
0139: logln("Offset:" + i1.getOffset());
0140: }
0141:
0142: // @bug 4054734
0143: //
0144: // Collator::IDENTICAL documented but not implemented
0145: //
0146: public void Test4054734(/* char* par */) {
0147:
0148: //Here's the original Java:
0149:
0150: String[] decomp = { "\u0001", "<", "\u0002", "\u0001", "=",
0151: "\u0001", "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise
0152: "\u00C0", "=", "A\u0300", // Decomp should make these equal
0153: };
0154:
0155: RuleBasedCollator c = (RuleBasedCollator) Collator
0156: .getInstance(Locale.US);
0157: c.setStrength(Collator.IDENTICAL);
0158: c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
0159: compareArray(c, decomp);
0160: }
0161:
0162: void compareArray(Collator c, String[] tests) {
0163:
0164: int expectedResult = 0;
0165:
0166: for (int i = 0; i < tests.length; i += 3) {
0167: String source = tests[i];
0168: String comparison = tests[i + 1];
0169: String target = tests[i + 2];
0170:
0171: if (comparison.equals("<")) {
0172: expectedResult = -1;
0173: } else if (comparison.equals(">")) {
0174: expectedResult = 1;
0175: } else if (comparison.equals("=")) {
0176: expectedResult = 0;
0177: } else {
0178: errln("Bogus comparison string \"" + comparison + "\"");
0179: }
0180:
0181: int compareResult = 0;
0182:
0183: logln("i = " + i);
0184: logln(source);
0185: logln(target);
0186: try {
0187: compareResult = c.compare(source, target);
0188: } catch (Exception e) {
0189: errln(e.toString());
0190: }
0191:
0192: CollationKey sourceKey = null, targetKey = null;
0193: try {
0194: sourceKey = c.getCollationKey(source);
0195: } catch (Exception e) {
0196: errln("Couldn't get collationKey for source");
0197: continue;
0198: }
0199:
0200: try {
0201: targetKey = c.getCollationKey(target);
0202: } catch (Exception e) {
0203: errln("Couldn't get collationKey for target");
0204: continue;
0205: }
0206:
0207: int keyResult = sourceKey.compareTo(targetKey);
0208: reportCResult(source, target, sourceKey, targetKey,
0209: compareResult, keyResult, compareResult,
0210: expectedResult);
0211: }
0212: }
0213:
0214: void reportCResult(String source, String target,
0215: CollationKey sourceKey, CollationKey targetKey,
0216: int compareResult, int keyResult, int incResult,
0217: int expectedResult) {
0218: if (expectedResult < -1 || expectedResult > 1) {
0219: errln("***** invalid call to reportCResult ****");
0220: return;
0221: }
0222:
0223: boolean ok1 = (compareResult == expectedResult);
0224: boolean ok2 = (keyResult == expectedResult);
0225: boolean ok3 = (incResult == expectedResult);
0226:
0227: if (ok1 && ok2 && ok3 && !isVerbose()) {
0228: return;
0229: } else {
0230: String msg1 = ok1 ? "Ok: compare(\"" : "FAIL: compare(\"";
0231: String msg2 = "\", \"";
0232: String msg3 = "\") returned ";
0233: String msg4 = "; expected ";
0234:
0235: String sExpect = new String("");
0236: String sResult = new String("");
0237: sResult = appendCompareResult(compareResult, sResult);
0238: sExpect = appendCompareResult(expectedResult, sExpect);
0239: if (ok1) {
0240: logln(msg1 + source + msg2 + target + msg3 + sResult);
0241: } else {
0242: errln(msg1 + source + msg2 + target + msg3 + sResult
0243: + msg4 + sExpect);
0244: }
0245:
0246: msg1 = ok2 ? "Ok: key(\"" : "FAIL: key(\"";
0247: msg2 = "\").compareTo(key(\"";
0248: msg3 = "\")) returned ";
0249: sResult = appendCompareResult(keyResult, sResult);
0250: if (ok2) {
0251: logln(msg1 + source + msg2 + target + msg3 + sResult);
0252: } else {
0253: errln(msg1 + source + msg2 + target + msg3 + sResult
0254: + msg4 + sExpect);
0255: msg1 = " ";
0256: msg2 = " vs. ";
0257: errln(msg1 + prettify(sourceKey) + msg2
0258: + prettify(targetKey));
0259: }
0260:
0261: msg1 = ok3 ? "Ok: incCompare(\"" : "FAIL: incCompare(\"";
0262: msg2 = "\", \"";
0263: msg3 = "\") returned ";
0264:
0265: sResult = appendCompareResult(incResult, sResult);
0266:
0267: if (ok3) {
0268: logln(msg1 + source + msg2 + target + msg3 + sResult);
0269: } else {
0270: errln(msg1 + source + msg2 + target + msg3 + sResult
0271: + msg4 + sExpect);
0272: }
0273: }
0274: }
0275:
0276: String appendCompareResult(int result, String target) {
0277: if (result == -1) { //LESS
0278: target += "LESS";
0279: } else if (result == 0) { //EQUAL
0280: target += "EQUAL";
0281: } else if (result == 1) { //GREATER
0282: target += "GREATER";
0283: } else {
0284: String huh = "?";
0285: target += huh + result;
0286: }
0287: return target;
0288: }
0289:
0290: String prettify(CollationKey sourceKey) {
0291: int i;
0292: byte[] bytes = sourceKey.toByteArray();
0293: String target = "[";
0294:
0295: for (i = 0; i < bytes.length; i++) {
0296: target += Integer.toHexString(bytes[i]);
0297: target += " ";
0298: }
0299: target += "]";
0300: return target;
0301: }
0302:
0303: // @bug 4054736
0304: //
0305: // Full Decomposition mode not implemented
0306: //
0307: public void Test4054736(/* char* par */) {
0308: RuleBasedCollator c = (RuleBasedCollator) Collator
0309: .getInstance(Locale.US);
0310:
0311: c.setStrength(Collator.SECONDARY);
0312: c.setDecomposition(Collator.NO_DECOMPOSITION);
0313:
0314: final String[] tests = { "\uFB4F", "\u003d", "\u05D0\u05DC" }; // Alef-Lamed vs. Alef, Lamed
0315: compareArray(c, tests);
0316: }
0317:
0318: // @bug 4058613
0319: //
0320: // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean
0321: //
0322: public void Test4058613(/* char* par */) {
0323: // Creating a default collator doesn't work when Korean is the default
0324: // locale
0325:
0326: Locale oldDefault = Locale.getDefault();
0327: Locale.setDefault(new Locale("ko", ""));
0328:
0329: Collator c = null;
0330:
0331: c = Collator.getInstance(new Locale("en", "US"));
0332:
0333: if (c == null) {
0334: errln("Could not create a Korean collator");
0335: Locale.setDefault(oldDefault);
0336: return;
0337: }
0338:
0339: // Since the fix to this bug was to turn off decomposition for Korean collators,
0340: // ensure that's what we got
0341: if (c.getDecomposition() != Collator.NO_DECOMPOSITION) {
0342: errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator");
0343: }
0344:
0345: Locale.setDefault(oldDefault);
0346: }
0347:
0348: // @bug 4059820
0349: //
0350: // RuleBasedCollator.getRules does not return the exact pattern as input
0351: // for expanding character sequences
0352: //
0353: public void Test4059820(/* char* par */) {
0354: RuleBasedCollator c = null;
0355: String rules = "< a < b , c/a < d < z";
0356: try {
0357: c = new RuleBasedCollator(rules);
0358: } catch (Exception e) {
0359: errln("Failure building a collator.");
0360: return;
0361: }
0362:
0363: if (c.getRules().indexOf("c/a") == -1) {
0364: errln("returned rules do not contain 'c/a'");
0365: }
0366: }
0367:
0368: // @bug 4060154
0369: //
0370: // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
0371: //
0372: public void Test4060154(/* char* par */) {
0373: String rules = "< g, G < h, H < i, I < j, J & H < \u0131, \u0130, i, I";
0374:
0375: RuleBasedCollator c = null;
0376: try {
0377: c = new RuleBasedCollator(rules);
0378: } catch (Exception e) {
0379: //System.out.println(e);
0380: errln("failure building collator.");
0381: return;
0382: }
0383:
0384: c.setDecomposition(Collator.NO_DECOMPOSITION);
0385:
0386: String[] tertiary = { "A", "<", "B", "H", "<", "\u0131", "H",
0387: "<", "I", "\u0131", "<", "\u0130", "\u0130", "<", "i",
0388: "\u0130", ">", "H", };
0389:
0390: c.setStrength(Collator.TERTIARY);
0391: compareArray(c, tertiary);
0392:
0393: String[] secondary = { "H", "<", "I", "\u0131", "=", "\u0130", };
0394:
0395: c.setStrength(Collator.PRIMARY);
0396: compareArray(c, secondary);
0397: }
0398:
0399: // @bug 4062418
0400: //
0401: // Secondary/Tertiary comparison incorrect in French Secondary
0402: //
0403: public void Test4062418(/* char* par */) {
0404: RuleBasedCollator c = null;
0405: try {
0406: c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
0407: } catch (Exception e) {
0408: errln("Failed to create collator for Locale::FRANCE()");
0409: return;
0410: }
0411: c.setStrength(Collator.SECONDARY);
0412:
0413: String[] tests = { "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater
0414: };
0415:
0416: compareArray(c, tests);
0417: }
0418:
0419: // @bug 4065540
0420: //
0421: // Collator::compare() method broken if either string contains spaces
0422: //
0423: public void Test4065540(/* char* par */) {
0424: RuleBasedCollator en_us = (RuleBasedCollator) Collator
0425: .getInstance(Locale.US);
0426: if (en_us.compare("abcd e", "abcd f") == 0) {
0427: errln("'abcd e' == 'abcd f'");
0428: }
0429: }
0430:
0431: // @bug 4066189
0432: //
0433: // Unicode characters need to be recursively decomposed to get the
0434: // correct result. For example,
0435: // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
0436: //
0437: public void Test4066189(/* char* par */) {
0438: final String test1 = "\u1EB1";
0439: final String test2 = "\u0061\u0306\u0300";
0440:
0441: // NOTE: The java code used en_us to create the
0442: // CollationElementIterator's. I'm pretty sure that
0443: // was wrong, so I've change the code to use c1 and c2
0444: RuleBasedCollator c1 = (RuleBasedCollator) Collator
0445: .getInstance(Locale.US);
0446: c1.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
0447: CollationElementIterator i1 = c1
0448: .getCollationElementIterator(test1);
0449:
0450: RuleBasedCollator c2 = (RuleBasedCollator) Collator
0451: .getInstance(Locale.US);
0452: c2.setDecomposition(Collator.NO_DECOMPOSITION);
0453: CollationElementIterator i2 = c2
0454: .getCollationElementIterator(test2);
0455:
0456: assertEqual(i1, i2);
0457: }
0458:
0459: // @bug 4066696
0460: //
0461: // French secondary collation checking at the end of compare iteration fails
0462: //
0463: public void Test4066696(/* char* par */) {
0464: RuleBasedCollator c = null;
0465: try {
0466: c = (RuleBasedCollator) Collator.getInstance(Locale.FRANCE);
0467: } catch (Exception e) {
0468: errln("Failure creating collator for Locale::getFrance()");
0469: return;
0470: }
0471: c.setStrength(Collator.SECONDARY);
0472:
0473: String[] tests = { "\u00e0", ">", "\u01fa", // a-grave < A-ring-acute
0474: };
0475: compareArray(c, tests);
0476: }
0477:
0478: // @bug 4076676
0479: //
0480: // Bad canonicalization of same-class combining characters
0481: //
0482: public void Test4076676(/* char* par */) {
0483: // These combining characters are all in the same class, so they should not
0484: // be reordered, and they should compare as unequal.
0485: final String s1 = "\u0041\u0301\u0302\u0300";
0486: final String s2 = "\u0041\u0302\u0300\u0301";
0487:
0488: RuleBasedCollator c = (RuleBasedCollator) Collator
0489: .getInstance(Locale.US);
0490: c.setStrength(Collator.TERTIARY);
0491:
0492: if (c.compare(s1, s2) == 0) {
0493: errln("Same-class combining chars were reordered");
0494: }
0495: }
0496:
0497: // @bug 4078588
0498: //
0499: // RuleBasedCollator breaks on "< a < bb" rule
0500: //
0501: public void Test4078588(/* char *par */) {
0502: RuleBasedCollator rbc = null;
0503: try {
0504: rbc = new RuleBasedCollator("< a < bb");
0505: } catch (Exception e) {
0506: errln("Failed to create RuleBasedCollator.");
0507: return;
0508: }
0509:
0510: int result = rbc.compare("a", "bb");
0511:
0512: if (result >= 0) {
0513: errln("Compare(a,bb) returned " + result + "; expected -1");
0514: }
0515: }
0516:
0517: // @bug 4079231
0518: //
0519: // RuleBasedCollator::operator==(NULL) throws NullPointerException
0520: //
0521: public void Test4079231(/* char* par */) {
0522: RuleBasedCollator en_us = (RuleBasedCollator) Collator
0523: .getInstance(Locale.US);
0524: try {
0525: if (en_us.equals(null)) {
0526: errln("en_us.equals(null) returned true");
0527: }
0528: } catch (Exception e) {
0529: errln("en_us.equals(null) threw " + e.toString());
0530: }
0531: }
0532:
0533: // @bug 4081866
0534: //
0535: // Combining characters in different classes not reordered properly.
0536: //
0537: public void Test4081866(/* char* par */) {
0538: // These combining characters are all in different classes,
0539: // so they should be reordered and the strings should compare as equal.
0540: String s1 = "\u0041\u0300\u0316\u0327\u0315";
0541: String s2 = "\u0041\u0327\u0316\u0315\u0300";
0542:
0543: RuleBasedCollator c = (RuleBasedCollator) Collator
0544: .getInstance(Locale.US);
0545: c.setStrength(Collator.TERTIARY);
0546:
0547: // Now that the default collators are set to NO_DECOMPOSITION
0548: // (as a result of fixing bug 4114077), we must set it explicitly
0549: // when we're testing reordering behavior. -- lwerner, 5/5/98
0550: c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
0551: if (c.compare(s1, s2) != 0) {
0552: errln("Combining chars were not reordered");
0553: }
0554: }
0555:
0556: // @bug 4087241
0557: //
0558: // string comparison errors in Scandinavian collators
0559: //
0560: public void Test4087241(/* char* par */) {
0561: Locale da_DK = new Locale("da", "DK");
0562: RuleBasedCollator c = null;
0563: try {
0564: c = (RuleBasedCollator) Collator.getInstance(da_DK);
0565: } catch (Exception e) {
0566: errln("Failed to create collator for da_DK locale");
0567: return;
0568: }
0569: c.setStrength(Collator.SECONDARY);
0570: String tests[] = { "\u007a", "\u003c", "\u00E6", // z < ae
0571: "\u0061\u0308", "\u003c", "\u0061\u030A", // a-unlaut < a-ring
0572: "\u0059", "\u003c", "\u0075\u0308", // Y < u-umlaut
0573: };
0574: compareArray(c, tests);
0575: }
0576:
0577: // @bug 4087243
0578: //
0579: // CollationKey takes ignorable strings into account when it shouldn't
0580: //
0581: public void Test4087243(/* char* par */) {
0582: RuleBasedCollator c = (RuleBasedCollator) Collator
0583: .getInstance(Locale.US);
0584: c.setStrength(Collator.TERTIARY);
0585: String tests[] = { "\u0031\u0032\u0033", "\u003d",
0586: "\u0031\u0032\u0033\u0001" // 1 2 3 = 1 2 3 ctrl-A
0587: };
0588: compareArray(c, tests);
0589: }
0590:
0591: // @bug 4092260
0592: //
0593: // Mu/micro conflict
0594: // Micro symbol and greek lowercase letter Mu should sort identically
0595: //
0596: public void Test4092260(/* char* par */) {
0597: Locale el = new Locale("el", "");
0598: Collator c = null;
0599: try {
0600: c = Collator.getInstance(el);
0601: } catch (Exception e) {
0602: errln("Failed to create collator for el locale.");
0603: return;
0604: }
0605: // These now have tertiary differences in UCA
0606: c.setStrength(Collator.SECONDARY);
0607: String tests[] = { "\u00B5", "\u003d", "\u03BC", };
0608: compareArray(c, tests);
0609: }
0610:
0611: // @bug 4095316
0612: //
0613: public void Test4095316(/* char* par */) {
0614: Locale el_GR = new Locale("el", "GR");
0615: Collator c = null;
0616: try {
0617: c = Collator.getInstance(el_GR);
0618: } catch (Exception e) {
0619: errln("Failed to create collator for el_GR locale");
0620: return;
0621: }
0622: // These now have tertiary differences in UCA
0623: //c->setStrength(Collator::TERTIARY);
0624: //c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
0625: c.setStrength(Collator.SECONDARY);
0626: String tests[] = { "\u03D4", "\u003d", "\u03AB", };
0627: compareArray(c, tests);
0628: }
0629:
0630: // @bug 4101940
0631: //
0632: public void Test4101940(/* char* par */) {
0633: RuleBasedCollator c = null;
0634: String rules = "< a < b";
0635: String nothing = "";
0636: try {
0637: c = new RuleBasedCollator(rules);
0638: } catch (Exception e) {
0639: errln("Failed to create RuleBasedCollator");
0640: return;
0641: }
0642: CollationElementIterator i = c
0643: .getCollationElementIterator(nothing);
0644: i.reset();
0645: if (i.next() != CollationElementIterator.NULLORDER) {
0646: errln("next did not return NULLORDER");
0647: }
0648: }
0649:
0650: // @bug 4103436
0651: //
0652: // Collator::compare not handling spaces properly
0653: //
0654: public void Test4103436(/* char* par */) {
0655: RuleBasedCollator c = (RuleBasedCollator) Collator
0656: .getInstance(Locale.US);
0657: c.setStrength(Collator.TERTIARY);
0658: String[] tests = {
0659: "\u0066\u0069\u006c\u0065",
0660: "\u003c",
0661: "\u0066\u0069\u006c\u0065\u0020\u0061\u0063\u0063\u0065\u0073\u0073",
0662: "\u0066\u0069\u006c\u0065",
0663: "\u003c",
0664: "\u0066\u0069\u006c\u0065\u0061\u0063\u0063\u0065\u0073\u0073", };
0665: compareArray(c, tests);
0666: }
0667:
0668: // @bug 4114076
0669: //
0670: // Collation not Unicode conformant with Hangul syllables
0671: //
0672: public void Test4114076(/* char* par */) {
0673: RuleBasedCollator c = (RuleBasedCollator) Collator
0674: .getInstance(Locale.US);
0675: c.setStrength(Collator.TERTIARY);
0676:
0677: //
0678: // With Canonical decomposition, Hangul syllables should get decomposed
0679: // into Jamo, but Jamo characters should not be decomposed into
0680: // conjoining Jamo
0681: //
0682: String test1[] = { "\ud4db", "\u003d", "\u1111\u1171\u11b6" };
0683:
0684: c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
0685: compareArray(c, test1);
0686:
0687: // From UTR #15:
0688: // *In earlier versions of Unicode, jamo characters like ksf
0689: // had compatibility mappings to kf + sf. These mappings were
0690: // removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
0691: // That is, the following test is obsolete as of 2.1.9
0692:
0693: //obsolete- // With Full decomposition, it should go all the way down to
0694: //obsolete- // conjoining Jamo characters.
0695: //obsolete- //
0696: //obsolete- static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
0697: //obsolete- {
0698: //obsolete- {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0}
0699: //obsolete- };
0700: //obsolete-
0701: //obsolete- c->setDecomposition(Normalizer::DECOMP_COMPAT);
0702: //obsolete- compareArray(*c, test2, ARRAY_LENGTH(test2));
0703: }
0704:
0705: // @bug 4114077
0706: //
0707: // Collation with decomposition off doesn't work for Europe
0708: //
0709: public void Test4114077(/* char* par */) {
0710: // Ensure that we get the same results with decomposition off
0711: // as we do with it on....
0712: RuleBasedCollator c = (RuleBasedCollator) Collator
0713: .getInstance(Locale.US);
0714: c.setStrength(Collator.TERTIARY);
0715: String test1[] = {
0716: "\u00C0",
0717: "\u003d",
0718: "\u0041\u0300", // Should be equivalent
0719: "\u0070\u00ea\u0063\u0068\u0065", "\u003e",
0720: "\u0070\u00e9\u0063\u0068\u00e9", "\u0204", "\u003d",
0721: "\u0045\u030F", "\u01fa", "\u003d",
0722: "\u0041\u030a\u0301", // a-ring-acute -> a-ring, acute
0723: // -> a, ring, acute
0724: "\u0041\u0300\u0316", "\u003c", "\u0041\u0316\u0300" // No reordering --> unequal
0725: };
0726:
0727: c.setDecomposition(Collator.NO_DECOMPOSITION);
0728: compareArray(c, test1);
0729:
0730: String test2[] = { "\u0041\u0300\u0316", "\u003d",
0731: "\u0041\u0316\u0300" // Reordering --> equal
0732: };
0733:
0734: c.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
0735: compareArray(c, test2);
0736: }
0737:
0738: // @bug 4124632
0739: //
0740: // Collator::getCollationKey was hanging on certain character sequences
0741: //
0742: public void Test4124632(/* char* par */) {
0743: Collator coll = null;
0744: try {
0745: coll = Collator.getInstance(Locale.JAPAN);
0746: } catch (Exception e) {
0747: errln("Failed to create collator for Locale::JAPAN");
0748: return;
0749: }
0750: String test = "\u0041\u0308\u0062\u0063";
0751: CollationKey key;
0752: try {
0753: key = coll.getCollationKey(test);
0754: logln(key.getSourceString());
0755: } catch (Exception e) {
0756: errln("CollationKey creation failed.");
0757: }
0758: }
0759:
0760: // @bug 4132736
0761: //
0762: // sort order of french words with multiple accents has errors
0763: //
0764: public void Test4132736(/* char* par */) {
0765: Collator c = null;
0766: try {
0767: c = Collator.getInstance(Locale.FRANCE);
0768: c.setStrength(Collator.TERTIARY);
0769: } catch (Exception e) {
0770: errln("Failed to create a collator for Locale::getFrance()");
0771: }
0772:
0773: String test1[] = { "\u0065\u0300\u0065\u0301", "\u003c",
0774: "\u0065\u0301\u0065\u0300", "\u0065\u0300\u0301",
0775: "\u003c", "\u0065\u0301\u0300", };
0776: compareArray(c, test1);
0777: }
0778:
0779: // @bug 4133509
0780: //
0781: // The sorting using java.text.CollationKey is not in the exact order
0782: //
0783: public void Test4133509(/* char* par */) {
0784: RuleBasedCollator en_us = (RuleBasedCollator) Collator
0785: .getInstance(Locale.US);
0786: String test1[] = {
0787: "\u0045\u0078\u0063\u0065\u0070\u0074\u0069\u006f\u006e",
0788: "\u003c",
0789: "\u0045\u0078\u0063\u0065\u0070\u0074\u0069\u006f\u006e\u0049\u006e\u0049\u006e\u0069\u0074\u0069\u0061\u006c\u0069\u007a\u0065\u0072\u0045\u0072\u0072\u006f\u0072",
0790: "\u0047\u0072\u0061\u0070\u0068\u0069\u0063\u0073",
0791: "\u003c",
0792: "\u0047\u0072\u0061\u0070\u0068\u0069\u0063\u0073\u0045\u006e\u0076\u0069\u0072\u006f\u006e\u006d\u0065\u006e\u0074",
0793: "\u0053\u0074\u0072\u0069\u006e\u0067",
0794: "\u003c",
0795: "\u0053\u0074\u0072\u0069\u006e\u0067\u0042\u0075\u0066\u0066\u0065\u0072", };
0796:
0797: compareArray(en_us, test1);
0798: }
0799:
0800: // @bug 4139572
0801: //
0802: // getCollationKey throws exception for spanish text
0803: // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
0804: //
0805: public void Test4139572(/* char* par */) {
0806: //
0807: // Code pasted straight from the bug report
0808: // (and then translated to C++ ;-)
0809: //
0810: // create spanish locale and collator
0811: Locale l = new Locale("es", "es");
0812: Collator col = null;
0813: try {
0814: col = Collator.getInstance(l);
0815: } catch (Exception e) {
0816: errln("Failed to create a collator for es_es locale.");
0817: return;
0818: }
0819: CollationKey key = null;
0820: // this spanish phrase kills it!
0821: try {
0822: key = col.getCollationKey("Nombre De Objeto");
0823: logln("source:" + key.getSourceString());
0824: } catch (Exception e) {
0825: errln("Error creating CollationKey for \"Nombre De Ojbeto\"");
0826: }
0827: }
0828:
0829: // @bug 4141640
0830: //
0831: // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
0832: //
0833: public void Test4141640(/* char* par */) {
0834: //
0835: // Rather than just creating a Swedish collator, we might as well
0836: // try to instantiate one for every locale available on the system
0837: // in order to prevent this sort of bug from cropping up in the future
0838: //
0839: Locale locales[] = Collator.getAvailableLocales();
0840:
0841: for (int i = 0; i < locales.length; i += 1) {
0842: Collator c = null;
0843: try {
0844: c = Collator.getInstance(locales[i]);
0845: logln("source: " + c.getStrength());
0846: } catch (Exception e) {
0847: String msg = "";
0848: msg += "Could not create collator for locale ";
0849: msg += locales[i].getDisplayName();
0850: errln(msg);
0851: }
0852: }
0853: }
0854:
0855: private void checkListOrder(String[] sortedList, Collator c) {
0856: // this function uses the specified Collator to make sure the
0857: // passed-in list is already sorted into ascending order
0858: for (int i = 0; i < sortedList.length - 1; i++) {
0859: if (c.compare(sortedList[i], sortedList[i + 1]) >= 0) {
0860: errln("List out of order at element #" + i + ": "
0861: + sortedList[i] + " >= " + sortedList[i + 1]);
0862: }
0863: }
0864: }
0865:
0866: public void Test4171974() {
0867: // test French accent ordering more thoroughly
0868: /*String[] frenchList = {
0869: "\u0075\u0075", // u u
0870: "\u00fc\u0075", // u-umlaut u
0871: "\u01d6\u0075", // u-umlaut-macron u
0872: "\u016b\u0075", // u-macron u
0873: "\u1e7b\u0075", // u-macron-umlaut u
0874: "\u0075\u00fc", // u u-umlaut
0875: "\u00fc\u00fc", // u-umlaut u-umlaut
0876: "\u01d6\u00fc", // u-umlaut-macron u-umlaut
0877: "\u016b\u00fc", // u-macron u-umlaut
0878: "\u1e7b\u00fc", // u-macron-umlaut u-umlaut
0879: "\u0075\u01d6", // u u-umlaut-macron
0880: "\u00fc\u01d6", // u-umlaut u-umlaut-macron
0881: "\u01d6\u01d6", // u-umlaut-macron u-umlaut-macron
0882: "\u016b\u01d6", // u-macron u-umlaut-macron
0883: "\u1e7b\u01d6", // u-macron-umlaut u-umlaut-macron
0884: "\u0075\u016b", // u u-macron
0885: "\u00fc\u016b", // u-umlaut u-macron
0886: "\u01d6\u016b", // u-umlaut-macron u-macron
0887: "\u016b\u016b", // u-macron u-macron
0888: "\u1e7b\u016b", // u-macron-umlaut u-macron
0889: "\u0075\u1e7b", // u u-macron-umlaut
0890: "\u00fc\u1e7b", // u-umlaut u-macron-umlaut
0891: "\u01d6\u1e7b", // u-umlaut-macron u-macron-umlaut
0892: "\u016b\u1e7b", // u-macron u-macron-umlaut
0893: "\u1e7b\u1e7b" // u-macron-umlaut u-macron-umlaut
0894: };
0895: Collator french = Collator.getInstance(Locale.FRENCH);
0896:
0897: logln("Testing French order...");
0898: checkListOrder(frenchList, french);
0899:
0900: logln("Testing French order without decomposition...");
0901: french.setDecomposition(Collator.NO_DECOMPOSITION);
0902: checkListOrder(frenchList, french);*/
0903:
0904: String[] englishList = { "\u0075\u0075", // u u
0905: "\u0075\u00fc", // u u-umlaut
0906: "\u0075\u01d6", // u u-umlaut-macron
0907: "\u0075\u016b", // u u-macron
0908: "\u0075\u1e7b", // u u-macron-umlaut
0909: "\u00fc\u0075", // u-umlaut u
0910: "\u00fc\u00fc", // u-umlaut u-umlaut
0911: "\u00fc\u01d6", // u-umlaut u-umlaut-macron
0912: "\u00fc\u016b", // u-umlaut u-macron
0913: "\u00fc\u1e7b", // u-umlaut u-macron-umlaut
0914: "\u01d6\u0075", // u-umlaut-macron u
0915: "\u01d6\u00fc", // u-umlaut-macron u-umlaut
0916: "\u01d6\u01d6", // u-umlaut-macron u-umlaut-macron
0917: "\u01d6\u016b", // u-umlaut-macron u-macron
0918: "\u01d6\u1e7b", // u-umlaut-macron u-macron-umlaut
0919: "\u016b\u0075", // u-macron u
0920: "\u016b\u00fc", // u-macron u-umlaut
0921: "\u016b\u01d6", // u-macron u-umlaut-macron
0922: "\u016b\u016b", // u-macron u-macron
0923: "\u016b\u1e7b", // u-macron u-macron-umlaut
0924: "\u1e7b\u0075", // u-macron-umlaut u
0925: "\u1e7b\u00fc", // u-macron-umlaut u-umlaut
0926: "\u1e7b\u01d6", // u-macron-umlaut u-umlaut-macron
0927: "\u1e7b\u016b", // u-macron-umlaut u-macron
0928: "\u1e7b\u1e7b" // u-macron-umlaut u-macron-umlaut
0929: };
0930: Collator english = Collator.getInstance(Locale.ENGLISH);
0931:
0932: logln("Testing English order...");
0933: checkListOrder(englishList, english);
0934:
0935: logln("Testing English order without decomposition...");
0936: english.setDecomposition(Collator.NO_DECOMPOSITION);
0937: checkListOrder(englishList, english);
0938: }
0939:
0940: public void Test4179216() throws Exception {
0941: // you can position a CollationElementIterator in the middle of
0942: // a contracting character sequence, yielding a bogus collation
0943: // element
0944: RuleBasedCollator coll = (RuleBasedCollator) Collator
0945: .getInstance(Locale.US);
0946: coll = new RuleBasedCollator(coll.getRules()
0947: + " & C < ch , cH , Ch , CH < cat < crunchy");
0948: String testText = "church church catcatcher runcrunchynchy";
0949: CollationElementIterator iter = coll
0950: .getCollationElementIterator(testText);
0951:
0952: // test that the "ch" combination works properly
0953: iter.setOffset(4);
0954: int elt4 = CollationElementIterator.primaryOrder(iter.next());
0955:
0956: iter.reset();
0957: int elt0 = CollationElementIterator.primaryOrder(iter.next());
0958:
0959: iter.setOffset(5);
0960: int elt5 = CollationElementIterator.primaryOrder(iter.next());
0961:
0962: if (elt4 != elt0 || elt5 != elt0)
0963: errln("The collation elements at positions 0 (" + elt0
0964: + "), 4 (" + elt4 + "), and 5 (" + elt5
0965: + ") don't match.");
0966:
0967: // test that the "cat" combination works properly
0968: iter.setOffset(14);
0969: int elt14 = CollationElementIterator.primaryOrder(iter.next());
0970:
0971: iter.setOffset(15);
0972: int elt15 = CollationElementIterator.primaryOrder(iter.next());
0973:
0974: iter.setOffset(16);
0975: int elt16 = CollationElementIterator.primaryOrder(iter.next());
0976:
0977: iter.setOffset(17);
0978: int elt17 = CollationElementIterator.primaryOrder(iter.next());
0979:
0980: iter.setOffset(18);
0981: int elt18 = CollationElementIterator.primaryOrder(iter.next());
0982:
0983: iter.setOffset(19);
0984: int elt19 = CollationElementIterator.primaryOrder(iter.next());
0985:
0986: if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
0987: || elt14 != elt18 || elt14 != elt19)
0988: errln("\"cat\" elements don't match: elt14 = " + elt14
0989: + ", elt15 = " + elt15 + ", elt16 = " + elt16
0990: + ", elt17 = " + elt17 + ", elt18 = " + elt18
0991: + ", elt19 = " + elt19);
0992:
0993: // now generate a complete list of the collation elements,
0994: // first using next() and then using setOffset(), and
0995: // make sure both interfaces return the same set of elements
0996: iter.reset();
0997:
0998: int elt = iter.next();
0999: int count = 0;
1000: while (elt != CollationElementIterator.NULLORDER) {
1001: ++count;
1002: elt = iter.next();
1003: }
1004:
1005: String[] nextElements = new String[count];
1006: String[] setOffsetElements = new String[count];
1007: int lastPos = 0;
1008:
1009: iter.reset();
1010: elt = iter.next();
1011: count = 0;
1012: while (elt != CollationElementIterator.NULLORDER) {
1013: nextElements[count++] = testText.substring(lastPos, iter
1014: .getOffset());
1015: lastPos = iter.getOffset();
1016: elt = iter.next();
1017: }
1018: count = 0;
1019: for (int i = 0; i < testText.length();) {
1020: iter.setOffset(i);
1021: lastPos = iter.getOffset();
1022: elt = iter.next();
1023: setOffsetElements[count++] = testText.substring(lastPos,
1024: iter.getOffset());
1025: i = iter.getOffset();
1026: }
1027: for (int i = 0; i < nextElements.length; i++) {
1028: if (nextElements[i].equals(setOffsetElements[i])) {
1029: logln(nextElements[i]);
1030: } else {
1031: errln("Error: next() yielded " + nextElements[i]
1032: + ", but setOffset() yielded "
1033: + setOffsetElements[i]);
1034: }
1035: }
1036: }
1037:
1038: public void Test4216006() throws Exception {
1039: // rule parser barfs on "<\u00e0=a\u0300", and on other cases
1040: // where the same token (after normalization) appears twice in a row
1041: boolean caughtException = false;
1042: try {
1043: RuleBasedCollator dummy = new RuleBasedCollator(
1044: "\u00e0<a\u0300");
1045: } catch (ParseException e) {
1046: caughtException = true;
1047: }
1048: if (!caughtException) {
1049: throw new Exception(
1050: "\"a<a\" collation sequence didn't cause parse error!");
1051: }
1052:
1053: RuleBasedCollator collator = new RuleBasedCollator(
1054: "<\u00e0=a\u0300");
1055: //commented by Kevin 2003/10/21
1056: //for "FULL_DECOMPOSITION is not supported here." in ICU4J DOC
1057: //collator.setDecomposition(Collator.FULL_DECOMPOSITION);
1058: collator.setStrength(Collator.IDENTICAL);
1059:
1060: String[] tests = { "a\u0300", "=", "\u00e0", "\u00e0", "=",
1061: "a\u0300" };
1062:
1063: compareArray(collator, tests);
1064: }
1065:
1066: // CollationElementIterator.previous broken for expanding char sequences
1067: //
1068: public void Test4179686() throws Exception {
1069:
1070: // Create a collator with a few expanding character sequences in it....
1071: RuleBasedCollator coll = new RuleBasedCollator(en_us.getRules()
1072: + " & ae ; \u00e4 & AE ; \u00c4"
1073: + " & oe ; \u00f6 & OE ; \u00d6"
1074: + " & ue ; \u00fc & UE ; \u00dc");
1075:
1076: String text = "T\u00f6ne"; // o-umlaut
1077:
1078: CollationElementIterator iter = coll
1079: .getCollationElementIterator(text);
1080: Vector elements = new Vector();
1081: int elem;
1082:
1083: // Iterate forward and collect all of the elements into a Vector
1084: while ((elem = iter.next()) != CollationElementIterator.NULLORDER) {
1085: elements.addElement(new Integer(elem));
1086: }
1087:
1088: // Now iterate backward and make sure they're the same
1089: int index = elements.size() - 1;
1090: while ((elem = iter.previous()) != CollationElementIterator.NULLORDER) {
1091: int expect = ((Integer) elements.elementAt(index))
1092: .intValue();
1093:
1094: if (elem != expect) {
1095: errln("Mismatch at index " + index + ": got "
1096: + Integer.toString(elem, 16) + ", expected "
1097: + Integer.toString(expect, 16));
1098: }
1099: index--;
1100: }
1101: }
1102:
1103: private static RuleBasedCollator en_us;
1104:
1105: protected void init() throws Exception {
1106: if (en_us == null) {
1107: en_us = (RuleBasedCollator) Collator.getInstance(Locale.US);
1108: }
1109: }
1110:
1111: public void Test4244884() throws Exception {
1112: RuleBasedCollator coll = (RuleBasedCollator) Collator
1113: .getInstance(Locale.US);
1114: coll = new RuleBasedCollator(coll.getRules()
1115: + " & C < ch , cH , Ch , CH < cat < crunchy");
1116:
1117: String[] testStrings = new String[] { "car", "cave", "clamp",
1118: "cramp", "czar", "church", "catalogue", "crunchy",
1119: "dog" };
1120:
1121: for (int i = 1; i < testStrings.length; i++) {
1122: if (coll.compare(testStrings[i - 1], testStrings[i]) >= 0) {
1123: errln("error: \"" + testStrings[i - 1]
1124: + "\" is greater than or equal to \""
1125: + testStrings[i] + "\".");
1126: }
1127: }
1128: }
1129:
1130: // CollationElementIterator set doesn't work properly with next/prev
1131: public void Test4663220() {
1132: RuleBasedCollator collator = (RuleBasedCollator) Collator
1133: .getInstance(Locale.US);
1134: java.text.StringCharacterIterator stringIter = new java.text.StringCharacterIterator(
1135: "fox");
1136: CollationElementIterator iter = collator
1137: .getCollationElementIterator(stringIter);
1138:
1139: int[] elements_next = new int[3];
1140: logln("calling next:");
1141: for (int i = 0; i < 3; ++i) {
1142: logln("[" + i + "] " + (elements_next[i] = iter.next()));
1143: }
1144:
1145: int[] elements_fwd = new int[3];
1146: logln("calling set/next:");
1147: for (int i = 0; i < 3; ++i) {
1148: iter.setOffset(i);
1149: logln("[" + i + "] " + (elements_fwd[i] = iter.next()));
1150: }
1151:
1152: for (int i = 0; i < 3; ++i) {
1153: if (elements_next[i] != elements_fwd[i]) {
1154: errln("mismatch at position " + i + ": "
1155: + elements_next[i] + " != " + elements_fwd[i]);
1156: }
1157: }
1158: }
1159:
1160: /* RuleBasedCollator not subclassable
1161: * @bug 4146160
1162: //
1163: // RuleBasedCollator doesn't use createCollationElementIterator internally
1164: //
1165: public void Test4146160() {
1166: //
1167: // Use a custom collator class whose createCollationElementIterator
1168: // methods increment a count....
1169: //
1170: RuleBasedCollator en_us = (RuleBasedCollator) Collator.getInstance(Locale.US);
1171: My4146160Collator.count = 0;
1172: My4146160Collator mc = null;
1173: try {
1174: mc = new My4146160Collator(en_us);
1175: } catch (Exception e) {
1176: errln("Failed to create a My4146160Collator.");
1177: return;
1178: }
1179:
1180: CollationKey key = null;
1181: try {
1182: key = mc.getCollationKey("1");
1183: } catch (Exception e) {
1184: errln("Failure to get a CollationKey from a My4146160Collator.");
1185: return;
1186: }
1187:
1188: if (My4146160Collator.count < 1) {
1189: errln("My4146160Collator.getCollationElementIterator not called for getCollationKey");
1190: }
1191:
1192: My4146160Collator.count = 0;
1193: mc.compare("1", "2");
1194:
1195: if (My4146160Collator.count < 1) {
1196: errln("My4146160Collator.getCollationElementIterator not called for compare");
1197: }
1198: }*/
1199: }
1200:
1201: /* RuleBasedCollator not subclassable
1202: * class My4146160Collator extends RuleBasedCollator {
1203: static int count = 0;
1204:
1205: public My4146160Collator(RuleBasedCollator rbc) throws Exception {
1206: super(rbc.getRules());
1207: }
1208:
1209: public CollationElementIterator getCollationElementIterator(String text) {
1210: count += 1;
1211: return super.getCollationElementIterator(text);
1212: }
1213:
1214: public CollationElementIterator getCollationElementIterator(java.text.CharacterIterator text) {
1215: count += 1;
1216: return super.getCollationElementIterator(text);
1217: }
1218: }
1219: */
|