0001: /*
0002: *******************************************************************************
0003: * Copyright (C) 1996-2006, International Business Machines Corporation and *
0004: * others. All Rights Reserved. *
0005: *******************************************************************************
0006: */
0007: package com.ibm.icu.dev.test.translit;
0008:
0009: import com.ibm.icu.lang.*;
0010: import com.ibm.icu.text.*;
0011: import com.ibm.icu.dev.test.*;
0012: import com.ibm.icu.impl.Utility;
0013: import com.ibm.icu.impl.UtilityExtensions;
0014: import com.ibm.icu.util.CaseInsensitiveString;
0015: import com.ibm.icu.util.ULocale;
0016: import java.util.*;
0017:
0018: /***********************************************************************
0019:
0020: HOW TO USE THIS TEST FILE
0021: -or-
0022: How I developed on two platforms
0023: without losing (too much of) my mind
0024:
0025:
0026: 1. Add new tests by copying/pasting/changing existing tests. On Java,
0027: any public void method named Test...() taking no parameters becomes
0028: a test. On C++, you need to modify the header and add a line to
0029: the runIndexedTest() dispatch method.
0030:
0031: 2. Make liberal use of the expect() method; it is your friend.
0032:
0033: 3. The tests in this file exactly match those in a sister file on the
0034: other side. The two files are:
0035:
0036: icu4j: src/com.ibm.icu.dev.test/translit/TransliteratorTest.java
0037: icu4c: source/test/intltest/transtst.cpp
0038:
0039: ==> THIS IS THE IMPORTANT PART <==
0040:
0041: When you add a test in this file, add it in transtst.cpp too.
0042: Give it the same name and put it in the same relative place. This
0043: makes maintenance a lot simpler for any poor soul who ends up
0044: trying to synchronize the tests between icu4j and icu4c.
0045:
0046: 4. If you MUST enter a test that is NOT paralleled in the sister file,
0047: then add it in the special non-mirrored section. These are
0048: labeled
0049:
0050: "icu4j ONLY"
0051:
0052: or
0053:
0054: "icu4c ONLY"
0055:
0056: Make sure you document the reason the test is here and not there.
0057:
0058:
0059: Thank you.
0060: The Management
0061: ***********************************************************************/
0062:
0063: /**
0064: * @test
0065: * @summary General test of Transliterator
0066: */
0067: public class TransliteratorTest extends TestFmwk {
0068:
0069: public static void main(String[] args) throws Exception {
0070: new TransliteratorTest().run(args);
0071: }
0072:
0073: public void TestInstantiation() {
0074: long ms = System.currentTimeMillis();
0075: String ID;
0076: for (Enumeration e = Transliterator.getAvailableIDs(); e
0077: .hasMoreElements();) {
0078: ID = (String) e.nextElement();
0079: if (ID.equals("Latin-Han/definition")) {
0080: System.out
0081: .println("\nTODO: disabling Latin-Han/definition check for now: fix later");
0082: continue;
0083: }
0084: Transliterator t = null;
0085: try {
0086: t = Transliterator.getInstance(ID);
0087: // This is only true for some subclasses
0088: // // We should get a new instance if we try again
0089: // Transliterator t2 = Transliterator.getInstance(ID);
0090: // if (t != t2) {
0091: // logln("OK: " + Transliterator.getDisplayName(ID) + " (" + ID + "): " + t);
0092: // } else {
0093: // errln("FAIL: " + ID + " returned identical instances");
0094: // t = null;
0095: // }
0096: } catch (IllegalArgumentException ex) {
0097: errln("FAIL: " + ID);
0098: throw ex;
0099: }
0100:
0101: if (t != null) {
0102: // Now test toRules
0103: String rules = null;
0104: try {
0105: rules = t.toRules(true);
0106:
0107: Transliterator u = Transliterator.createFromRules(
0108: "x", rules, Transliterator.FORWARD);
0109: } catch (IllegalArgumentException ex2) {
0110: errln("FAIL: " + ID + ".toRules() => bad rules: "
0111: + rules);
0112: throw ex2;
0113: }
0114: }
0115: }
0116:
0117: // Now test the failure path
0118: try {
0119: ID = "<Not a valid Transliterator ID>";
0120: Transliterator t = Transliterator.getInstance(ID);
0121: errln("FAIL: " + ID + " returned " + t);
0122: } catch (IllegalArgumentException ex) {
0123: logln("OK: Bogus ID handled properly");
0124: }
0125:
0126: ms = System.currentTimeMillis() - ms;
0127: logln("Elapsed time: " + ms + " ms");
0128: }
0129:
0130: public void TestSimpleRules() {
0131: /* Example: rules 1. ab>x|y
0132: * 2. yc>z
0133: *
0134: * []|eabcd start - no match, copy e to tranlated buffer
0135: * [e]|abcd match rule 1 - copy output & adjust cursor
0136: * [ex|y]cd match rule 2 - copy output & adjust cursor
0137: * [exz]|d no match, copy d to transliterated buffer
0138: * [exzd]| done
0139: */
0140: expect("ab>x|y;" + "yc>z", "eabcd", "exzd");
0141:
0142: /* Another set of rules:
0143: * 1. ab>x|yzacw
0144: * 2. za>q
0145: * 3. qc>r
0146: * 4. cw>n
0147: *
0148: * []|ab Rule 1
0149: * [x|yzacw] No match
0150: * [xy|zacw] Rule 2
0151: * [xyq|cw] Rule 4
0152: * [xyqn]| Done
0153: */
0154: expect("ab>x|yzacw;" + "za>q;" + "qc>r;" + "cw>n", "ab", "xyqn");
0155:
0156: /* Test categories
0157: */
0158: Transliterator t = Transliterator.createFromRules("<ID>",
0159: "$dummy=\uE100;" + "$vowel=[aeiouAEIOU];"
0160: + "$lu=[:Lu:];" + "$vowel } $lu > '!';"
0161: + "$vowel > '&';" + "'!' { $lu > '^';"
0162: + "$lu > '*';" + "a>ERROR",
0163: Transliterator.FORWARD);
0164: expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
0165: }
0166:
0167: /**
0168: * Test inline set syntax and set variable syntax.
0169: */
0170: public void TestInlineSet() {
0171: expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
0172: expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
0173:
0174: expect("$digit = [0-9];" + "$alpha = [a-zA-Z];"
0175: + "$alphanumeric = [$digit $alpha];" + // ***
0176: "$special = [^$alphanumeric];" + // ***
0177: "$alphanumeric > '-';" + "$special > '*';",
0178:
0179: "thx-1138", "---*----");
0180: }
0181:
0182: /**
0183: * Create some inverses and confirm that they work. We have to be
0184: * careful how we do this, since the inverses will not be true
0185: * inverses -- we can't throw any random string at the composition
0186: * of the transliterators and expect the identity function. F x
0187: * F' != I. However, if we are careful about the input, we will
0188: * get the expected results.
0189: */
0190: public void TestRuleBasedInverse() {
0191: String RULES = "abc>zyx;" + "ab>yz;" + "bc>zx;" + "ca>xy;"
0192: + "a>x;" + "b>y;" + "c>z;" +
0193:
0194: "abc<zyx;" + "ab<yz;" + "bc<zx;" + "ca<xy;" + "a<x;"
0195: + "b<y;" + "c<z;" +
0196:
0197: "";
0198:
0199: String[] DATA = {
0200: // Careful here -- random strings will not work. If we keep
0201: // the left side to the domain and the right side to the range
0202: // we will be okay though (left, abc; right xyz).
0203: "a", "x", "abcacab", "zyxxxyy", "caccb", "xyzzy", };
0204:
0205: Transliterator fwd = Transliterator.createFromRules("<ID>",
0206: RULES, Transliterator.FORWARD);
0207: Transliterator rev = Transliterator.createFromRules("<ID>",
0208: RULES, Transliterator.REVERSE);
0209: for (int i = 0; i < DATA.length; i += 2) {
0210: expect(fwd, DATA[i], DATA[i + 1]);
0211: expect(rev, DATA[i + 1], DATA[i]);
0212: }
0213: }
0214:
0215: /**
0216: * Basic test of keyboard.
0217: */
0218: public void TestKeyboard() {
0219: Transliterator t = Transliterator.createFromRules("<ID>",
0220: "psch>Y;" + "ps>y;" + "ch>x;" + "a>A;",
0221: Transliterator.FORWARD);
0222: String DATA[] = {
0223: // insertion, buffer
0224: "a", "A", "p", "Ap", "s", "Aps", "c", "Apsc", "a",
0225: "AycA", "psch", "AycAY", null, "AycAY", // null means finishKeyboardTransliteration
0226: };
0227:
0228: keyboardAux(t, DATA);
0229: }
0230:
0231: /**
0232: * Basic test of keyboard with cursor.
0233: */
0234: public void TestKeyboard2() {
0235: Transliterator t = Transliterator.createFromRules("<ID>",
0236: "ych>Y;" + "ps>|y;" + "ch>x;" + "a>A;",
0237: Transliterator.FORWARD);
0238: String DATA[] = {
0239: // insertion, buffer
0240: "a", "A", "p", "Ap", "s", "Aps", // modified for rollback - "Ay",
0241: "c", "Apsc", // modified for rollback - "Ayc",
0242: "a", "AycA", "p", "AycAp", "s", "AycAps", // modified for rollback - "AycAy",
0243: "c", "AycApsc", // modified for rollback - "AycAyc",
0244: "h", "AycAY", null, "AycAY", // null means finishKeyboardTransliteration
0245: };
0246:
0247: keyboardAux(t, DATA);
0248: }
0249:
0250: /**
0251: * Test keyboard transliteration with back-replacement.
0252: */
0253: public void TestKeyboard3() {
0254: // We want th>z but t>y. Furthermore, during keyboard
0255: // transliteration we want t>y then yh>z if t, then h are
0256: // typed.
0257: String RULES = "t>|y;" + "yh>z;" + "";
0258:
0259: String[] DATA = {
0260: // Column 1: characters to add to buffer (as if typed)
0261: // Column 2: expected appearance of buffer after
0262: // keyboard xliteration.
0263: "a", "a", "b", "ab", "t", "abt", // modified for rollback - "aby",
0264: "c", "abyc", "t", "abyct", // modified for rollback - "abycy",
0265: "h", "abycz", null, "abycz", // null means finishKeyboardTransliteration
0266: };
0267:
0268: Transliterator t = Transliterator.createFromRules("<ID>",
0269: RULES, Transliterator.FORWARD);
0270: keyboardAux(t, DATA);
0271: }
0272:
0273: private void keyboardAux(Transliterator t, String[] DATA) {
0274: Transliterator.Position index = new Transliterator.Position();
0275: ReplaceableString s = new ReplaceableString();
0276: for (int i = 0; i < DATA.length; i += 2) {
0277: StringBuffer log;
0278: if (DATA[i] != null) {
0279: log = new StringBuffer(s.toString() + " + " + DATA[i]
0280: + " -> ");
0281: t.transliterate(s, index, DATA[i]);
0282: } else {
0283: log = new StringBuffer(s.toString() + " => ");
0284: t.finishTransliteration(s, index);
0285: }
0286: UtilityExtensions.formatInput(log, s, index);
0287: if (s.toString().equals(DATA[i + 1])) {
0288: logln(log.toString());
0289: } else {
0290: errln("FAIL: " + log.toString() + ", expected "
0291: + DATA[i + 1]);
0292: }
0293: }
0294: }
0295:
0296: // Latin-Arabic has been temporarily removed until it can be
0297: // done correctly.
0298:
0299: // public void TestArabic() {
0300: // String DATA[] = {
0301: // "Arabic",
0302: // "\u062a\u062a\u0645\u062a\u0639 "+
0303: // "\u0627\u0644\u0644\u063a\u0629 "+
0304: // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629 "+
0305: // "\u0628\u0628\u0646\u0638\u0645 "+
0306: // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629 "+
0307: // "\u062c\u0645\u064a\u0644\u0629"
0308: // };
0309:
0310: // Transliterator t = Transliterator.getInstance("Latin-Arabic");
0311: // for (int i=0; i<DATA.length; i+=2) {
0312: // expect(t, DATA[i], DATA[i+1]);
0313: // }
0314: // }
0315:
0316: /**
0317: * Compose the Kana transliterator forward and reverse and try
0318: * some strings that should come out unchanged.
0319: */
0320: public void TestCompoundKana() {
0321: Transliterator t = Transliterator
0322: .getInstance("Latin-Katakana;Katakana-Latin");
0323: expect(t, "aaaaa", "aaaaa");
0324: }
0325:
0326: /**
0327: * Compose the hex transliterators forward and reverse.
0328: */
0329: public void TestCompoundHex() {
0330: Transliterator a = Transliterator.getInstance("Any-Hex");
0331: Transliterator b = Transliterator.getInstance("Hex-Any");
0332: // Transliterator[] trans = { a, b };
0333: // Transliterator ab = Transliterator.getInstance(trans);
0334: Transliterator ab = Transliterator
0335: .getInstance("Any-Hex;Hex-Any");
0336:
0337: // Do some basic tests of b
0338: expect(b, "\\u0030\\u0031", "01");
0339:
0340: String s = "abcde";
0341: expect(ab, s, s);
0342:
0343: // trans = new Transliterator[] { b, a };
0344: // Transliterator ba = Transliterator.getInstance(trans);
0345: Transliterator ba = Transliterator
0346: .getInstance("Hex-Any;Any-Hex");
0347: ReplaceableString str = new ReplaceableString(s);
0348: a.transliterate(str);
0349: expect(ba, str.toString(), str.toString());
0350: }
0351:
0352: /**
0353: * Do some basic tests of filtering.
0354: */
0355: public void TestFiltering() {
0356: Transliterator hex = Transliterator.getInstance("Any-Hex");
0357: hex.setFilter(new UnicodeFilter() {
0358: public boolean contains(int c) {
0359: return c != 'c';
0360: }
0361:
0362: public String toPattern(boolean escapeUnprintable) {
0363: return "";
0364: }
0365:
0366: public boolean matchesIndexValue(int v) {
0367: return false;
0368: }
0369:
0370: public void addMatchSetTo(UnicodeSet toUnionTo) {
0371: }
0372: });
0373: String s = "abcde";
0374: String out = hex.transliterate(s);
0375: String exp = "\\u0061\\u0062c\\u0064\\u0065";
0376: if (out.equals(exp)) {
0377: logln("Ok: \"" + exp + "\"");
0378: } else {
0379: logln("FAIL: \"" + out + "\", wanted \"" + exp + "\"");
0380: }
0381: }
0382:
0383: /**
0384: * Test anchors
0385: */
0386: public void TestAnchors() {
0387: expect("^ab > 01 ;" + " ab > |8 ;" + " b > k ;"
0388: + " 8x$ > 45 ;" + " 8x > 77 ;",
0389:
0390: "ababbabxabx", "018k7745");
0391: expect("$s = [z$] ;" + "$s{ab > 01 ;" + " ab > |8 ;"
0392: + " b > k ;" + " 8x}$s > 45 ;"
0393: + " 8x > 77 ;",
0394:
0395: "abzababbabxzabxabx", "01z018k45z01x45");
0396: }
0397:
0398: /**
0399: * Test pattern quoting and escape mechanisms.
0400: */
0401: public void TestPatternQuoting() {
0402: // Array of 3n items
0403: // Each item is <rules>, <input>, <expected output>
0404: String[] DATA = { "\u4E01>'[male adult]'", "\u4E01",
0405: "[male adult]", };
0406:
0407: for (int i = 0; i < DATA.length; i += 3) {
0408: logln("Pattern: " + Utility.escape(DATA[i]));
0409: Transliterator t = Transliterator.createFromRules("<ID>",
0410: DATA[i], Transliterator.FORWARD);
0411: expect(t, DATA[i + 1], DATA[i + 2]);
0412: }
0413: }
0414:
0415: /**
0416: * Regression test for bugs found in Greek transliteration.
0417: */
0418: public void TestJ277() {
0419: Transliterator gl = Transliterator
0420: .getInstance("Greek-Latin; NFD; [:M:]Remove; NFC");
0421:
0422: char sigma = (char) 0x3C3;
0423: char upsilon = (char) 0x3C5;
0424: char nu = (char) 0x3BD;
0425: // not used char PHI = (char)0x3A6;
0426: char alpha = (char) 0x3B1;
0427: // not used char omega = (char)0x3C9;
0428: // not used char omicron = (char)0x3BF;
0429: // not used char epsilon = (char)0x3B5;
0430:
0431: // sigma upsilon nu -> syn
0432: StringBuffer buf = new StringBuffer();
0433: buf.append(sigma).append(upsilon).append(nu);
0434: String syn = buf.toString();
0435: expect(gl, syn, "syn");
0436:
0437: // sigma alpha upsilon nu -> saun
0438: buf.setLength(0);
0439: buf.append(sigma).append(alpha).append(upsilon).append(nu);
0440: String sayn = buf.toString();
0441: expect(gl, sayn, "saun");
0442:
0443: // Again, using a smaller rule set
0444: String rules = "$alpha = \u03B1;" + "$nu = \u03BD;"
0445: + "$sigma = \u03C3;" + "$ypsilon = \u03C5;"
0446: + "$vowel = [aeiouAEIOU$alpha$ypsilon];"
0447: + "s <> $sigma;" + "a <> $alpha;"
0448: + "u <> $vowel { $ypsilon;"
0449: + "y <> $ypsilon;" + "n <> $nu;";
0450: Transliterator mini = Transliterator.createFromRules("mini",
0451: rules, Transliterator.REVERSE);
0452: expect(mini, syn, "syn");
0453: expect(mini, sayn, "saun");
0454:
0455: //| // Transliterate the Greek locale data
0456: //| Locale el("el");
0457: //| DateFormatSymbols syms(el, status);
0458: //| if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
0459: //| int32_t i, count;
0460: //| const UnicodeString* data = syms.getMonths(count);
0461: //| for (i=0; i<count; ++i) {
0462: //| if (data[i].length() == 0) {
0463: //| continue;
0464: //| }
0465: //| UnicodeString out(data[i]);
0466: //| gl->transliterate(out);
0467: //| bool_t ok = TRUE;
0468: //| if (data[i].length() >= 2 && out.length() >= 2 &&
0469: //| u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
0470: //| if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
0471: //| ok = FALSE;
0472: //| }
0473: //| }
0474: //| if (ok) {
0475: //| logln(prettify(data[i] + " -> " + out));
0476: //| } else {
0477: //| errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
0478: //| }
0479: //| }
0480: }
0481:
0482: // /**
0483: // * Prefix, suffix support in hex transliterators
0484: // */
0485: // public void TestJ243() {
0486: // // Test default Hex-Any, which should handle
0487: // // \\u, \\U, u+, and U+
0488:// HexToUnicodeTransliterator hex = new HexToUnicodeTransliterator();
0489: // expect(hex, "\\u0041+\\U0042,u+0043uu+0044z", "A+B,CuDz");
0490: //
0491: // // Try a custom Hex-Any
0492: // // \\uXXXX and &#xXXXX;
0493:// HexToUnicodeTransliterator hex2 = new HexToUnicodeTransliterator("\\\\u###0;&\\#x###0\\;");
0494:// expect(hex2, "\\u61\\u062\\u0063\\u00645\\u66x0123",
0495:// "abcd5fx0123");
0496: //
0497: // // Try custom Any-Hex (default is tested elsewhere)
0498: // UnicodeToHexTransliterator hex3 = new UnicodeToHexTransliterator("&\\#x###0;");
0499: // expect(hex3, "012", "012");
0500: // }
0501:
0502: public void TestJ329() {
0503:
0504: Object[] DATA = { new Boolean(false), "a > b; c > d",
0505: new Boolean(true), "a > b; no operator; c > d", };
0506:
0507: for (int i = 0; i < DATA.length; i += 2) {
0508: String err = null;
0509: try {
0510: Transliterator t = Transliterator.createFromRules(
0511: "<ID>", (String) DATA[i + 1],
0512: Transliterator.FORWARD);
0513: } catch (IllegalArgumentException e) {
0514: err = e.getMessage();
0515: }
0516: boolean gotError = (err != null);
0517: String desc = (String) DATA[i + 1]
0518: + (gotError ? (" -> error: " + err)
0519: : " -> no error");
0520: if ((err != null) == ((Boolean) DATA[i]).booleanValue()) {
0521: logln("Ok: " + desc);
0522: } else {
0523: errln("FAIL: " + desc);
0524: }
0525: }
0526: }
0527:
0528: /**
0529: * Test segments and segment references.
0530: */
0531: public void TestSegments() {
0532: // Array of 3n items
0533: // Each item is <rules>, <input>, <expected output>
0534: String[] DATA = { "([a-z]) '.' ([0-9]) > $2 '-' $1",
0535: "abc.123.xyz.456", "ab1-c23.xy4-z56", };
0536:
0537: for (int i = 0; i < DATA.length; i += 3) {
0538: logln("Pattern: " + Utility.escape(DATA[i]));
0539: Transliterator t = Transliterator.createFromRules("<ID>",
0540: DATA[i], Transliterator.FORWARD);
0541: expect(t, DATA[i + 1], DATA[i + 2]);
0542: }
0543: }
0544:
0545: /**
0546: * Test cursor positioning outside of the key
0547: */
0548: public void TestCursorOffset() {
0549: // Array of 3n items
0550: // Each item is <rules>, <input>, <expected output>
0551: String[] DATA = {
0552: "pre {alpha} post > | @ ALPHA ;" + "eALPHA > beta ;"
0553: + "pre {beta} post > BETA @@ | ;"
0554: + "post > xyz",
0555:
0556: "prealphapost prebetapost", "prbetaxyz preBETApost", };
0557:
0558: for (int i = 0; i < DATA.length; i += 3) {
0559: logln("Pattern: " + Utility.escape(DATA[i]));
0560: Transliterator t = Transliterator.createFromRules("<ID>",
0561: DATA[i], Transliterator.FORWARD);
0562: expect(t, DATA[i + 1], DATA[i + 2]);
0563: }
0564: }
0565:
0566: /**
0567: * Test zero length and > 1 char length variable values. Test
0568: * use of variable refs in UnicodeSets.
0569: */
0570: public void TestArbitraryVariableValues() {
0571: // Array of 3n items
0572: // Each item is <rules>, <input>, <expected output>
0573: String[] DATA = {
0574: "$abe = ab;" + "$pat = x[yY]z;" + "$ll = 'a-z';"
0575: + "$llZ = [$ll];" + "$llY = [$ll$pat];"
0576: + "$emp = ;" +
0577:
0578: "$abe > ABE;" + "$pat > END;" + "$llZ > 1;"
0579: + "$llY > 2;" + "7$emp 8 > 9;" + "",
0580:
0581: "ab xYzxyz stY78", "ABE ENDEND 1129", };
0582:
0583: for (int i = 0; i < DATA.length; i += 3) {
0584: logln("Pattern: " + Utility.escape(DATA[i]));
0585: Transliterator t = Transliterator.createFromRules("<ID>",
0586: DATA[i], Transliterator.FORWARD);
0587: expect(t, DATA[i + 1], DATA[i + 2]);
0588: }
0589: }
0590:
0591: /**
0592: * Confirm that the contextStart, contextLimit, start, and limit
0593: * behave correctly.
0594: */
0595: public void TestPositionHandling() {
0596: // Array of 3n items
0597: // Each item is <rules>, <input>, <expected output>
0598: String[] DATA = { "a{t} > SS ; {t}b > UU ; {t} > TT ;",
0599: "xtat txtb", // pos 0,9,0,9
0600: "xTTaSS TTxUUb",
0601:
0602: "a{t} > SS ; {t}b > UU ; {t} > TT ;", "xtat txtb", // pos 2,9,3,8
0603: "xtaSS TTxUUb",
0604:
0605: "a{t} > SS ; {t}b > UU ; {t} > TT ;", "xtat txtb", // pos 3,8,3,8
0606: "xtaTT TTxTTb", };
0607:
0608: // Array of 4n positions -- these go with the DATA array
0609: // They are: contextStart, contextLimit, start, limit
0610: int[] POS = { 0, 9, 0, 9, 2, 9, 3, 8, 3, 8, 3, 8, };
0611:
0612: int n = DATA.length / 3;
0613: for (int i = 0; i < n; i++) {
0614: Transliterator t = Transliterator.createFromRules("<ID>",
0615: DATA[3 * i], Transliterator.FORWARD);
0616: Transliterator.Position pos = new Transliterator.Position(
0617: POS[4 * i], POS[4 * i + 1], POS[4 * i + 2],
0618: POS[4 * i + 3]);
0619: ReplaceableString rsource = new ReplaceableString(
0620: DATA[3 * i + 1]);
0621: t.transliterate(rsource, pos);
0622: t.finishTransliteration(rsource, pos);
0623: String result = rsource.toString();
0624: String exp = DATA[3 * i + 2];
0625: expectAux(Utility.escape(DATA[3 * i]), DATA[3 * i + 1],
0626: result, result.equals(exp), exp);
0627: }
0628: }
0629:
0630: /**
0631: * Test the Hiragana-Katakana transliterator.
0632: */
0633: public void TestHiraganaKatakana() {
0634: Transliterator hk = Transliterator
0635: .getInstance("Hiragana-Katakana");
0636: Transliterator kh = Transliterator
0637: .getInstance("Katakana-Hiragana");
0638:
0639: // Array of 3n items
0640: // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
0641: String[] DATA = { "both", "\u3042\u3090\u3099\u3092\u3050",
0642: "\u30A2\u30F8\u30F2\u30B0",
0643:
0644: "kh", "\u307C\u3051\u3060\u3042\u3093\u30FC",
0645: "\u30DC\u30F6\u30C0\u30FC\u30F3\u30FC", };
0646:
0647: for (int i = 0; i < DATA.length; i += 3) {
0648: switch (DATA[i].charAt(0)) {
0649: case 'h': // Hiragana-Katakana
0650: expect(hk, DATA[i + 1], DATA[i + 2]);
0651: break;
0652: case 'k': // Katakana-Hiragana
0653: expect(kh, DATA[i + 2], DATA[i + 1]);
0654: break;
0655: case 'b': // both
0656: expect(hk, DATA[i + 1], DATA[i + 2]);
0657: expect(kh, DATA[i + 2], DATA[i + 1]);
0658: break;
0659: }
0660: }
0661:
0662: }
0663:
0664: public void TestCopyJ476() {
0665: // This is a C++-only copy constructor test
0666: }
0667:
0668: /**
0669: * Test inter-Indic transliterators. These are composed.
0670: */
0671: public void TestInterIndic() {
0672: String ID = "Devanagari-Gujarati";
0673: Transliterator dg = Transliterator.getInstance(ID);
0674: if (dg == null) {
0675: errln("FAIL: getInstance(" + ID + ") returned null");
0676: return;
0677: }
0678: String id = dg.getID();
0679: if (!id.equals(ID)) {
0680: errln("FAIL: getInstance(" + ID + ").getID() => " + id);
0681: }
0682: String dev = "\u0901\u090B\u0925";
0683: String guj = "\u0A81\u0A8B\u0AA5";
0684: expect(dg, dev, guj);
0685: }
0686:
0687: /**
0688: * Test filter syntax in IDs. (J23)
0689: */
0690: public void TestFilterIDs() {
0691: String[] DATA = {
0692: "[aeiou]Any-Hex", // ID
0693: "[aeiou]Hex-Any", // expected inverse ID
0694: "quizzical", // src
0695: "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
0696:
0697: "[aeiou]Any-Hex;[^5]Hex-Any",
0698: "[^5]Any-Hex;[aeiou]Hex-Any", "quizzical",
0699: "q\\u0075izzical",
0700:
0701: "[abc]Null", "[abc]Null", "xyz", "xyz", };
0702:
0703: for (int i = 0; i < DATA.length; i += 4) {
0704: String ID = DATA[i];
0705: Transliterator t = Transliterator.getInstance(ID);
0706: expect(t, DATA[i + 2], DATA[i + 3]);
0707:
0708: // Check the ID
0709: if (!ID.equals(t.getID())) {
0710: errln("FAIL: getInstance(" + ID + ").getID() => "
0711: + t.getID());
0712: }
0713:
0714: // Check the inverse
0715: String uID = DATA[i + 1];
0716: Transliterator u = t.getInverse();
0717: if (u == null) {
0718: errln("FAIL: " + ID + ".getInverse() returned NULL");
0719: } else if (!u.getID().equals(uID)) {
0720: errln("FAIL: " + ID + ".getInverse().getID() => "
0721: + u.getID() + ", expected " + uID);
0722: }
0723: }
0724: }
0725:
0726: /**
0727: * Test the case mapping transliterators.
0728: */
0729: public void TestCaseMap() {
0730: Transliterator toUpper = Transliterator
0731: .getInstance("Any-Upper[^xyzXYZ]");
0732: Transliterator toLower = Transliterator
0733: .getInstance("Any-Lower[^xyzXYZ]");
0734: Transliterator toTitle = Transliterator
0735: .getInstance("Any-Title[^xyzXYZ]");
0736:
0737: expect(toUpper,
0738: "The quick brown fox jumped over the lazy dogs.",
0739: "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
0740: expect(toLower,
0741: "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
0742: "the quick brown foX jumped over the lazY dogs.");
0743: expect(toTitle,
0744: "the quick brown foX caN'T jump over the laZy dogs.",
0745: "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
0746: }
0747:
0748: /**
0749: * Test the name mapping transliterators.
0750: */
0751: public void TestNameMap() {
0752: Transliterator uni2name = Transliterator
0753: .getInstance("Any-Name[^abc]");
0754: Transliterator name2uni = Transliterator
0755: .getInstance("Name-Any");
0756:
0757: expect(
0758: uni2name,
0759: "\u00A0abc\u4E01\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF",
0760: "\\N{NO-BREAK SPACE}abc\\N{CJK UNIFIED IDEOGRAPH-4E01}\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}");
0761: expect(
0762: name2uni,
0763: "{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{",
0764: "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{");
0765:
0766: // round trip
0767: Transliterator t = Transliterator
0768: .getInstance("Any-Name;Name-Any");
0769:
0770: String s = "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{";
0771: expect(t, s, s);
0772: }
0773:
0774: /**
0775: * Test liberalized ID syntax. 1006c
0776: */
0777: public void TestLiberalizedID() {
0778: // Some test cases have an expected getID() value of NULL. This
0779: // means I have disabled the test case for now. This stuff is
0780: // still under development, and I haven't decided whether to make
0781: // getID() return canonical case yet. It will all get rewritten
0782: // with the move to Source-Target/Variant IDs anyway. [aliu]
0783: String DATA[] = { "latin-greek", null /*"Latin-Greek"*/,
0784: "case insensitivity", " Null ", "Null", "whitespace",
0785: " Latin[a-z]-Greek ", "[a-z]Latin-Greek",
0786: "inline filter", " null ; latin-greek ",
0787: null /*"Null;Latin-Greek"*/, "compound whitespace", };
0788:
0789: for (int i = 0; i < DATA.length; i += 3) {
0790: try {
0791: Transliterator t = Transliterator.getInstance(DATA[i]);
0792: if (DATA[i + 1] == null
0793: || DATA[i + 1].equals(t.getID())) {
0794: logln("Ok: " + DATA[i + 2] + " create ID \""
0795: + DATA[i] + "\" => \"" + t.getID() + "\"");
0796: } else {
0797: errln("FAIL: " + DATA[i + 2] + " create ID \""
0798: + DATA[i] + "\" => \"" + t.getID()
0799: + "\", exp \"" + DATA[i + 1] + "\"");
0800: }
0801: } catch (IllegalArgumentException e) {
0802: errln("FAIL: " + DATA[i + 2] + " create ID \""
0803: + DATA[i] + "\"");
0804: }
0805: }
0806: }
0807:
0808: public void TestCreateInstance() {
0809: String FORWARD = "F";
0810: String REVERSE = "R";
0811: String DATA[] = {
0812: // Column 1: id
0813: // Column 2: direction
0814: // Column 3: expected ID, or "" if expect failure
0815: "Latin-Hangul",
0816: REVERSE,
0817: "Hangul-Latin", // JB#912
0818:
0819: // JB#2689: bad compound causes crash
0820: "InvalidSource-InvalidTarget", FORWARD, "",
0821: "InvalidSource-InvalidTarget", REVERSE, "",
0822: "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
0823: "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
0824: "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
0825: "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
0826:
0827: null };
0828:
0829: for (int i = 0; DATA[i] != null; i += 3) {
0830: String id = DATA[i];
0831: int dir = (DATA[i + 1] == FORWARD) ? Transliterator.FORWARD
0832: : Transliterator.REVERSE;
0833: String expID = DATA[i + 2];
0834: Exception e = null;
0835: Transliterator t;
0836: try {
0837: t = Transliterator.getInstance(id, dir);
0838: } catch (Exception e1) {
0839: e = e1;
0840: t = null;
0841: }
0842: String newID = (t != null) ? t.getID() : "";
0843: boolean ok = (newID.equals(expID));
0844: if (t == null) {
0845: newID = e.getMessage();
0846: }
0847: if (ok) {
0848: logln("Ok: createInstance(" + id + "," + DATA[i + 1]
0849: + ") => " + newID);
0850: } else {
0851: errln("FAIL: createInstance(" + id + "," + DATA[i + 1]
0852: + ") => " + newID + ", expected " + expID);
0853: }
0854: }
0855: }
0856:
0857: /**
0858: * Test the normalization transliterator.
0859: */
0860: public void TestNormalizationTransliterator() {
0861: // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.icu.dev.test.normalizer.BasicTest
0862: // PLEASE KEEP THEM IN SYNC WITH BasicTest.
0863: String[][] CANON = {
0864: // Input Decomposed Composed
0865: { "cat", "cat", "cat" },
0866: { "\u00e0ardvark", "a\u0300ardvark", "\u00e0ardvark" },
0867:
0868: { "\u1e0a", "D\u0307", "\u1e0a" }, // D-dot_above
0869: { "D\u0307", "D\u0307", "\u1e0a" }, // D dot_above
0870:
0871: { "\u1e0c\u0307", "D\u0323\u0307", "\u1e0c\u0307" }, // D-dot_below dot_above
0872: { "\u1e0a\u0323", "D\u0323\u0307", "\u1e0c\u0307" }, // D-dot_above dot_below
0873: { "D\u0307\u0323", "D\u0323\u0307", "\u1e0c\u0307" }, // D dot_below dot_above
0874:
0875: { "\u1e10\u0307\u0323", "D\u0327\u0323\u0307",
0876: "\u1e10\u0323\u0307" }, // D dot_below cedilla dot_above
0877: { "D\u0307\u0328\u0323", "D\u0328\u0323\u0307",
0878: "\u1e0c\u0328\u0307" }, // D dot_above ogonek dot_below
0879:
0880: { "\u1E14", "E\u0304\u0300", "\u1E14" }, // E-macron-grave
0881: { "\u0112\u0300", "E\u0304\u0300", "\u1E14" }, // E-macron + grave
0882: { "\u00c8\u0304", "E\u0300\u0304", "\u00c8\u0304" }, // E-grave + macron
0883:
0884: { "\u212b", "A\u030a", "\u00c5" }, // angstrom_sign
0885: { "\u00c5", "A\u030a", "\u00c5" }, // A-ring
0886:
0887: { "\u00fdffin", "y\u0301ffin", "\u00fdffin" }, //updated with 3.0
0888: { "\u00fd\uFB03n", "y\u0301\uFB03n", "\u00fd\uFB03n" }, //updated with 3.0
0889:
0890: { "Henry IV", "Henry IV", "Henry IV" },
0891: { "Henry \u2163", "Henry \u2163", "Henry \u2163" },
0892:
0893: { "\u30AC", "\u30AB\u3099", "\u30AC" }, // ga (Katakana)
0894: { "\u30AB\u3099", "\u30AB\u3099", "\u30AC" }, // ka + ten
0895: { "\uFF76\uFF9E", "\uFF76\uFF9E", "\uFF76\uFF9E" }, // hw_ka + hw_ten
0896: { "\u30AB\uFF9E", "\u30AB\uFF9E", "\u30AB\uFF9E" }, // ka + hw_ten
0897: { "\uFF76\u3099", "\uFF76\u3099", "\uFF76\u3099" }, // hw_ka + ten
0898:
0899: { "A\u0300\u0316", "A\u0316\u0300", "\u00C0\u0316" }, };
0900:
0901: String[][] COMPAT = {
0902: // Input Decomposed Composed
0903: { "\uFB4f", "\u05D0\u05DC", "\u05D0\u05DC" }, // Alef-Lamed vs. Alef, Lamed
0904:
0905: { "\u00fdffin", "y\u0301ffin", "\u00fdffin" }, //updated for 3.0
0906: { "\u00fd\uFB03n", "y\u0301ffin", "\u00fdffin" }, // ffi ligature -> f + f + i
0907:
0908: { "Henry IV", "Henry IV", "Henry IV" },
0909: { "Henry \u2163", "Henry IV", "Henry IV" },
0910:
0911: { "\u30AC", "\u30AB\u3099", "\u30AC" }, // ga (Katakana)
0912: { "\u30AB\u3099", "\u30AB\u3099", "\u30AC" }, // ka + ten
0913:
0914: { "\uFF76\u3099", "\u30AB\u3099", "\u30AC" }, // hw_ka + ten
0915: };
0916:
0917: Transliterator NFD = Transliterator.getInstance("NFD");
0918: Transliterator NFC = Transliterator.getInstance("NFC");
0919: for (int i = 0; i < CANON.length; ++i) {
0920: String in = CANON[i][0];
0921: String expd = CANON[i][1];
0922: String expc = CANON[i][2];
0923: expect(NFD, in, expd);
0924: expect(NFC, in, expc);
0925: }
0926:
0927: Transliterator NFKD = Transliterator.getInstance("NFKD");
0928: Transliterator NFKC = Transliterator.getInstance("NFKC");
0929: for (int i = 0; i < COMPAT.length; ++i) {
0930: String in = COMPAT[i][0];
0931: String expkd = COMPAT[i][1];
0932: String expkc = COMPAT[i][2];
0933: expect(NFKD, in, expkd);
0934: expect(NFKC, in, expkc);
0935: }
0936:
0937: Transliterator t = Transliterator.getInstance("NFD; [x]Remove");
0938: expect(t, "\u010dx", "c\u030C");
0939: }
0940:
0941: /**
0942: * Test compound RBT rules.
0943: */
0944: public void TestCompoundRBT() {
0945: // Careful with spacing and ';' here: Phrase this exactly
0946: // as toRules() is going to return it. If toRules() changes
0947: // with regard to spacing or ';', then adjust this string.
0948: String rule = "::Hex-Any;\n" + "::Any-Lower;\n"
0949: + "a > '.A.';\n" + "b > '.B.';\n" + "::[^t]Any-Upper;";
0950: Transliterator t = Transliterator.createFromRules("Test", rule,
0951: Transliterator.FORWARD);
0952: if (t == null) {
0953: errln("FAIL: createFromRules failed");
0954: return;
0955: }
0956: expect(t, "\u0043at in the hat, bat on the mat",
0957: "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
0958: String r = t.toRules(true);
0959: if (r.equals(rule)) {
0960: logln("OK: toRules() => " + r);
0961: } else {
0962: errln("FAIL: toRules() => " + r + ", expected " + rule);
0963: }
0964:
0965: // Now test toRules
0966: t = Transliterator.getInstance("Greek-Latin; Latin-Cyrillic",
0967: Transliterator.FORWARD);
0968: if (t == null) {
0969: errln("FAIL: createInstance failed");
0970: return;
0971: }
0972: String exp = "::Greek-Latin;\n::Latin-Cyrillic;";
0973: r = t.toRules(true);
0974: if (!r.equals(exp)) {
0975: errln("FAIL: toRules() => " + r + ", expected " + exp);
0976: } else {
0977: logln("OK: toRules() => " + r);
0978: }
0979:
0980: // Round trip the result of toRules
0981: t = Transliterator.createFromRules("Test", r,
0982: Transliterator.FORWARD);
0983: if (t == null) {
0984: errln("FAIL: createFromRules #2 failed");
0985: return;
0986: } else {
0987: logln("OK: createFromRules(" + r + ") succeeded");
0988: }
0989:
0990: // Test toRules again
0991: r = t.toRules(true);
0992: if (!r.equals(exp)) {
0993: errln("FAIL: toRules() => " + r + ", expected " + exp);
0994: } else {
0995: logln("OK: toRules() => " + r);
0996: }
0997:
0998: // Test Foo(Bar) IDs. Careful with spacing in id; make it conform
0999: // to what the regenerated ID will look like.
1000: String id = "Upper(Lower);(NFKC)";
1001: t = Transliterator.getInstance(id, Transliterator.FORWARD);
1002: if (t == null) {
1003: errln("FAIL: createInstance #2 failed");
1004: return;
1005: }
1006: if (t.getID().equals(id)) {
1007: logln("OK: created " + id);
1008: } else {
1009: errln("FAIL: createInstance(" + id + ").getID() => "
1010: + t.getID());
1011: }
1012:
1013: Transliterator u = t.getInverse();
1014: if (u == null) {
1015: errln("FAIL: createInverse failed");
1016: return;
1017: }
1018: exp = "NFKC();Lower(Upper)";
1019: if (u.getID().equals(exp)) {
1020: logln("OK: createInverse(" + id + ") => " + u.getID());
1021: } else {
1022: errln("FAIL: createInverse(" + id + ") => " + u.getID());
1023: }
1024: }
1025:
1026: /**
 * Compound filter semantics were originally not implemented
1028: * correctly. Originally, each component filter f(i) is replaced by
1029: * f'(i) = f(i) && g, where g is the filter for the compound
1030: * transliterator.
1031: *
1032: * From Mark:
1033: *
 * Suppose I have a transliterator X. Internally X is
 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1036: *
1037: * The compound should convert all greek characters (through latin) to
1038: * cyrillic, then lowercase the result. The filter should say "don't
1039: * touch 'A' in the original". But because an intermediate result
1040: * happens to go through "A", the Greek Alpha gets hung up.
1041: */
1042: public void TestCompoundFilter() {
1043: Transliterator t = Transliterator.getInstance(
1044: "Greek-Latin; Latin-Greek; Lower",
1045: Transliterator.FORWARD);
1046: t.setFilter(new UnicodeSet("[^A]"));
1047:
1048: // Only the 'A' at index 1 should remain unchanged
1049: expect(t, CharsToUnicodeString("BA\\u039A\\u0391"),
1050: CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1051: }
1052:
1053: /**
1054: * Test the "Remove" transliterator.
1055: */
1056: public void TestRemove() {
1057: Transliterator t = Transliterator.getInstance("Remove[aeiou]");
1058: expect(t, "The quick brown fox.", "Th qck brwn fx.");
1059: }
1060:
1061: public void TestToRules() {
1062: String RBT = "rbt";
1063: String SET = "set";
1064: String[] DATA = {
1065: RBT,
1066: "$a=\\u4E61; [$a] > A;",
1067: "[\\u4E61] > A;",
1068:
1069: RBT,
1070: "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1071: "[[:Zs:][:Zl:]]{a} > A;",
1072:
1073: SET,
1074: "[[:Zs:][:Zl:]]",
1075: "[[:Zs:][:Zl:]]",
1076:
1077: SET,
1078: "[:Ps:]",
1079: "[:Ps:]",
1080:
1081: SET,
1082: "[:L:]",
1083: "[:L:]",
1084:
1085: SET,
1086: "[[:L:]-[A]]",
1087: "[[:L:]-[A]]",
1088:
1089: SET,
1090: "[~[:Lu:][:Ll:]]",
1091: "[~[:Lu:][:Ll:]]",
1092:
1093: SET,
1094: "[~[a-z]]",
1095: "[~[a-z]]",
1096:
1097: RBT,
1098: "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1099: "[^[:Zs:]]{a} > A;",
1100:
1101: RBT,
1102: "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1103: "[[a-z]-[:Zs:]]{a} > A;",
1104:
1105: RBT,
1106: "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1107: "[[:Zs:]&[a-z]]{a} > A;",
1108:
1109: RBT,
1110: "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1111: "[x[:Zs:]]{a} > A;",
1112:
1113: RBT,
1114: "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1115: + "$macron = \\u0304 ;"
1116: + "$evowel = [aeiouyAEIOUY] ;"
1117: + "$iotasub = \\u0345 ;"
1118: + "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1119: "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1120:
1121: RBT,
1122: "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1123: "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", };
1124:
1125: for (int d = 0; d < DATA.length; d += 3) {
1126: if (DATA[d] == RBT) {
1127: // Transliterator test
1128: Transliterator t = Transliterator.createFromRules("ID",
1129: DATA[d + 1], Transliterator.FORWARD);
1130: if (t == null) {
1131: errln("FAIL: createFromRules failed");
1132: return;
1133: }
1134: String rules, escapedRules;
1135: rules = t.toRules(false);
1136: escapedRules = t.toRules(true);
1137: String expRules = Utility.unescape(DATA[d + 2]);
1138: String expEscapedRules = DATA[d + 2];
1139: if (rules.equals(expRules)) {
1140: logln("Ok: " + DATA[d + 1] + " => "
1141: + Utility.escape(rules));
1142: } else {
1143: errln("FAIL: "
1144: + DATA[d + 1]
1145: + " => "
1146: + Utility.escape(rules + ", exp "
1147: + expRules));
1148: }
1149: if (escapedRules.equals(expEscapedRules)) {
1150: logln("Ok: " + DATA[d + 1] + " => " + escapedRules);
1151: } else {
1152: errln("FAIL: " + DATA[d + 1] + " => "
1153: + escapedRules + ", exp " + expEscapedRules);
1154: }
1155:
1156: } else {
1157: // UnicodeSet test
1158: String pat = DATA[d + 1];
1159: String expToPat = DATA[d + 2];
1160: UnicodeSet set = new UnicodeSet(pat);
1161:
1162: // Adjust spacing etc. as necessary.
1163: String toPat;
1164: toPat = set.toPattern(true);
1165: if (expToPat.equals(toPat)) {
1166: logln("Ok: " + pat + " => " + toPat);
1167: } else {
1168: errln("FAIL: " + pat + " => "
1169: + Utility.escape(toPat) + ", exp "
1170: + Utility.escape(pat));
1171: }
1172: }
1173: }
1174: }
1175:
1176: public void TestContext() {
1177: Transliterator.Position pos = new Transliterator.Position(0, 2,
1178: 0, 1); // cs cl s l
1179:
1180: expect("de > x; {d}e > y;", "de", "ye", pos);
1181:
1182: expect("ab{c} > z;", "xadabdabcy", "xadabdabzy");
1183: }
1184:
1185: static final String CharsToUnicodeString(String s) {
1186: return Utility.unescape(s);
1187: }
1188:
1189: public void TestSupplemental() {
1190:
1191: expect(
1192: CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1193: + "a > $a; $s > i;"),
1194: CharsToUnicodeString("ab\\U0001030Fx"),
1195: CharsToUnicodeString("\\U00010300bix"));
1196:
1197: expect(
1198: CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1199: + "$b=[A-Z\\U00010400-\\U0001044D];"
1200: + "($a)($b) > $2 $1;"),
1201: CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1202: CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1203:
1204: // k|ax\\U00010300xm
1205:
1206: // k|a\\U00010400\\U00010300xm
1207: // ky|\\U00010400\\U00010300xm
1208: // ky\\U00010400|\\U00010300xm
1209:
1210: // ky\\U00010400|\\U00010300\\U00010400m
1211: // ky\\U00010400y|\\U00010400m
1212: expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1213: + "$a {x} > | @ \\U00010400;"
1214: + "{$a} [^\\u0000-\\uFFFF] > y;"),
1215: CharsToUnicodeString("kax\\U00010300xm"),
1216: CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1217:
1218: expect(Transliterator.getInstance("Any-Name"),
1219: CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1220: "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}");
1221:
1222: expect(
1223: Transliterator.getInstance("Name-Any"),
1224: "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}",
1225: CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"));
1226:
1227: expect(
1228: Transliterator.getInstance("Any-Hex/Unicode"),
1229: CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1230: "U+10330U+10FF00U+E0061U+00A0");
1231:
1232: expect(
1233: Transliterator.getInstance("Any-Hex/C"),
1234: CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1235: "\\U00010330\\U0010FF00\\U000E0061\\u00A0");
1236:
1237: expect(
1238: Transliterator.getInstance("Any-Hex/Perl"),
1239: CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1240: "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}");
1241:
1242: expect(
1243: Transliterator.getInstance("Any-Hex/Java"),
1244: CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1245: "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0");
1246:
1247: expect(
1248: Transliterator.getInstance("Any-Hex/XML"),
1249: CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1250: "𐌰􏼀󠁡 ");
1251:
1252: expect(
1253: Transliterator.getInstance("Any-Hex/XML10"),
1254: CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1255: "𐌰􏼀󠁡 ");
1256:
1257: expect(
1258: Transliterator
1259: .getInstance("[\\U000E0000-\\U000E0FFF] Remove"),
1260: CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1261: CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1262: }
1263:
1264: public void TestQuantifier() {
1265:
1266: // Make sure @ in a quantified anteContext works
1267: expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1268: "AAAAAb", "aaa(aac)");
1269:
1270: // Make sure @ in a quantified postContext works
1271: expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';", "baaaaa",
1272: "caa(aaa)");
1273:
1274: // Make sure @ in a quantified postContext with seg ref works
1275: expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';", "baaaaa",
1276: "baa(aaa)");
1277:
1278: // Make sure @ past ante context doesn't enter ante context
1279: Transliterator.Position pos = new Transliterator.Position(0, 5,
1280: 3, 5);
1281: expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';", "xxxab",
1282: "xxx(ac)", pos);
1283:
1284: // Make sure @ past post context doesn't pass limit
1285: Transliterator.Position pos2 = new Transliterator.Position(0,
1286: 4, 0, 2);
1287: expect("{b} a+ > c @@ |; x > y; a > A;", "baxx", "caxx", pos2);
1288:
1289: // Make sure @ past post context doesn't enter post context
1290: expect("{b} a+ > c @@ |; x > y; a > A;", "baxx", "cayy");
1291:
1292: expect("(ab)? c > d;", "c abc ababc", "d d abd");
1293:
1294: // NOTE: The (ab)+ when referenced just yields a single "ab",
1295: // not the full sequence of them. This accords with perl behavior.
1296: expect("(ab)+ {x} > '(' $1 ')';", "x abx ababxy",
1297: "x ab(ab) abab(ab)y");
1298:
1299: expect("b+ > x;", "ac abc abbc abbbc", "ac axc axc axc");
1300:
1301: expect("[abc]+ > x;", "qac abrc abbcs abtbbc", "qx xrx xs xtx");
1302:
1303: expect("q{(ab)+} > x;", "qa qab qaba qababc qaba",
1304: "qa qx qxa qxc qxa");
1305:
1306: expect("q(ab)* > x;", "qa qab qaba qababc", "xa x xa xc");
1307:
1308: // NOTE: The (ab)+ when referenced just yields a single "ab",
1309: // not the full sequence of them. This accords with perl behavior.
1310: expect("q(ab)* > '(' $1 ')';", "qa qab qaba qababc",
1311: "()a (ab) (ab)a (ab)c");
1312:
1313: // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1314: // quoted string
1315: expect("'ab'+ > x;", "bb ab ababb", "bb x xb");
1316:
1317: // $foo+ and $foo* -- the quantifier should apply to the entire
1318: // variable reference
1319: expect("$var = ab; $var+ > x;", "bb ab ababb", "bb x xb");
1320: }
1321:
1322: static class TestFact implements Transliterator.Factory {
1323: static class NameableNullTrans extends Transliterator {
1324: public NameableNullTrans(String id) {
1325: super (id, null);
1326: }
1327:
1328: protected void handleTransliterate(Replaceable text,
1329: Position offsets, boolean incremental) {
1330: offsets.start = offsets.limit;
1331: }
1332: }
1333:
1334: String id;
1335:
1336: public TestFact(String theID) {
1337: id = theID;
1338: }
1339:
1340: public Transliterator getInstance(String ignoredID) {
1341: return new NameableNullTrans(id);
1342: }
1343: }
1344:
1345: public void TestSTV() {
1346: Enumeration es = Transliterator.getAvailableSources();
1347: for (int i = 0; es.hasMoreElements(); ++i) {
1348: String source = (String) es.nextElement();
1349: logln("" + i + ": " + source);
1350: if (source.length() == 0) {
1351: errln("FAIL: empty source");
1352: continue;
1353: }
1354: Enumeration et = Transliterator.getAvailableTargets(source);
1355: for (int j = 0; et.hasMoreElements(); ++j) {
1356: String target = (String) et.nextElement();
1357: logln(" " + j + ": " + target);
1358: if (target.length() == 0) {
1359: errln("FAIL: empty target");
1360: continue;
1361: }
1362: Enumeration ev = Transliterator.getAvailableVariants(
1363: source, target);
1364: for (int k = 0; ev.hasMoreElements(); ++k) {
1365: String variant = (String) ev.nextElement();
1366: if (variant.length() == 0) {
1367: logln(" " + k + ": <empty>");
1368: } else {
1369: logln(" " + k + ": " + variant);
1370: }
1371: }
1372: }
1373: }
1374:
1375: // Test registration
1376: String[] IDS = { "Fieruwer", "Seoridf-Sweorie",
1377: "Oewoir-Oweri/Vsie" };
1378: String[] FULL_IDS = { "Any-Fieruwer", "Seoridf-Sweorie",
1379: "Oewoir-Oweri/Vsie" };
1380: String[] SOURCES = { null, "Seoridf", "Oewoir" };
1381: for (int i = 0; i < 3; ++i) {
1382: Transliterator
1383: .registerFactory(IDS[i], new TestFact(IDS[i]));
1384: try {
1385: Transliterator t = Transliterator.getInstance(IDS[i]);
1386: if (t.getID().equals(IDS[i])) {
1387: logln("Ok: Registration/creation succeeded for ID "
1388: + IDS[i]);
1389: } else {
1390: errln("FAIL: Registration of ID " + IDS[i]
1391: + " creates ID " + t.getID());
1392: }
1393: Transliterator.unregister(IDS[i]);
1394: try {
1395: t = Transliterator.getInstance(IDS[i]);
1396: errln("FAIL: Unregistration failed for ID "
1397: + IDS[i] + "; still receiving ID "
1398: + t.getID());
1399: } catch (IllegalArgumentException e2) {
1400: // Good; this is what we expect
1401: logln("Ok; Unregistered " + IDS[i]);
1402: }
1403: } catch (IllegalArgumentException e) {
1404: errln("FAIL: Registration/creation failed for ID "
1405: + IDS[i]);
1406: } finally {
1407: Transliterator.unregister(IDS[i]);
1408: }
1409: }
1410:
1411: // Make sure getAvailable API reflects removal
1412: for (Enumeration e = Transliterator.getAvailableIDs(); e
1413: .hasMoreElements();) {
1414: String id = (String) e.nextElement();
1415: for (int i = 0; i < 3; ++i) {
1416: if (id.equals(FULL_IDS[i])) {
1417: errln("FAIL: unregister(" + id + ") failed");
1418: }
1419: }
1420: }
1421: for (Enumeration e = Transliterator.getAvailableTargets("Any"); e
1422: .hasMoreElements();) {
1423: String t = (String) e.nextElement();
1424: if (t.equals(IDS[0])) {
1425: errln("FAIL: unregister(Any-" + t + ") failed");
1426: }
1427: }
1428: for (Enumeration e = Transliterator.getAvailableSources(); e
1429: .hasMoreElements();) {
1430: String s = (String) e.nextElement();
1431: for (int i = 0; i < 3; ++i) {
1432: if (SOURCES[i] == null)
1433: continue;
1434: if (s.equals(SOURCES[i])) {
1435: errln("FAIL: unregister(" + s + "-*) failed");
1436: }
1437: }
1438: }
1439: }
1440:
1441: /**
1442: * Test inverse of Greek-Latin; Title()
1443: */
1444: public void TestCompoundInverse() {
1445: Transliterator t = Transliterator.getInstance(
1446: "Greek-Latin; Title()", Transliterator.REVERSE);
1447: if (t == null) {
1448: errln("FAIL: createInstance");
1449: return;
1450: }
1451: String exp = "(Title);Latin-Greek";
1452: if (t.getID().equals(exp)) {
1453: logln("Ok: inverse of \"Greek-Latin; Title()\" is \""
1454: + t.getID());
1455: } else {
1456: errln("FAIL: inverse of \"Greek-Latin; Title()\" is \""
1457: + t.getID() + "\", expected \"" + exp + "\"");
1458: }
1459: }
1460:
1461: /**
1462: * Test NFD chaining with RBT
1463: */
1464: public void TestNFDChainRBT() {
1465: Transliterator t = Transliterator.createFromRules("TEST",
1466: "::NFD; aa > Q; a > q;", Transliterator.FORWARD);
1467: logln(t.toRules(true));
1468: expect(t, "aa", "Q");
1469: }
1470:
1471: /**
1472: * Inverse of "Null" should be "Null". (J21)
1473: */
1474: public void TestNullInverse() {
1475: Transliterator t = Transliterator.getInstance("Null");
1476: Transliterator u = t.getInverse();
1477: if (!u.getID().equals("Null")) {
1478: errln("FAIL: Inverse of Null should be Null");
1479: }
1480: }
1481:
1482: /**
1483: * Check ID of inverse of alias. (J22)
1484: */
1485: public void TestAliasInverseID() {
1486: String ID = "Latin-Hangul"; // This should be any alias ID with an inverse
1487: Transliterator t = Transliterator.getInstance(ID);
1488: Transliterator u = t.getInverse();
1489: String exp = "Hangul-Latin";
1490: String got = u.getID();
1491: if (!got.equals(exp)) {
1492: errln("FAIL: Inverse of " + ID + " is " + got
1493: + ", expected " + exp);
1494: }
1495: }
1496:
1497: /**
1498: * Test IDs of inverses of compound transliterators. (J20)
1499: */
1500: public void TestCompoundInverseID() {
1501: String ID = "Latin-Jamo;NFC(NFD)";
1502: Transliterator t = Transliterator.getInstance(ID);
1503: Transliterator u = t.getInverse();
1504: String exp = "NFD(NFC);Jamo-Latin";
1505: String got = u.getID();
1506: if (!got.equals(exp)) {
1507: errln("FAIL: Inverse of " + ID + " is " + got
1508: + ", expected " + exp);
1509: }
1510: }
1511:
1512: /**
1513: * Test undefined variable.
1514: */
1515: public void TestUndefinedVariable() {
1516: String rule = "$initial } a <> \u1161;";
1517: try {
1518: Transliterator t = Transliterator.createFromRules("<ID>",
1519: rule, Transliterator.FORWARD);
1520: t = null;
1521: } catch (IllegalArgumentException e) {
1522: logln("OK: Got exception for " + rule + ", as expected: "
1523: + e.getMessage());
1524: return;
1525: }
1526: errln("Fail: bogus rule " + rule + " compiled without error");
1527: }
1528:
1529: /**
1530: * Test empty context.
1531: */
1532: public void TestEmptyContext() {
1533: expect(" { a } > b;", "xay a ", "xby b ");
1534: }
1535:
1536: /**
1537: * Test compound filter ID syntax
1538: */
1539: public void TestCompoundFilterID() {
1540: String[] DATA = {
1541: // Col. 1 = ID or rule set (latter must start with #)
1542:
1543: // = columns > 1 are null if expect col. 1 to be illegal =
1544:
1545: // Col. 2 = direction, "F..." or "R..."
1546: // Col. 3 = source string
1547: // Col. 4 = exp result
1548:
1549: "[abc]; [abc]",
1550: null,
1551: null,
1552: null, // multiple filters
1553: "Latin-Greek; [abc];",
1554: null,
1555: null,
1556: null, // misplaced filter
1557: "[b]; Latin-Greek; Upper; ([xyz])",
1558: "F",
1559: "abc",
1560: "a\u0392c",
1561: "[b]; (Lower); Latin-Greek; Upper(); ([\u0392])",
1562: "R",
1563: "\u0391\u0392\u0393",
1564: "\u0391b\u0393",
1565: "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);",
1566: "F",
1567: "abc",
1568: "a\u0392c",
1569: "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\u0392]);",
1570: "R", "\u0391\u0392\u0393", "\u0391b\u0393", };
1571:
1572: for (int i = 0; i < DATA.length; i += 4) {
1573: String id = DATA[i];
1574: int direction = (DATA[i + 1] != null && DATA[i + 1]
1575: .charAt(0) == 'R') ? Transliterator.REVERSE
1576: : Transliterator.FORWARD;
1577: String source = DATA[i + 2];
1578: String exp = DATA[i + 3];
1579: boolean expOk = (DATA[i + 1] != null);
1580: Transliterator t = null;
1581: IllegalArgumentException e = null;
1582: try {
1583: if (id.charAt(0) == '#') {
1584: t = Transliterator.createFromRules("ID", id,
1585: direction);
1586: } else {
1587: t = Transliterator.getInstance(id, direction);
1588: }
1589: } catch (IllegalArgumentException ee) {
1590: e = ee;
1591: }
1592: boolean ok = (t != null && e == null);
1593: if (ok == expOk) {
1594: logln("Ok: " + id + " => " + t
1595: + (e != null ? (", " + e.getMessage()) : ""));
1596: if (source != null) {
1597: expect(t, source, exp);
1598: }
1599: } else {
1600: errln("FAIL: " + id + " => " + t
1601: + (e != null ? (", " + e.getMessage()) : ""));
1602: }
1603: }
1604: }
1605:
1606: /**
1607: * Test new property set syntax
1608: */
1609: public void TestPropertySet() {
1610: expect("a>A; \\p{Lu}>x; \\p{Any}>y;", "abcDEF", "Ayyxxx");
1611: expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
1612: "[ a stitch ]\n[ in time ]\r[ saves 9]");
1613: }
1614:
1615: /**
1616: * Test various failure points of the new 2.0 engine.
1617: */
1618: public void TestNewEngine() {
1619: Transliterator t = Transliterator.getInstance("Latin-Hiragana");
1620: // Katakana should be untouched
1621: expect(t, "a\u3042\u30A2", "\u3042\u3042\u30A2");
1622:
1623: if (true) {
1624: // This test will only work if Transliterator.ROLLBACK is
1625: // true. Otherwise, this test will fail, revealing a
1626: // limitation of global filters in incremental mode.
1627:
1628: Transliterator a = Transliterator.createFromRules("a_to_A",
1629: "a > A;", Transliterator.FORWARD);
1630: Transliterator A = Transliterator.createFromRules("A_to_b",
1631: "A > b;", Transliterator.FORWARD);
1632:
1633: //Transliterator array[] = new Transliterator[] {
1634: // a,
1635: // Transliterator.getInstance("NFD"),
1636: // A };
1637: //t = Transliterator.getInstance(array, new UnicodeSet("[:Ll:]"));
1638:
1639: try {
1640: Transliterator.registerInstance(a);
1641: Transliterator.registerInstance(A);
1642:
1643: t = Transliterator
1644: .getInstance("[:Ll:];a_to_A;NFD;A_to_b");
1645: expect(t, "aAaA", "bAbA");
1646:
1647: Transliterator[] u = t.getElements();
1648: assertTrue("getElements().length", u.length == 3);
1649: assertEquals("getElements()[0]", u[0].getID(), "a_to_A");
1650: assertEquals("getElements()[1]", u[1].getID(), "NFD");
1651: assertEquals("getElements()[2]", u[2].getID(), "A_to_b");
1652:
1653: t = Transliterator.getInstance("a_to_A;NFD;A_to_b");
1654: t.setFilter(new UnicodeSet("[:Ll:]"));
1655: expect(t, "aAaA", "bAbA");
1656: } finally {
1657: Transliterator.unregister("a_to_A");
1658: Transliterator.unregister("A_to_b");
1659: }
1660: }
1661:
1662: expect(
1663: "$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
1664: "a", "ax");
1665:
1666: String gr = "$ddot = \u0308 ;"
1667: + "$lcgvowel = [\u03b1\u03b5\u03b7\u03b9\u03bf\u03c5\u03c9] ;"
1668: + "$rough = \u0314 ;"
1669: + "($lcgvowel+ $ddot?) $rough > h | $1 ;"
1670: + "\u03b1 <> a ;" + "$rough <> h ;";
1671:
1672: expect(gr, "\u03B1\u0314", "ha");
1673: }
1674:
1675: /**
1676: * Test quantified segment behavior. We want:
1677: * ([abc])+ > x $1 x; applied to "cba" produces "xax"
1678: */
1679: public void TestQuantifiedSegment() {
1680: // The normal case
1681: expect("([abc]+) > x $1 x;", "cba", "xcbax");
1682:
1683: // The tricky case; the quantifier is around the segment
1684: expect("([abc])+ > x $1 x;", "cba", "xax");
1685:
1686: // Tricky case in reverse direction
1687: expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
1688:
1689: // Check post-context segment
1690: expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
1691:
1692: // Test toRule/toPattern for non-quantified segment.
1693: // Careful with spacing here.
1694: String r = "([a-c]){q} > x $1 x;";
1695: Transliterator t = Transliterator.createFromRules("ID", r,
1696: Transliterator.FORWARD);
1697: String rr = t.toRules(true);
1698: if (!r.equals(rr)) {
1699: errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
1700: } else {
1701: logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
1702: }
1703:
1704: // Test toRule/toPattern for quantified segment.
1705: // Careful with spacing here.
1706: r = "([a-c])+{q} > x $1 x;";
1707: t = Transliterator.createFromRules("ID", r,
1708: Transliterator.FORWARD);
1709: rr = t.toRules(true);
1710: if (!r.equals(rr)) {
1711: errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
1712: } else {
1713: logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
1714: }
1715: }
1716:
1717: //======================================================================
1718: // Ram's tests
1719: //======================================================================
/* This test exercises the Devanagari <-> Latin rules, which follow ISO 15919. */
1721: public void TestDevanagariLatinRT() {
1722: String[] source = { "bh\u0101rata", "kra", "k\u1E63a", "khra",
1723: "gra", "\u1E45ra", "cra",
1724: "chra",
1725: "j\u00F1a",
1726: "jhra",
1727: "\u00F1ra",
1728: "\u1E6Dya",
1729: "\u1E6Dhra",
1730: "\u1E0Dya",
1731: //"r\u0323ya", // \u095c is not valid in Devanagari
1732: "\u1E0Dhya", "\u1E5Bhra", "\u1E47ra", "tta", "thra",
1733: "dda",
1734: "dhra",
1735: "nna",
1736: "pra",
1737: "phra",
1738: "bra",
1739: "bhra",
1740: "mra",
1741: "\u1E49ra",
1742: //"l\u0331ra",
1743: "yra",
1744: "\u1E8Fra",
1745: //"l-",
1746: "vra", "\u015Bra", "\u1E63ra", "sra",
1747: "hma",
1748: "\u1E6D\u1E6Da",
1749: "\u1E6D\u1E6Dha",
1750: "\u1E6Dh\u1E6Dha",
1751: "\u1E0D\u1E0Da",
1752: "\u1E0D\u1E0Dha",
1753: "\u1E6Dya",
1754: "\u1E6Dhya",
1755: "\u1E0Dya",
1756: "\u1E0Dhya",
1757: // Not roundtrippable --
1758: // \u0939\u094d\u094d\u092E - hma
1759: // \u0939\u094d\u092E - hma
1760: // CharsToUnicodeString("hma"),
1761: "hya", "\u015Br\u0325", "\u015Bca", "\u0115",
1762: "san\u0304j\u012Bb s\u0113nagupta",
1763: "\u0101nand vaddir\u0101ju", };
1764: String[] expected = {
1765: "\u092D\u093E\u0930\u0924", /* bha\u0304rata */
1766: "\u0915\u094D\u0930", /* kra */
1767: "\u0915\u094D\u0937", /* ks\u0323a */
1768: "\u0916\u094D\u0930", /* khra */
1769: "\u0917\u094D\u0930", /* gra */
1770: "\u0919\u094D\u0930", /* n\u0307ra */
1771: "\u091A\u094D\u0930", /* cra */
1772: "\u091B\u094D\u0930", /* chra */
1773: "\u091C\u094D\u091E", /* jn\u0303a */
1774: "\u091D\u094D\u0930", /* jhra */
1775: "\u091E\u094D\u0930", /* n\u0303ra */
1776: "\u091F\u094D\u092F", /* t\u0323ya */
1777: "\u0920\u094D\u0930", /* t\u0323hra */
1778: "\u0921\u094D\u092F", /* d\u0323ya */
1779: //"\u095C\u094D\u092F", /* r\u0323ya */ // \u095c is not valid in Devanagari
1780: "\u0922\u094D\u092F", /* d\u0323hya */
1781: "\u0922\u093C\u094D\u0930", /* r\u0323hra */
1782: "\u0923\u094D\u0930", /* n\u0323ra */
1783: "\u0924\u094D\u0924", /* tta */
1784: "\u0925\u094D\u0930", /* thra */
1785: "\u0926\u094D\u0926", /* dda */
1786: "\u0927\u094D\u0930", /* dhra */
1787: "\u0928\u094D\u0928", /* nna */
1788: "\u092A\u094D\u0930", /* pra */
1789: "\u092B\u094D\u0930", /* phra */
1790: "\u092C\u094D\u0930", /* bra */
1791: "\u092D\u094D\u0930", /* bhra */
1792: "\u092E\u094D\u0930", /* mra */
1793: "\u0929\u094D\u0930", /* n\u0331ra */
1794: //"\u0934\u094D\u0930", /* l\u0331ra */
1795: "\u092F\u094D\u0930", /* yra */
1796: "\u092F\u093C\u094D\u0930", /* y\u0307ra */
1797: //"l-",
1798: "\u0935\u094D\u0930", /* vra */
1799: "\u0936\u094D\u0930", /* s\u0301ra */
1800: "\u0937\u094D\u0930", /* s\u0323ra */
1801: "\u0938\u094D\u0930", /* sra */
1802: "\u0939\u094d\u092E", /* hma */
1803: "\u091F\u094D\u091F", /* t\u0323t\u0323a */
1804: "\u091F\u094D\u0920", /* t\u0323t\u0323ha */
1805: "\u0920\u094D\u0920", /* t\u0323ht\u0323ha*/
1806: "\u0921\u094D\u0921", /* d\u0323d\u0323a */
1807: "\u0921\u094D\u0922", /* d\u0323d\u0323ha */
1808: "\u091F\u094D\u092F", /* t\u0323ya */
1809: "\u0920\u094D\u092F", /* t\u0323hya */
1810: "\u0921\u094D\u092F", /* d\u0323ya */
1811: "\u0922\u094D\u092F", /* d\u0323hya */
1812: // "hma", /* hma */
1813: "\u0939\u094D\u092F", /* hya */
1814: "\u0936\u0943", /* s\u0301r\u0325a */
1815: "\u0936\u094D\u091A", /* s\u0301ca */
1816: "\u090d", /* e\u0306 */
1817: "\u0938\u0902\u091C\u0940\u092C\u094D \u0938\u0947\u0928\u0917\u0941\u092A\u094D\u0924",
1818: "\u0906\u0928\u0902\u0926\u094D \u0935\u0926\u094D\u0926\u093F\u0930\u093E\u091C\u0941", };
1819:
1820: Transliterator latinToDev = Transliterator.getInstance(
1821: "Latin-Devanagari", Transliterator.FORWARD);
1822: Transliterator devToLatin = Transliterator.getInstance(
1823: "Devanagari-Latin", Transliterator.FORWARD);
1824:
1825: for (int i = 0; i < source.length; i++) {
1826: expect(latinToDev, (source[i]), (expected[i]));
1827: expect(devToLatin, (expected[i]), (source[i]));
1828: }
1829:
1830: }
1831:
1832: public void TestTeluguLatinRT() {
1833: String[] source = { "raghur\u0101m vi\u015Bvan\u0101dha", /* Raghuram Viswanadha */
1834: "\u0101nand vaddir\u0101ju", /* Anand Vaddiraju */
1835: "r\u0101j\u012Bv ka\u015Barab\u0101da", /* Rajeev Kasarabada */
1836: "san\u0304j\u012Bv ka\u015Barab\u0101da", /* sanjeev kasarabada */
1837: "san\u0304j\u012Bb sen'gupta", /* sanjib sengupata */
1838: "amar\u0113ndra hanum\u0101nula", /* Amarendra hanumanula */
1839: "ravi kum\u0101r vi\u015Bvan\u0101dha", /* Ravi Kumar Viswanadha */
1840: "\u0101ditya kandr\u0113gula", /* Aditya Kandregula */
1841: "\u015Br\u012Bdhar ka\u1E47\u1E6Dama\u015Be\u1E6D\u1E6Di", /* Shridhar Kantamsetty */
1842: "m\u0101dhav de\u015Be\u1E6D\u1E6Di" /* Madhav Desetty */
1843: };
1844:
1845: String[] expected = {
1846: "\u0c30\u0c18\u0c41\u0c30\u0c3e\u0c2e\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27",
1847: "\u0c06\u0c28\u0c02\u0c26\u0c4d \u0C35\u0C26\u0C4D\u0C26\u0C3F\u0C30\u0C3E\u0C1C\u0C41",
1848: "\u0c30\u0c3e\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26",
1849: "\u0c38\u0c02\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26",
1850: "\u0c38\u0c02\u0c1c\u0c40\u0c2c\u0c4d \u0c38\u0c46\u0c28\u0c4d\u0c17\u0c41\u0c2a\u0c4d\u0c24",
1851: "\u0c05\u0c2e\u0c30\u0c47\u0c02\u0c26\u0c4d\u0c30 \u0c39\u0c28\u0c41\u0c2e\u0c3e\u0c28\u0c41\u0c32",
1852: "\u0c30\u0c35\u0c3f \u0c15\u0c41\u0c2e\u0c3e\u0c30\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27",
1853: "\u0c06\u0c26\u0c3f\u0c24\u0c4d\u0c2f \u0C15\u0C02\u0C26\u0C4D\u0C30\u0C47\u0C17\u0C41\u0c32",
1854: "\u0c36\u0c4d\u0c30\u0c40\u0C27\u0C30\u0C4D \u0c15\u0c02\u0c1f\u0c2e\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f",
1855: "\u0c2e\u0c3e\u0c27\u0c35\u0c4d \u0c26\u0c46\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f", };
1856:
1857: Transliterator latinToDev = Transliterator.getInstance(
1858: "Latin-Telugu", Transliterator.FORWARD);
1859: Transliterator devToLatin = Transliterator.getInstance(
1860: "Telugu-Latin", Transliterator.FORWARD);
1861:
1862: for (int i = 0; i < source.length; i++) {
1863: expect(latinToDev, (source[i]), (expected[i]));
1864: expect(devToLatin, (expected[i]), (source[i]));
1865: }
1866: }
1867:
1868: public void TestSanskritLatinRT() {
1869: int MAX_LEN = 15;
1870: String[] source = { "rmk\u1E63\u0113t", "\u015Br\u012Bmad",
1871: "bhagavadg\u012Bt\u0101", "adhy\u0101ya", "arjuna",
1872: "vi\u1E63\u0101da", "y\u014Dga",
1873: "dhr\u0325tar\u0101\u1E63\u1E6Dra", "uv\u0101cr\u0325",
1874: "dharmak\u1E63\u0113tr\u0113",
1875: "kuruk\u1E63\u0113tr\u0113", "samav\u0113t\u0101",
1876: "yuyutsava\u1E25", "m\u0101mak\u0101\u1E25",
1877: // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva",
1878: "kimakurvata", "san\u0304java", };
1879: String[] expected = {
1880: "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D",
1881: "\u0936\u094d\u0930\u0940\u092e\u0926\u094d",
1882: "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e",
1883: "\u0905\u0927\u094d\u092f\u093e\u092f",
1884: "\u0905\u0930\u094d\u091c\u0941\u0928",
1885: "\u0935\u093f\u0937\u093e\u0926",
1886: "\u092f\u094b\u0917",
1887: "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930",
1888: "\u0909\u0935\u093E\u091A\u0943",
1889: "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
1890: "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
1891: "\u0938\u092e\u0935\u0947\u0924\u093e",
1892: "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903",
1893: "\u092e\u093e\u092e\u0915\u093e\u0903",
1894: //"\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935",
1895: "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924",
1896: "\u0938\u0902\u091c\u0935", };
1897:
1898: Transliterator latinToDev = Transliterator.getInstance(
1899: "Latin-Devanagari", Transliterator.FORWARD);
1900: Transliterator devToLatin = Transliterator.getInstance(
1901: "Devanagari-Latin", Transliterator.FORWARD);
1902: for (int i = 0; i < MAX_LEN; i++) {
1903: expect(latinToDev, (source[i]), (expected[i]));
1904: expect(devToLatin, (expected[i]), (source[i]));
1905: }
1906: }
1907:
1908: public void TestCompoundLatinRT() {
1909: int MAX_LEN = 15;
1910: String[] source = { "rmk\u1E63\u0113t", "\u015Br\u012Bmad",
1911: "bhagavadg\u012Bt\u0101", "adhy\u0101ya", "arjuna",
1912: "vi\u1E63\u0101da", "y\u014Dga",
1913: "dhr\u0325tar\u0101\u1E63\u1E6Dra", "uv\u0101cr\u0325",
1914: "dharmak\u1E63\u0113tr\u0113",
1915: "kuruk\u1E63\u0113tr\u0113", "samav\u0113t\u0101",
1916: "yuyutsava\u1E25", "m\u0101mak\u0101\u1E25",
1917: // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva",
1918: "kimakurvata", "san\u0304java" };
1919: String[] expected = {
1920: "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D",
1921: "\u0936\u094d\u0930\u0940\u092e\u0926\u094d",
1922: "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e",
1923: "\u0905\u0927\u094d\u092f\u093e\u092f",
1924: "\u0905\u0930\u094d\u091c\u0941\u0928",
1925: "\u0935\u093f\u0937\u093e\u0926",
1926: "\u092f\u094b\u0917",
1927: "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930",
1928: "\u0909\u0935\u093E\u091A\u0943",
1929: "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
1930: "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947",
1931: "\u0938\u092e\u0935\u0947\u0924\u093e",
1932: "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903",
1933: "\u092e\u093e\u092e\u0915\u093e\u0903",
1934: // "\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935",
1935: "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924",
1936: "\u0938\u0902\u091c\u0935" };
1937:
1938: Transliterator latinToDevToLatin = Transliterator.getInstance(
1939: "Latin-Devanagari;Devanagari-Latin",
1940: Transliterator.FORWARD);
1941: Transliterator devToLatinToDev = Transliterator.getInstance(
1942: "Devanagari-Latin;Latin-Devanagari",
1943: Transliterator.FORWARD);
1944: for (int i = 0; i < MAX_LEN; i++) {
1945: expect(latinToDevToLatin, (source[i]), (source[i]));
1946: expect(devToLatinToDev, (expected[i]), (expected[i]));
1947: }
1948: }
1949:
1950: /**
1951: * Test Gurmukhi-Devanagari Tippi and Bindi
1952: */
1953: public void TestGurmukhiDevanagari() {
1954: // the rule says:
1955: // (\u0902) (when preceded by vowel) ---> (\u0A02)
1956: // (\u0902) (when preceded by consonant) ---> (\u0A70)
1957:
1958: UnicodeSet vowel = new UnicodeSet(
1959: "[\u0905-\u090A \u090F\u0910\u0913\u0914 \u093e-\u0942\u0947\u0948\u094B\u094C\u094D]");
1960: UnicodeSet non_vowel = new UnicodeSet(
1961: "[\u0915-\u0928\u092A-\u0930]");
1962:
1963: UnicodeSetIterator vIter = new UnicodeSetIterator(vowel);
1964: UnicodeSetIterator nvIter = new UnicodeSetIterator(non_vowel);
1965: Transliterator trans = Transliterator
1966: .getInstance("Devanagari-Gurmukhi");
1967: StringBuffer src = new StringBuffer(" \u0902");
1968: StringBuffer expect = new StringBuffer(" \u0A02");
1969: while (vIter.next()) {
1970: src.setCharAt(0, (char) vIter.codepoint);
1971: expect.setCharAt(0, (char) (vIter.codepoint + 0x0100));
1972: expect(trans, src.toString(), expect.toString());
1973: }
1974:
1975: expect.setCharAt(1, '\u0A70');
1976: while (nvIter.next()) {
1977: //src.setCharAt(0,(char) nvIter.codepoint);
1978: src.setCharAt(0, (char) nvIter.codepoint);
1979: expect.setCharAt(0, (char) (nvIter.codepoint + 0x0100));
1980: expect(trans, src.toString(), expect.toString());
1981: }
1982: }
1983:
1984: /**
1985: * Test instantiation from a locale.
1986: */
1987: public void TestLocaleInstantiation() {
1988: try {
1989: Transliterator t = Transliterator
1990: .getInstance("te_IN-Latin");
1991: //expect(t, "\u0430", "a");
1992: } catch (IllegalArgumentException ex) {
1993: warnln("Could not load locale data for obtaining the script used in the locale te_IN. "
1994: + ex.getMessage());
1995: }
1996: try {
1997: Transliterator t = Transliterator
1998: .getInstance("ru_RU-Latin");
1999: expect(t, "\u0430", "a");
2000: } catch (IllegalArgumentException ex) {
2001: warnln("Could not load locale data for obtaining the script used in the locale ru_RU. "
2002: + ex.getMessage());
2003: }
2004: try {
2005: Transliterator t = Transliterator.getInstance("en-el");
2006: expect(t, "a", "\u03B1");
2007: } catch (IllegalArgumentException ex) {
2008: warnln("Could not load locale data for obtaining the script used in the locale el. "
2009: + ex.getMessage());
2010: }
2011: }
2012:
2013: /**
2014: * Test title case handling of accent (should ignore accents)
2015: */
2016: public void TestTitleAccents() {
2017: Transliterator t = Transliterator.getInstance("Title");
2018: expect(t, "a\u0300b can't abe", "A\u0300b Can't Abe");
2019: }
2020:
2021: /**
2022: * Basic test of a locale resource based rule.
2023: */
2024: public void TestLocaleResource() {
2025: String DATA[] = {
2026: // id from to
2027: "Latin-Greek/UNGEGN", "b", "\u03bc\u03c0", "Latin-el",
2028: "b", "\u03bc\u03c0", "Latin-Greek", "b", "\u03B2",
2029: "Greek-Latin/UNGEGN", "\u03B2", "v", "el-Latin",
2030: "\u03B2", "v", "Greek-Latin", "\u03B2", "b", };
2031: for (int i = 0; i < DATA.length; i += 3) {
2032: Transliterator t = Transliterator.getInstance(DATA[i]);
2033: expect(t, DATA[i + 1], DATA[i + 2]);
2034: }
2035: }
2036:
2037: /**
2038: * Make sure parse errors reference the right line.
2039: */
2040: public void TestParseError() {
2041: String rule = "a > b;\n" + "# more stuff\n" + "d << b;";
2042: try {
2043: Transliterator t = Transliterator.createFromRules("ID",
2044: rule, Transliterator.FORWARD);
2045: if (t != null) {
2046: errln("FAIL: Did not get expected exception");
2047: }
2048: } catch (IllegalArgumentException e) {
2049: String err = e.getMessage();
2050: if (err.indexOf("d << b") >= 0) {
2051: logln("Ok: " + err);
2052: } else {
2053: errln("FAIL: " + err);
2054: }
2055: return;
2056: }
2057: errln("FAIL: no syntax error");
2058: }
2059:
2060: /**
2061: * Make sure sets on output are disallowed.
2062: */
2063: public void TestOutputSet() {
2064: String rule = "$set = [a-cm-n]; b > $set;";
2065: Transliterator t = null;
2066: try {
2067: t = Transliterator.createFromRules("ID", rule,
2068: Transliterator.FORWARD);
2069: if (t != null) {
2070: errln("FAIL: Did not get the expected exception");
2071: }
2072: } catch (IllegalArgumentException e) {
2073: logln("Ok: " + e.getMessage());
2074: return;
2075: }
2076: errln("FAIL: No syntax error");
2077: }
2078:
2079: /**
2080: * Test the use variable range pragma, making sure that use of
2081: * variable range characters is detected and flagged as an error.
2082: */
2083: public void TestVariableRange() {
2084: String rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
2085: try {
2086: Transliterator t = Transliterator.createFromRules("ID",
2087: rule, Transliterator.FORWARD);
2088: if (t != null) {
2089: errln("FAIL: Did not get the expected exception");
2090: }
2091: } catch (IllegalArgumentException e) {
2092: logln("Ok: " + e.getMessage());
2093: return;
2094: }
2095: errln("FAIL: No syntax error");
2096: }
2097:
2098: /**
2099: * Test invalid post context error handling
2100: */
2101: public void TestInvalidPostContext() {
2102: try {
2103: Transliterator t = Transliterator.createFromRules("ID",
2104: "a}b{c>d;", Transliterator.FORWARD);
2105: if (t != null) {
2106: errln("FAIL: Did not get the expected exception");
2107: }
2108: } catch (IllegalArgumentException e) {
2109: String msg = e.getMessage();
2110: if (msg.indexOf("a}b{c") >= 0) {
2111: logln("Ok: " + msg);
2112: } else {
2113: errln("FAIL: " + msg);
2114: }
2115: return;
2116: }
2117: errln("FAIL: No syntax error");
2118: }
2119:
2120: /**
2121: * Test ID form variants
2122: */
2123: public void TestIDForms() {
2124: String DATA[] = {
2125: "NFC",
2126: null,
2127: "NFD",
2128: "nfd",
2129: null,
2130: "NFC", // make sure case is ignored
2131: "Any-NFKD", null, "Any-NFKC", "Null", null, "Null",
2132: "-nfkc", "nfkc", "NFKD", "-nfkc/", "nfkc", "NFKD",
2133: "Latin-Greek/UNGEGN", null, "Greek-Latin/UNGEGN",
2134: "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN",
2135: "Latin-Greek/UNGEGN", "Bengali-Devanagari/",
2136: "Bengali-Devanagari", "Devanagari-Bengali", "Source-",
2137: null, null, "Source/Variant-", null, null,
2138: "Source-/Variant", null, null, "/Variant", null, null,
2139: "/Variant-", null, null, "-/Variant", null, null, "-/",
2140: null, null, "-", null, null, "/", null, null, };
2141:
2142: for (int i = 0; i < DATA.length; i += 3) {
2143: String ID = DATA[i];
2144: String expID = DATA[i + 1];
2145: String expInvID = DATA[i + 2];
2146: boolean expValid = (expInvID != null);
2147: if (expID == null) {
2148: expID = ID;
2149: }
2150: try {
2151: Transliterator t = Transliterator.getInstance(ID);
2152: Transliterator u = t.getInverse();
2153: if (t.getID().equals(expID)
2154: && u.getID().equals(expInvID)) {
2155: logln("Ok: " + ID + ".getInverse() => " + expInvID);
2156: } else {
2157: errln("FAIL: getInstance(" + ID + ") => "
2158: + t.getID() + " x getInverse() => "
2159: + u.getID() + ", expected " + expInvID);
2160: }
2161: } catch (IllegalArgumentException e) {
2162: if (!expValid) {
2163: logln("Ok: getInstance(" + ID + ") => "
2164: + e.getMessage());
2165: } else {
2166: errln("FAIL: getInstance(" + ID + ") => "
2167: + e.getMessage());
2168: }
2169: }
2170: }
2171: }
2172:
2173: void checkRules(String label, Transliterator t2,
2174: String testRulesForward) {
2175: String rules2 = t2.toRules(true);
2176: //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
2177: rules2 = TestUtility.replace(rules2, " ", "");
2178: rules2 = TestUtility.replace(rules2, "\n", "");
2179: rules2 = TestUtility.replace(rules2, "\r", "");
2180: testRulesForward = TestUtility.replace(testRulesForward, " ",
2181: "");
2182:
2183: if (!rules2.equals(testRulesForward)) {
2184: errln(label);
2185: logln("GENERATED RULES: " + rules2);
2186: logln("SHOULD BE: " + testRulesForward);
2187: }
2188: }
2189:
2190: /**
2191: * Mark's toRules test.
2192: */
2193: public void TestToRulesMark() {
2194:
2195: String testRules = "::[[:Latin:][:Mark:]];" + "::NFKD (NFC);"
2196: + "::Lower (Lower);"
2197: + "a <> \\u03B1;" // alpha
2198: + "::NFKC (NFD);" + "::Upper (Lower);" + "::Lower ();"
2199: + "::([[:Greek:][:Mark:]]);";
2200: String testRulesForward = "::[[:Latin:][:Mark:]];"
2201: + "::NFKD(NFC);" + "::Lower(Lower);" + "a > \\u03B1;"
2202: + "::NFKC(NFD);" + "::Upper (Lower);" + "::Lower ();";
2203: String testRulesBackward = "::[[:Greek:][:Mark:]];"
2204: + "::Lower (Upper);" + "::NFD(NFKC);" + "\\u03B1 > a;"
2205: + "::Lower(Lower);" + "::NFC(NFKD);";
2206: String source = "\u00E1"; // a-acute
2207: String target = "\u03AC"; // alpha-acute
2208:
2209: Transliterator t2 = Transliterator.createFromRules(
2210: "source-target", testRules, Transliterator.FORWARD);
2211: Transliterator t3 = Transliterator.createFromRules(
2212: "target-source", testRules, Transliterator.REVERSE);
2213:
2214: expect(t2, source, target);
2215: expect(t3, target, source);
2216:
2217: checkRules("Failed toRules FORWARD", t2, testRulesForward);
2218: checkRules("Failed toRules BACKWARD", t3, testRulesBackward);
2219: }
2220:
2221: /**
2222: * Test Escape and Unescape transliterators.
2223: */
2224: public void TestEscape() {
2225: expect(Transliterator.getInstance("Hex-Any"),
2226: "\\x{40}\\U000000312Q", "@12Q");
2227: expect(Transliterator.getInstance("Any-Hex/C"),
2228: CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2229: "\\u0041\\U0010BEEF\\uFEED");
2230: expect(Transliterator.getInstance("Any-Hex/Java"),
2231: CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2232: "\\u0041\\uDBEF\\uDEEF\\uFEED");
2233: expect(Transliterator.getInstance("Any-Hex/Perl"),
2234: CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
2235: "\\x{41}\\x{10BEEF}\\x{FEED}");
2236: }
2237:
2238: /**
2239: * Make sure display names of variants look reasonable.
2240: */
2241: public void TestDisplayName() {
2242: String DATA[] = {
2243: // ID, forward name, reverse name
2244: // Update the text as necessary -- the important thing is
2245: // not the text itself, but how various cases are handled.
2246:
2247: // Basic test
2248: "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
2249:
2250: // Variants
2251: "Any-Hex/Perl", "Any to Hex Escape/Perl",
2252: "Hex Escape to Any/Perl",
2253:
2254: // Target-only IDs
2255: "NFC", "Any to NFC", "Any to NFD", };
2256:
2257: Locale US = Locale.US;
2258:
2259: for (int i = 0; i < DATA.length; i += 3) {
2260: String name = Transliterator.getDisplayName(DATA[i], US);
2261: if (!name.equals(DATA[i + 1])) {
2262: errln("FAIL: " + DATA[i] + ".getDisplayName() => "
2263: + name + ", expected " + DATA[i + 1]);
2264: } else {
2265: logln("Ok: " + DATA[i] + ".getDisplayName() => " + name);
2266: }
2267: Transliterator t = Transliterator.getInstance(DATA[i],
2268: Transliterator.REVERSE);
2269: name = Transliterator.getDisplayName(t.getID(), US);
2270: if (!name.equals(DATA[i + 2])) {
2271: errln("FAIL: " + t.getID() + ".getDisplayName() => "
2272: + name + ", expected " + DATA[i + 2]);
2273: } else {
2274: logln("Ok: " + t.getID() + ".getDisplayName() => "
2275: + name);
2276: }
2277:
2278: // Cover getDisplayName(String)
2279: ULocale save = ULocale.getDefault();
2280: ULocale.setDefault(ULocale.US);
2281: String name2 = Transliterator.getDisplayName(t.getID());
2282: if (!name.equals(name2))
2283: errln("FAIL: getDisplayName with default locale failed");
2284: ULocale.setDefault(save);
2285: }
2286: }
2287:
2288: /**
2289: * Test anchor masking
2290: */
2291: public void TestAnchorMasking() {
2292: String rule = "^a > Q; a > q;";
2293: try {
2294: Transliterator t = Transliterator.createFromRules("ID",
2295: rule, Transliterator.FORWARD);
2296: if (t == null) {
2297: errln("FAIL: Did not get the expected exception");
2298: }
2299: } catch (IllegalArgumentException e) {
2300: errln("FAIL: " + rule + " => " + e);
2301: }
2302: }
2303:
2304: /**
2305: * This test is not in trnstst.cpp. This test has been moved from com/ibm/icu/dev/test/lang/TestUScript.java
2306: * during ICU4J modularization to remove dependency of tests on Transliterator.
2307: */
2308: public void TestScriptAllCodepoints() {
2309: int code;
2310: String oldId = "";
2311: String oldAbbrId = "";
2312: for (int i = 0; i <= 0x10ffff; i++) {
2313: code = UScript.INVALID_CODE;
2314: code = UScript.getScript(i);
2315: if (code == UScript.INVALID_CODE) {
2316: errln("UScript.getScript for codepoint 0x" + hex(i)
2317: + " failed");
2318: }
2319: String id = UScript.getName(code);
2320: String abbr = UScript.getShortName(code);
2321: String newId = "[:" + id + ":];NFD";
2322: String newAbbrId = "[:" + abbr + ":];NFD";
2323: if (!oldId.equals(newId)) {
2324: try {
2325: Transliterator t = Transliterator
2326: .getInstance(newId);
2327: if (t == null) {
2328: errln("Failed to create transliterator for "
2329: + hex(i) + " script code: " + id);
2330: }
2331: } catch (Exception e) {
2332: errln("Failed to create transliterator for "
2333: + hex(i) + " script code: " + id
2334: + " Exception: " + e.getMessage());
2335: }
2336: }
2337: oldId = newId;
2338: if (!oldAbbrId.equals(newAbbrId)) {
2339: try {
2340: Transliterator t = Transliterator
2341: .getInstance(newAbbrId);
2342: if (t == null) {
2343: errln("Failed to create transliterator for "
2344: + hex(i) + " script code: " + abbr);
2345: }
2346: } catch (Exception e) {
2347: errln("Failed to create transliterator for "
2348: + hex(i) + " script code: " + abbr
2349: + " Exception: " + e.getMessage());
2350: }
2351: }
2352: oldAbbrId = newAbbrId;
2353: }
2354: }
2355:
2356: static final String[][] registerRules = {
2357: { "Any-Dev1", "x > X; y > Y;" },
2358: { "Any-Dev2", "XY > Z" },
2359: {
2360: "Greek-Latin/FAKE",
2361: "[^[:L:][:M:]] { \u03bc\u03c0 > b ; "
2362: + "\u03bc\u03c0 } [^[:L:][:M:]] > b ; "
2363: + "[^[:L:][:M:]] { [\u039c\u03bc][\u03a0\u03c0] > B ; "
2364: + "[\u039c\u03bc][\u03a0\u03c0] } [^[:L:][:M:]] > B ;" }, };
2365:
2366: static final String DESERET_DEE = UTF16.valueOf(0x10414);
2367: static final String DESERET_dee = UTF16.valueOf(0x1043C);
2368:
2369: static final String[][] testCases = {
2370:
2371: // NORMALIZATION
2372: // should add more test cases
2373: { "NFD", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3" },
2374: { "NFC", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3" },
2375: { "NFKD", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3" },
2376: { "NFKC", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3" },
2377:
2378: // mp -> b BUG
2379: { "Greek-Latin/UNGEGN", "(\u03BC\u03C0)", "(b)" },
2380: { "Greek-Latin/FAKE", "(\u03BC\u03C0)", "(b)" },
2381:
2382: // check for devanagari bug
2383: { "nfd;Dev1;Dev2;nfc", "xy", "Z" },
2384:
2385: // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
2386: {
2387: "Title",
2388: "ab'cD ffi\u0131I\u0130 \u01C7\u01C8\u01C9 "
2389: + DESERET_dee + DESERET_DEE,
2390: "Ab'cd Ffi\u0131ii\u0307 \u01C8\u01C9\u01C9 "
2391: + DESERET_DEE + DESERET_dee },
2392: //TODO: enable this test once Titlecase works right
2393: //{"Title", "\uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE,
2394: // "Ffi\u0131ii \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee},
2395:
2396: {
2397: "Upper",
2398: "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 "
2399: + DESERET_dee + DESERET_DEE,
2400: "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 "
2401: + DESERET_DEE + DESERET_DEE },
2402: {
2403: "Lower",
2404: "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 "
2405: + DESERET_dee + DESERET_DEE,
2406: "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 "
2407: + DESERET_dee + DESERET_dee },
2408:
2409: {
2410: "Upper",
2411: "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 "
2412: + DESERET_dee + DESERET_DEE },
2413: {
2414: "Lower",
2415: "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 "
2416: + DESERET_dee + DESERET_DEE },
2417:
2418: // FORMS OF S
2419: { "Greek-Latin/UNGEGN", "\u03C3 \u03C3\u03C2 \u03C2\u03C3",
2420: "s ss s\u0331s\u0331" },
2421: { "Latin-Greek/UNGEGN", "s ss s\u0331s\u0331",
2422: "\u03C3 \u03C3\u03C2 \u03C2\u03C3" },
2423: { "Greek-Latin", "\u03C3 \u03C3\u03C2 \u03C2\u03C3",
2424: "s ss s\u0331s\u0331" },
2425: { "Latin-Greek", "s ss s\u0331s\u0331",
2426: "\u03C3 \u03C3\u03C2 \u03C2\u03C3" },
2427:
2428: // Tatiana bug
2429: // Upper: TAT\u02B9\u00C2NA
2430: // Lower: tat\u02B9\u00E2na
2431: // Title: Tat\u02B9\u00E2na
2432: { "Upper", "tat\u02B9\u00E2na", "TAT\u02B9\u00C2NA" },
2433: { "Lower", "TAT\u02B9\u00C2NA", "tat\u02B9\u00E2na" },
2434: { "Title", "tat\u02B9\u00E2na", "Tat\u02B9\u00E2na" }, };
2435:
2436: public void TestSpecialCases() {
2437:
2438: for (int i = 0; i < registerRules.length; ++i) {
2439: Transliterator t = Transliterator.createFromRules(
2440: registerRules[i][0], registerRules[i][1],
2441: Transliterator.FORWARD);
2442: DummyFactory.add(registerRules[i][0], t);
2443: }
2444: for (int i = 0; i < testCases.length; ++i) {
2445: String name = testCases[i][0];
2446: Transliterator t = Transliterator.getInstance(name);
2447: String id = t.getID();
2448: String source = testCases[i][1];
2449: String target = null;
2450:
2451: // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
2452:
2453: if (testCases[i].length > 2)
2454: target = testCases[i][2];
2455: else if (id.equalsIgnoreCase("NFD"))
2456: target = com.ibm.icu.text.Normalizer.normalize(source,
2457: com.ibm.icu.text.Normalizer.NFD);
2458: else if (id.equalsIgnoreCase("NFC"))
2459: target = com.ibm.icu.text.Normalizer.normalize(source,
2460: com.ibm.icu.text.Normalizer.NFC);
2461: else if (id.equalsIgnoreCase("NFKD"))
2462: target = com.ibm.icu.text.Normalizer.normalize(source,
2463: com.ibm.icu.text.Normalizer.NFKD);
2464: else if (id.equalsIgnoreCase("NFKC"))
2465: target = com.ibm.icu.text.Normalizer.normalize(source,
2466: com.ibm.icu.text.Normalizer.NFKC);
2467: else if (id.equalsIgnoreCase("Lower"))
2468: target = UCharacter.toLowerCase(Locale.US, source);
2469: else if (id.equalsIgnoreCase("Upper"))
2470: target = UCharacter.toUpperCase(Locale.US, source);
2471:
2472: expect(t, source, target);
2473: }
2474: for (int i = 0; i < registerRules.length; ++i) {
2475: Transliterator.unregister(registerRules[i][0]);
2476: }
2477: }
2478:
2479: // seems like there should be an easier way to just register an instance of a transliterator
2480:
2481: static class DummyFactory implements Transliterator.Factory {
2482: static DummyFactory singleton = new DummyFactory();
2483: static HashMap m = new HashMap();
2484:
2485: // Since Transliterators are immutable, we don't have to clone on set & get
2486: static void add(String ID, Transliterator t) {
2487: m.put(ID, t);
2488: //System.out.println("Registering: " + ID + ", " + t.toRules(true));
2489: Transliterator.registerFactory(ID, singleton);
2490: }
2491:
2492: public Transliterator getInstance(String ID) {
2493: return (Transliterator) m.get(ID);
2494: }
2495: }
2496:
2497: public void TestSurrogateCasing() {
2498: // check that casing handles surrogates
2499: // titlecase is currently defective
2500: int dee = UTF16.charAt(DESERET_dee, 0);
2501: int DEE = UCharacter.toTitleCase(dee);
2502: if (!UTF16.valueOf(DEE).equals(DESERET_DEE)) {
2503: errln("Fails titlecase of surrogates"
2504: + Integer.toString(dee, 16) + ", "
2505: + Integer.toString(DEE, 16));
2506: }
2507:
2508: if (!UCharacter.toUpperCase(DESERET_dee + DESERET_DEE).equals(
2509: DESERET_DEE + DESERET_DEE)) {
2510: errln("Fails uppercase of surrogates");
2511: }
2512:
2513: if (!UCharacter.toLowerCase(DESERET_dee + DESERET_DEE).equals(
2514: DESERET_dee + DESERET_dee)) {
2515: errln("Fails lowercase of surrogates");
2516: }
2517: }
2518:
2519: // Check to see that incremental gets at least part way through a reasonable string.
2520:
2521: public void TestIncrementalProgress() {
2522: String latinTest = "The Quick Brown Fox.";
2523: String devaTest = Transliterator
2524: .getInstance("Latin-Devanagari").transliterate(
2525: latinTest);
2526: String kataTest = Transliterator.getInstance("Latin-Katakana")
2527: .transliterate(latinTest);
2528: String[][] tests = { { "Any", latinTest },
2529: { "Latin", latinTest }, { "Halfwidth", latinTest },
2530: { "Devanagari", devaTest }, { "Katakana", kataTest }, };
2531:
2532: Enumeration sources = Transliterator.getAvailableSources();
2533: while (sources.hasMoreElements()) {
2534: String source = (String) sources.nextElement();
2535: String test = findMatch(source, tests);
2536: if (test == null) {
2537: logln("Skipping " + source + "-X");
2538: continue;
2539: }
2540: Enumeration targets = Transliterator
2541: .getAvailableTargets(source);
2542: while (targets.hasMoreElements()) {
2543: String target = (String) targets.nextElement();
2544: Enumeration variants = Transliterator
2545: .getAvailableVariants(source, target);
2546: while (variants.hasMoreElements()) {
2547: String variant = (String) variants.nextElement();
2548: String id = source + "-" + target + "/" + variant;
2549: logln("id: " + id);
2550:
2551: String filter = getTranslitTestFilter();
2552: if (filter != null && id.indexOf(filter) < 0)
2553: continue;
2554:
2555: Transliterator t = Transliterator.getInstance(id);
2556: CheckIncrementalAux(t, test);
2557:
2558: String rev = t.transliterate(test);
2559: Transliterator inv = t.getInverse();
2560: CheckIncrementalAux(inv, rev);
2561: }
2562: }
2563: }
2564: }
2565:
2566: public String findMatch(String source, String[][] pairs) {
2567: for (int i = 0; i < pairs.length; ++i) {
2568: if (source.equalsIgnoreCase(pairs[i][0]))
2569: return pairs[i][1];
2570: }
2571: return null;
2572: }
2573:
2574: public void CheckIncrementalAux(Transliterator t, String input) {
2575:
2576: Replaceable test = new ReplaceableString(input);
2577: Transliterator.Position pos = new Transliterator.Position(0,
2578: test.length(), 0, test.length());
2579: t.transliterate(test, pos);
2580: boolean gotError = false;
2581:
2582: // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
2583:
2584: if (pos.start == 0 && pos.limit != 0
2585: && !t.getID().equals("Hex-Any/Unicode")) {
2586: errln("No Progress, " + t.getID() + ": "
2587: + UtilityExtensions.formatInput(test, pos));
2588: gotError = true;
2589: } else {
2590: logln("PASS Progress, " + t.getID() + ": "
2591: + UtilityExtensions.formatInput(test, pos));
2592: }
2593: t.finishTransliteration(test, pos);
2594: if (pos.start != pos.limit) {
2595: errln("Incomplete, " + t.getID() + ": "
2596: + UtilityExtensions.formatInput(test, pos));
2597: gotError = true;
2598: }
2599: if (!gotError) {
2600: //errln("FAIL: Did not get expected error");
2601: }
2602: }
2603:
2604: public void TestFunction() {
2605: // Careful with spacing and ';' here: Phrase this exactly
2606: // as toRules() is going to return it. If toRules() changes
2607: // with regard to spacing or ';', then adjust this string.
2608: String rule = "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
2609:
2610: Transliterator t = Transliterator.createFromRules("Test", rule,
2611: Transliterator.FORWARD);
2612: if (t == null) {
2613: errln("FAIL: createFromRules failed");
2614: return;
2615: }
2616:
2617: String r = t.toRules(true);
2618: if (r.equals(rule)) {
2619: logln("OK: toRules() => " + r);
2620: } else {
2621: errln("FAIL: toRules() => " + r + ", expected " + rule);
2622: }
2623:
2624: expect(t, "The Quick Brown Fox",
2625: "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox");
2626: rule = "([^\\ -\\u007F]) > &Hex/Unicode( $1 ) ' ' &Name( $1 ) ;";
2627:
2628: t = Transliterator.createFromRules("Test", rule,
2629: Transliterator.FORWARD);
2630: if (t == null) {
2631: errln("FAIL: createFromRules failed");
2632: return;
2633: }
2634:
2635: r = t.toRules(true);
2636: if (r.equals(rule)) {
2637: logln("OK: toRules() => " + r);
2638: } else {
2639: errln("FAIL: toRules() => " + r + ", expected " + rule);
2640: }
2641:
2642: expect(t, "\u0301", "U+0301 \\N{COMBINING ACUTE ACCENT}");
2643: }
2644:
2645: public void TestInvalidBackRef() {
2646: String rule = ". > $1;";
2647: String rule2 = "(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\u0020;";
2648: try {
2649: Transliterator t = Transliterator.createFromRules("Test",
2650: rule, Transliterator.FORWARD);
2651: if (t != null) {
2652: errln("FAIL: createFromRules should have returned NULL");
2653: }
2654: errln("FAIL: Ok: . > $1; => no error");
2655: Transliterator t2 = Transliterator.createFromRules("Test2",
2656: rule2, Transliterator.FORWARD);
2657: if (t2 != null) {
2658: errln("FAIL: createFromRules should have returned NULL");
2659: }
2660: errln("FAIL: Ok: . > $1; => no error");
2661: } catch (IllegalArgumentException e) {
2662: logln("Ok: . > $1; => " + e.getMessage());
2663: }
2664: }
2665:
2666: public void TestMulticharStringSet() {
2667: // Basic testing
2668: String rule = " [{aa}] > x;"
2669: + " a > y;"
2670: + " [b{bc}] > z;"
2671: + "[{gd}] { e > q;"
2672: + " e } [{fg}] > r;";
2673:
2674: Transliterator t = Transliterator.createFromRules("Test", rule,
2675: Transliterator.FORWARD);
2676: if (t == null) {
2677: errln("FAIL: createFromRules failed");
2678: return;
2679: }
2680:
2681: expect(t, "a aa ab bc d gd de gde gdefg ddefg",
2682: "y x yz z d gd de gdq gdqfg ddrfg");
2683:
2684: // Overlapped string test. Make sure that when multiple
2685: // strings can match that the longest one is matched.
2686: rule = " [a {ab} {abc}] > x;"
2687: + " b > y;"
2688: + " c > z;"
2689: + " q [t {st} {rst}] { e > p;";
2690:
2691: t = Transliterator.createFromRules("Test", rule,
2692: Transliterator.FORWARD);
2693: if (t == null) {
2694: errln("FAIL: createFromRules failed");
2695: return;
2696: }
2697:
2698: expect(t, "a ab abc qte qste qrste", "x x x qtp qstp qrstp");
2699: }
2700:
2701: /**
2702: * Test that user-registered transliterators can be used under function
2703: * syntax.
2704: */
2705: public void TestUserFunction() {
2706: Transliterator t;
2707:
2708: // There's no need to register inverses if we don't use them
2709: TestUserFunctionFactory
2710: .add(
2711: "Any-gif",
2712: Transliterator
2713: .createFromRules(
2714: "gif",
2715: "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';",
2716: Transliterator.FORWARD));
2717: //TestUserFunctionFactory.add("gif-Any", Transliterator.getInstance("Any-Null"));
2718:
2719: TestUserFunctionFactory.add("Any-RemoveCurly", Transliterator
2720: .createFromRules("RemoveCurly",
2721: "[\\{\\}] > ; \\\\N > ;",
2722: Transliterator.FORWARD));
2723: //TestUserFunctionFactory.add("RemoveCurly-Any", Transliterator.getInstance("Any-Null"));
2724:
2725: logln("Trying &hex");
2726: t = Transliterator.createFromRules("hex2", "(.) > &hex($1);",
2727: Transliterator.FORWARD);
2728: logln("Registering");
2729: TestUserFunctionFactory.add("Any-hex2", t);
2730: t = Transliterator.getInstance("Any-hex2");
2731: expect(t, "abc", "\\u0061\\u0062\\u0063");
2732:
2733: logln("Trying &gif");
2734: t = Transliterator.createFromRules("gif2",
2735: "(.) > &Gif(&Hex2($1));", Transliterator.FORWARD);
2736: logln("Registering");
2737: TestUserFunctionFactory.add("Any-gif2", t);
2738: t = Transliterator.getInstance("Any-gif2");
2739: expect(
2740: t,
2741: "ab",
2742: "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
2743: + "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
2744:
2745: // Test that filters are allowed after &
2746: t = Transliterator.createFromRules("test",
2747: "(.) > &Hex($1) ' ' &Any-RemoveCurly(&Name($1)) ' ';",
2748: Transliterator.FORWARD);
2749: expect(
2750: t,
2751: "abc",
2752: "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ");
2753:
2754: // Unregister our test stuff
2755: TestUserFunctionFactory.unregister();
2756: }
2757:
2758: static class TestUserFunctionFactory implements
2759: Transliterator.Factory {
2760: static TestUserFunctionFactory singleton = new TestUserFunctionFactory();
2761: static HashMap m = new HashMap();
2762:
2763: static void add(String ID, Transliterator t) {
2764: m.put(new CaseInsensitiveString(ID), t);
2765: Transliterator.registerFactory(ID, singleton);
2766: }
2767:
2768: public Transliterator getInstance(String ID) {
2769: return (Transliterator) m
2770: .get(new CaseInsensitiveString(ID));
2771: }
2772:
2773: static void unregister() {
2774: Iterator ids = m.keySet().iterator();
2775: while (ids.hasNext()) {
2776: CaseInsensitiveString id = (CaseInsensitiveString) ids
2777: .next();
2778: Transliterator.unregister(id.getString());
2779: ids.remove(); // removes pair from m
2780: }
2781: }
2782: }
2783:
2784: /**
2785: * Test the Any-X transliterators.
2786: */
2787: public void TestAnyX() {
2788: Transliterator anyLatin = Transliterator.getInstance(
2789: "Any-Latin", Transliterator.FORWARD);
2790:
2791: expect(
2792: anyLatin,
2793: "greek:\u03B1\u03B2\u03BA\u0391\u0392\u039A hiragana:\u3042\u3076\u304F cyrillic:\u0430\u0431\u0446",
2794: "greek:abkABK hiragana:abuku cyrillic:abc");
2795: }
2796:
2797: /**
2798: * Test the source and target set API. These are only implemented
2799: * for RBT and CompoundTransliterator at this time.
2800: */
2801: public void TestSourceTargetSet() {
2802: // Rules
2803: String r = "a > b; " + "r [x{lu}] > q;";
2804:
2805: // Expected source
2806: UnicodeSet expSrc = new UnicodeSet("[arx{lu}]");
2807:
2808: // Expected target
2809: UnicodeSet expTrg = new UnicodeSet("[bq]");
2810:
2811: Transliterator t = Transliterator.createFromRules("test", r,
2812: Transliterator.FORWARD);
2813: UnicodeSet src = t.getSourceSet();
2814: UnicodeSet trg = t.getTargetSet();
2815:
2816: if (src.equals(expSrc) && trg.equals(expTrg)) {
2817: logln("Ok: " + r + " => source = " + src.toPattern(true)
2818: + ", target = " + trg.toPattern(true));
2819: } else {
2820: errln("FAIL: " + r + " => source = " + src.toPattern(true)
2821: + ", expected " + expSrc.toPattern(true)
2822: + "; target = " + trg.toPattern(true)
2823: + ", expected " + expTrg.toPattern(true));
2824: }
2825: }
2826:
2827: /**
2828: * Test handling of rule whitespace, for both RBT and UnicodeSet.
2829: */
2830: public void TestRuleWhitespace() {
2831: // Rules
2832: String r = "a > \u200E b;";
2833:
2834: Transliterator t = Transliterator.createFromRules("test", r,
2835: Transliterator.FORWARD);
2836:
2837: expect(t, "a", "b");
2838:
2839: // UnicodeSet
2840: UnicodeSet set = new UnicodeSet("[a \u200E]");
2841:
2842: if (set.contains(0x200E)) {
2843: errln("FAIL: U+200E not being ignored by UnicodeSet");
2844: }
2845: }
2846:
2847: public void TestAlternateSyntax() {
2848: // U+2206 == &
2849: // U+2190 == <
2850: // U+2192 == >
2851: // U+2194 == <>
2852: expect("a \u2192 x; b \u2190 y; c \u2194 z", "abc", "xbz");
2853: expect(
2854: "([:^ASCII:]) \u2192 \u2206Name($1);",
2855: "<=\u2190; >=\u2192; <>=\u2194; &=\u2206",
2856: "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}");
2857: }
2858:
2859: public void TestPositionAPI() {
2860: Transliterator.Position a = new Transliterator.Position(3, 5,
2861: 7, 11);
2862: Transliterator.Position b = new Transliterator.Position(a);
2863: Transliterator.Position c = new Transliterator.Position();
2864: c.set(a);
2865: // Call the toString() API:
2866: if (a.equals(b) && a.equals(c)) {
2867: logln("Ok: " + a + " == " + b + " == " + c);
2868: } else {
2869: errln("FAIL: " + a + " != " + b + " != " + c);
2870: }
2871: }
2872:
2873: //======================================================================
2874: // New tests for the ::BEGIN/::END syntax
2875: //======================================================================
2876:
2877: private static final String[] BEGIN_END_RULES = new String[] {
2878: // [0]
2879: "abc > xy;" + "aba > z;",
2880:
2881: // [1]
2882: /*
2883: "::BEGIN;"
2884: + "abc > xy;"
2885: + "::END;"
2886: + "::BEGIN;"
2887: + "aba > z;"
2888: + "::END;",
2889: */
2890: "", // test case commented out below, this is here to keep from messing up the indexes
2891:
2892: // [2]
2893: /*
2894: "abc > xy;"
2895: + "::BEGIN;"
2896: + "aba > z;"
2897: + "::END;",
2898: */
2899: "", // test case commented out below, this is here to keep from messing up the indexes
2900:
2901: // [3]
2902: /*
2903: "::BEGIN;"
2904: + "abc > xy;"
2905: + "::END;"
2906: + "aba > z;",
2907: */
2908: "", // test case commented out below, this is here to keep from messing up the indexes
2909:
2910: // [4]
2911: "abc > xy;" + "::Null;" + "aba > z;",
2912:
2913: // [5]
2914: "::Upper;" + "ABC > xy;" + "AB > x;" + "C > z;"
2915: + "::Upper;" + "XYZ > p;" + "XY > q;" + "Z > r;"
2916: + "::Upper;",
2917:
2918: // [6]
2919: "$ws = [[:Separator:][\\u0009-\\u000C]$];"
2920: + "$delim = [\\-$ws];" + "$ws $delim* > ' ';"
2921: + "'-' $delim* > '-';",
2922:
2923: // [7]
2924: "::Null;" + "$ws = [[:Separator:][\\u0009-\\u000C]$];"
2925: + "$delim = [\\-$ws];" + "$ws $delim* > ' ';"
2926: + "'-' $delim* > '-';",
2927:
2928: // [8]
2929: "$ws = [[:Separator:][\\u0009-\\u000C]$];"
2930: + "$delim = [\\-$ws];" + "$ws $delim* > ' ';"
2931: + "'-' $delim* > '-';" + "::Null;",
2932:
2933: // [9]
2934: "$ws = [[:Separator:][\\u0009-\\u000C]$];"
2935: + "$delim = [\\-$ws];" + "::Null;"
2936: + "$ws $delim* > ' ';" + "'-' $delim* > '-';",
2937:
2938: // [10]
2939: /*
2940: "::BEGIN;"
2941: + "$ws = [[:Separator:][\\u0009-\\u000C]$];"
2942: + "$delim = [\\-$ws];"
2943: + "::END;"
2944: + "$ws $delim* > ' ';"
2945: + "'-' $delim* > '-';",
2946: */
2947: "", // test case commented out below, this is here to keep from messing up the indexes
2948:
2949: // [11]
2950: /*
2951: "$ws = [[:Separator:][\\u0009-\\u000C]$];"
2952: + "$delim = [\\-$ws];"
2953: + "::BEGIN;"
2954: + "$ws $delim* > ' ';"
2955: + "'-' $delim* > '-';"
2956: + "::END;",
2957: */
2958: "", // test case commented out below, this is here to keep from messing up the indexes
2959:
2960: // [12]
2961: /*
2962: "$ws = [[:Separator:][\\u0009-\\u000C]$];"
2963: + "$delim = [\\-$ws];"
2964: + "$ab = [ab];"
2965: + "::BEGIN;"
2966: + "$ws $delim* > ' ';"
2967: + "'-' $delim* > '-';"
2968: + "::END;"
2969: + "::BEGIN;"
2970: + "$ab { ' ' } $ab > '-';"
2971: + "c { ' ' > ;"
2972: + "::END;"
2973: + "::BEGIN;"
2974: + "'a-a' > a\\%|a;"
2975: + "::END;",
2976: */
2977: "", // test case commented out below, this is here to keep from messing up the indexes
2978:
2979: // [13]
2980: "$ws = [[:Separator:][\\u0009-\\u000C]$];"
2981: + "$delim = [\\-$ws];" + "$ab = [ab];" + "::Null;"
2982: + "$ws $delim* > ' ';" + "'-' $delim* > '-';"
2983: + "::Null;" + "$ab { ' ' } $ab > '-';"
2984: + "c { ' ' > ;" + "::Null;" + "'a-a' > a\\%|a;",
2985:
2986: // [14]
2987: /*
2988: "::[abc];"
2989: + "::BEGIN;"
2990: + "abc > xy;"
2991: + "::END;"
2992: + "::BEGIN;"
2993: + "aba > yz;"
2994: + "::END;"
2995: + "::Upper;",
2996: */
2997: "", // test case commented out below, this is here to keep from messing up the indexes
2998:
2999: // [15]
3000: "::[abc];" + "abc > xy;" + "::Null;" + "aba > yz;"
3001: + "::Upper;",
3002:
3003: // [16]
3004: /*
3005: "::[abc];"
3006: + "::BEGIN;"
3007: + "abc <> xy;"
3008: + "::END;"
3009: + "::BEGIN;"
3010: + "aba <> yz;"
3011: + "::END;"
3012: + "::Upper(Lower);"
3013: + "::([XYZ]);",
3014: */
3015: "", // test case commented out below, this is here to keep from messing up the indexes
3016:
3017: // [17]
3018: "::[abc];" + "abc <> xy;" + "::Null;" + "aba <> yz;"
3019: + "::Upper(Lower);" + "::([XYZ]);" };
3020:
3021: /*
3022: (This entire test is commented out below and will need some heavy revision when we re-add
3023: the ::BEGIN/::END stuff)
3024: private static final String[] BOGUS_BEGIN_END_RULES = new String[] {
3025: // [7]
3026: "::BEGIN;"
3027: + "abc > xy;"
3028: + "::BEGIN;"
3029: + "aba > z;"
3030: + "::END;"
3031: + "::END;",
3032:
3033: // [8]
3034: "abc > xy;"
3035: + " aba > z;"
3036: + "::END;",
3037:
3038: // [9]
3039: "::BEGIN;"
3040: + "::Upper;"
3041: + "::END;"
3042: };
3043: */
3044:
3045: private static final String[] BEGIN_END_TEST_CASES = new String[] {
3046: BEGIN_END_RULES[0],
3047: "abc ababc aba",
3048: "xy zbc z",
3049: // BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
3050: // BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
3051: // BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
3052: BEGIN_END_RULES[4],
3053: "abc ababc aba",
3054: "xy abxy z",
3055: BEGIN_END_RULES[5],
3056: "abccabaacababcbc",
3057: "PXAARXQBR",
3058:
3059: BEGIN_END_RULES[6],
3060: "e e - e---e- e",
3061: "e e e-e-e",
3062: BEGIN_END_RULES[7],
3063: "e e - e---e- e",
3064: "e e e-e-e",
3065: BEGIN_END_RULES[8],
3066: "e e - e---e- e",
3067: "e e e-e-e",
3068: BEGIN_END_RULES[9],
3069: "e e - e---e- e",
3070: "e e e-e-e",
3071: // BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e",
3072: // BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e",
3073: // BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e",
3074: // BEGIN_END_RULES[12], "a a a a", "a%a%a%a",
3075: // BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
3076: BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e",
3077: BEGIN_END_RULES[13], "a a a a", "a%a%a%a",
3078: BEGIN_END_RULES[13], "a a-b c b a",
3079: "a%a-b cb-a",
3080:
3081: // BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
3082: BEGIN_END_RULES[15], "abc xy ababc xyz aba",
3083: "XY xy ABXY xyz YZ",
3084: // BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
3085: BEGIN_END_RULES[17], "abc xy ababc xyz aba",
3086: "XY xy ABXY xyz YZ" };
3087:
3088: public void TestBeginEnd() {
3089: // run through the list of test cases above
3090: for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) {
3091: expect(BEGIN_END_TEST_CASES[i],
3092: BEGIN_END_TEST_CASES[i + 1],
3093: BEGIN_END_TEST_CASES[i + 2]);
3094: }
3095:
3096: // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
3097: Transliterator reversed = Transliterator
3098: .createFromRules("Reversed", BEGIN_END_RULES[17],
3099: Transliterator.REVERSE);
3100: expect(reversed, "xy XY XYZ yz YZ", "xy abc xaba yz aba");
3101:
3102: // finally, run through the list of syntactically-ill-formed rule sets above and make sure
3103: // that all of them cause errors
3104: /*
3105: (commented out until we have the real ::BEGIN/::END stuff in place
3106: for (int i = 0; i < BOGUS_BEGIN_END_RULES.length; i++) {
3107: try {
3108: Transliterator t = Transliterator.createFromRules("foo", BOGUS_BEGIN_END_RULES[i],
3109: Transliterator.FORWARD);
3110: errln("Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
3111: }
3112: catch (IllegalArgumentException e) {
3113: // this is supposed to happen; do nothing here
3114: }
3115: }
3116: */
3117: }
3118:
3119: public void TestBeginEndToRules() {
3120: // run through the same list of test cases we used above, but this time, instead of just
3121: // instantiating a Transliterator from the rules and running the test against it, we instantiate
3122: // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
3123: // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
3124: // to (i.e., does the same thing as) the original rule set
3125: for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) {
3126: Transliterator t = Transliterator.createFromRules("--",
3127: BEGIN_END_TEST_CASES[i], Transliterator.FORWARD);
3128: String rules = t.toRules(false);
3129: Transliterator t2 = Transliterator.createFromRules(
3130: "Test case #" + (i / 3), rules,
3131: Transliterator.FORWARD);
3132: expect(t2, BEGIN_END_TEST_CASES[i + 1],
3133: BEGIN_END_TEST_CASES[i + 2]);
3134: }
3135:
3136: // do the same thing for the reversible test case
3137: Transliterator reversed = Transliterator
3138: .createFromRules("Reversed", BEGIN_END_RULES[17],
3139: Transliterator.REVERSE);
3140: String rules = reversed.toRules(false);
3141: Transliterator reversed2 = Transliterator.createFromRules(
3142: "Reversed", rules, Transliterator.FORWARD);
3143: expect(reversed2, "xy XY XYZ yz YZ", "xy abc xaba yz aba");
3144: }
3145:
3146: public void TestRegisterAlias() {
3147: String longID = "Lower;[aeiou]Upper";
3148: String shortID = "Any-CapVowels";
3149: String reallyShortID = "CapVowels";
3150:
3151: Transliterator.registerAlias(shortID, longID);
3152:
3153: Transliterator t1 = Transliterator.getInstance(longID);
3154: Transliterator t2 = Transliterator.getInstance(reallyShortID);
3155:
3156: if (!t1.getID().equals(longID))
3157: errln("Transliterator instantiated with long ID doesn't have long ID");
3158: if (!t2.getID().equals(reallyShortID))
3159: errln("Transliterator instantiated with short ID doesn't have short ID");
3160:
3161: if (!t1.toRules(true).equals(t2.toRules(true)))
3162: errln("Alias transliterators aren't the same");
3163:
3164: Transliterator.unregister(shortID);
3165:
3166: try {
3167: t1 = Transliterator.getInstance(shortID);
3168: errln("Instantiation with short ID succeeded after short ID was unregistered");
3169: } catch (IllegalArgumentException e) {
3170: }
3171:
3172: // try the same thing again, but this time with something other than
3173: // an instance of CompoundTransliterator
3174: String realID = "Latin-Greek";
3175: String fakeID = "Latin-dlgkjdflkjdl";
3176: Transliterator.registerAlias(fakeID, realID);
3177:
3178: t1 = Transliterator.getInstance(realID);
3179: t2 = Transliterator.getInstance(fakeID);
3180:
3181: if (!t1.toRules(true).equals(t2.toRules(true)))
3182: errln("Alias transliterators aren't the same");
3183:
3184: Transliterator.unregister(fakeID);
3185: }
3186:
3187: //======================================================================
3188: // These tests are not mirrored (yet) in icu4c at
3189: // source/test/intltest/transtst.cpp
3190: //======================================================================
3191:
3192: /**
3193: * Improve code coverage.
3194: */
3195: public void TestCoverage() {
3196: // NullTransliterator
3197: Transliterator t = Transliterator.getInstance("Null",
3198: Transliterator.FORWARD);
3199: expect(t, "a", "a");
3200:
3201: // Source, target set
3202: t = Transliterator.getInstance("Latin-Greek",
3203: Transliterator.FORWARD);
3204: t.setFilter(new UnicodeSet("[A-Z]"));
3205: logln("source = " + t.getSourceSet());
3206: logln("target = " + t.getTargetSet());
3207:
3208: t = Transliterator.createFromRules("x", "(.) > &Any-Hex($1);",
3209: Transliterator.FORWARD);
3210: logln("source = " + t.getSourceSet());
3211: logln("target = " + t.getTargetSet());
3212: }
3213:
3214: public void TestAny() {
3215: UnicodeSet alphabetic = (UnicodeSet) new UnicodeSet(
3216: "[:alphabetic:]").freeze();
3217: StringBuffer testString = new StringBuffer();
3218: for (int i = 0; i < UScript.CODE_LIMIT; ++i) {
3219: UnicodeSet sample = new UnicodeSet().applyPropertyAlias(
3220: "script", UScript.getShortName(i)).retainAll(
3221: alphabetic);
3222: int count = 5;
3223: for (UnicodeSetIterator it = new UnicodeSetIterator(sample); it
3224: .next();) {
3225: testString.append(it.getString());
3226: if (--count < 0)
3227: break;
3228: }
3229: }
3230: logln("Sample set for Any-Latin: " + testString);
3231: Transliterator anyLatin = Transliterator
3232: .getInstance("any-Latn");
3233: String result = anyLatin.transliterate(testString.toString());
3234: logln("Sample result for Any-Latin: " + result);
3235: }
3236:
3237: //======================================================================
3238: // Support methods
3239: //======================================================================
3240: void expect(String rules, String source, String expectedResult,
3241: Transliterator.Position pos) {
3242: Transliterator t = Transliterator.createFromRules("<ID>",
3243: rules, Transliterator.FORWARD);
3244: expect(t, source, expectedResult, pos);
3245: }
3246:
3247: void expect(String rules, String source, String expectedResult) {
3248: expect(rules, source, expectedResult, null);
3249: }
3250:
3251: void expect(Transliterator t, String source, String expectedResult,
3252: Transliterator reverseTransliterator) {
3253: expect(t, source, expectedResult);
3254: if (reverseTransliterator != null) {
3255: expect(reverseTransliterator, expectedResult, source);
3256: }
3257: }
3258:
3259: void expect(Transliterator t, String source, String expectedResult) {
3260: expect(t, source, expectedResult,
3261: (Transliterator.Position) null);
3262: }
3263:
3264: void expect(Transliterator t, String source, String expectedResult,
3265: Transliterator.Position pos) {
3266: if (pos == null) {
3267: String result = t.transliterate(source);
3268: if (!expectAux(t.getID() + ":String", source, result,
3269: expectedResult))
3270: return;
3271: }
3272:
3273: Transliterator.Position index = null;
3274: if (pos == null) {
3275: index = new Transliterator.Position(0, source.length(), 0,
3276: source.length());
3277: } else {
3278: index = new Transliterator.Position(pos.contextStart,
3279: pos.contextLimit, pos.start, pos.limit);
3280: }
3281:
3282: ReplaceableString rsource = new ReplaceableString(source);
3283:
3284: t.finishTransliteration(rsource, index);
3285: // Do it all at once -- below we do it incrementally
3286:
3287: if (index.start != index.limit) {
3288: expectAux(t.getID() + ":UNFINISHED", source, "start: "
3289: + index.start + ", limit: " + index.limit, false,
3290: expectedResult);
3291: return;
3292: }
3293: String result = rsource.toString();
3294: if (!expectAux(t.getID() + ":Replaceable", source, result,
3295: expectedResult))
3296: return;
3297:
3298: if (pos == null) {
3299: index = new Transliterator.Position();
3300: } else {
3301: index = new Transliterator.Position(pos.contextStart,
3302: pos.contextLimit, pos.start, pos.limit);
3303: }
3304:
3305: // Test incremental transliteration -- this result
3306: // must be the same after we finalize (see below).
3307: Vector v = new Vector();
3308: v.add(source);
3309: rsource.replace(0, rsource.length(), "");
3310: if (pos != null) {
3311: rsource.replace(0, 0, source);
3312: v.add(UtilityExtensions.formatInput(rsource, index));
3313: t.transliterate(rsource, index);
3314: v.add(UtilityExtensions.formatInput(rsource, index));
3315: } else {
3316: for (int i = 0; i < source.length(); ++i) {
3317: //v.add(i == 0 ? "" : " + " + source.charAt(i) + "");
3318: //log.append(source.charAt(i)).append(" -> "));
3319: t.transliterate(rsource, index, source.charAt(i));
3320: //v.add(UtilityExtensions.formatInput(rsource, index) + source.substring(i+1));
3321: v.add(UtilityExtensions.formatInput(rsource, index)
3322: + ((i < source.length() - 1) ? (" + '"
3323: + source.charAt(i + 1) + "' ->")
3324: : " =>"));
3325: }
3326: }
3327:
3328: // As a final step in keyboard transliteration, we must call
3329: // transliterate to finish off any pending partial matches that
3330: // were waiting for more input.
3331: t.finishTransliteration(rsource, index);
3332: result = rsource.toString();
3333: //log.append(" => ").append(rsource.toString());
3334: v.add(result);
3335:
3336: String[] results = new String[v.size()];
3337: v.copyInto(results);
3338: expectAux(t.getID() + ":Incremental", results, result
3339: .equals(expectedResult), expectedResult);
3340: }
3341:
3342: boolean expectAux(String tag, String source, String result,
3343: String expectedResult) {
3344: return expectAux(tag, new String[] { source, result }, result
3345: .equals(expectedResult), expectedResult);
3346: }
3347:
3348: boolean expectAux(String tag, String source, String result,
3349: boolean pass, String expectedResult) {
3350: return expectAux(tag, new String[] { source, result }, pass,
3351: expectedResult);
3352: }
3353:
3354: boolean expectAux(String tag, String source, boolean pass,
3355: String expectedResult) {
3356: return expectAux(tag, new String[] { source }, pass,
3357: expectedResult);
3358: }
3359:
3360: boolean expectAux(String tag, String[] results, boolean pass,
3361: String expectedResult) {
3362: msg((pass ? "(" : "FAIL: (") + tag + ")", pass ? LOG : ERR,
3363: true, true);
3364:
3365: for (int i = 0; i < results.length; ++i) {
3366: String label;
3367: if (i == 0) {
3368: label = "source: ";
3369: } else if (i == results.length - 1) {
3370: label = "result: ";
3371: } else {
3372: if (!isVerbose() && pass)
3373: continue;
3374: label = "interm" + i + ": ";
3375: }
3376: msg(" " + label + results[i], pass ? LOG : ERR, false,
3377: true);
3378: }
3379:
3380: if (!pass) {
3381: msg(" expected: " + expectedResult, ERR, false, true);
3382: }
3383:
3384: return pass;
3385: }
3386: }
|