Source Code Cross Referenced for RoundTripTest.java in » Internationalization-Localization » icu4j » com » ibm » icu » dev » test » translit » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Internationalization Localization » icu4j » com.ibm.icu.dev.test.translit
Source Cross Referenced Class Diagram Java Document (Java Doc)
0001:        /**
0002:         *******************************************************************************
0003:         * Copyright (C) 2000-2006, International Business Machines Corporation and    *
0004:         * others. All Rights Reserved.                                                *
0005:         *******************************************************************************
0006:         */package com.ibm.icu.dev.test.translit;
0007:
0008:        import com.ibm.icu.dev.test.*;
0009:        import com.ibm.icu.lang.*;
0010:        import com.ibm.icu.text.*;
0011:        import com.ibm.icu.util.LocaleData;
0012:        import com.ibm.icu.util.ULocale;
0013:        import com.ibm.icu.impl.Utility;
0014:
0015:        import java.io.BufferedWriter;
0016:        import java.io.ByteArrayOutputStream;
0017:        import java.io.File;
0018:        import java.io.FileNotFoundException;
0019:        import java.io.FileOutputStream;
0020:        import java.io.IOException;
0021:        import java.io.OutputStreamWriter;
0022:        import java.io.PrintWriter;
0023:        import java.io.UnsupportedEncodingException;
0024:        import java.util.MissingResourceException;
0025:
0026:        /**
0027:         * @test
0028:         * @summary Round trip test of Transliterator
0029:         */
0030:        public class RoundTripTest extends TestFmwk {
0031:
                 // Compile-time switches for this test class.
                 // NOTE(review): neither flag is referenced in the part of the file visible
                 // here -- confirm where (and whether) they are still used.
                 static final boolean EXTRA_TESTS = true;
                 static final boolean PRINT_RULES = true;
0034:
0035:            public static void main(String[] args) throws Exception {
0036:                new RoundTripTest().run(args);
0037:            }
0038:
0039:            /*
0040:            public void TestSingle() throws IOException, ParseException {
0041:                Transliterator t = Transliterator.getInstance("Latin-Greek");
0042:                String s = t.transliterate("\u0101\u0069");
0043:            }
0044:             */
0045:
0046:            /*
0047:            Note: Unicode 3.2 added new Hiragana/Katakana characters:
0048:
0049:            3095..3096    ; 3.2 #   [2] HIRAGANA LETTER SMALL KA..HIRAGANA LETTER SMALL KE
0050:            309F..30A0    ; 3.2 #   [2] HIRAGANA DIGRAPH YORI..KATAKANA-HIRAGANA DOUBLE HYPHEN
0051:            30FF          ; 3.2 #       KATAKANA DIGRAPH KOTO
0052:            31F0..31FF    ; 3.2 #  [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
0053:
0054:            We will not add them to the rules until they are more supported (e.g. in fonts on Windows)
0055:            A bug has been filed to remind us to do this: #1979.
0056:             */
0057:
                 // Set-pattern strings handed to Test.test by the kana tests in this class.

                 // [:katakana:] plus U+30A1..U+30FA and U+30FC, minus U+30FF and U+31F0..U+31FF.
                 static String KATAKANA = "[[[:katakana:][\u30A1-\u30FA\u30FC]]-[\u30FF\u31F0-\u31FF]]";
                 // [:hiragana:] plus U+3040..U+3094, minus U+3095..U+3096 and U+309F..U+30A0.
                 static String HIRAGANA = "[[[:hiragana:][\u3040-\u3094]]-[\u3095-\u3096\u309F-\u30A0]]";
                 // The single character U+30FC.
                 static String LENGTH = "[\u30FC]";
                 // U+FF65..U+FF9D.
                 static String HALFWIDTH_KATAKANA = "[\uFF65-\uFF9D]";
                 // U+30FD and U+30FE.
                 static String KATAKANA_ITERATION = "[\u30FD\u30FE]";
                 // U+309D and U+309E.
                 static String HIRAGANA_ITERATION = "[\u309D\u309E]";
0064:
0065:            //------------------------------------------------------------------
0066:            // AbbreviatedUnicodeSetIterator
0067:            //------------------------------------------------------------------
0068:
0069:            static class AbbreviatedUnicodeSetIterator extends
0070:                    UnicodeSetIterator {
0071:
0072:                private boolean abbreviated;
0073:                private int perRange;
0074:
0075:                public AbbreviatedUnicodeSetIterator() {
0076:                    super ();
0077:                    abbreviated = false;
0078:                }
0079:
0080:                public void reset(UnicodeSet newSet) {
0081:                    reset(newSet, false);
0082:                }
0083:
0084:                public void reset(UnicodeSet newSet, boolean abb) {
0085:                    reset(newSet, abb, 100);
0086:                }
0087:
0088:                public void reset(UnicodeSet newSet, boolean abb, int density) {
0089:                    super .reset(newSet);
0090:                    abbreviated = abb;
0091:                    perRange = newSet.getRangeCount();
0092:                    if (perRange != 0) {
0093:                        perRange = density / perRange;
0094:                    }
0095:                }
0096:
0097:                protected void loadRange(int myRange) {
0098:                    super .loadRange(myRange);
0099:                    if (abbreviated && (endElement > nextElement + perRange)) {
0100:                        endElement = nextElement + perRange;
0101:                    }
0102:                }
0103:            }
0104:
0105:            //--------------------------------------------------------------------
0106:
0107:            public void showElapsed(long start, String name) {
0108:                double dur = (System.currentTimeMillis() - start) / 1000.0;
0109:                logln(name + " took " + dur + " seconds");
0110:            }
0111:
0112:            public void TestKana() throws IOException {
0113:                long start = System.currentTimeMillis();
0114:                new Test("Katakana-Hiragana").test(KATAKANA, "[" + HIRAGANA
0115:                        + LENGTH + "]",
0116:                        "[" + HALFWIDTH_KATAKANA + LENGTH + "]", this ,
0117:                        new Legal());
0118:                showElapsed(start, "TestKana");
0119:            }
0120:
0121:            public void TestHiragana() throws IOException {
0122:                long start = System.currentTimeMillis();
0123:                new Test("Latin-Hiragana").test("[a-zA-Z]", HIRAGANA,
0124:                        HIRAGANA_ITERATION, this , new Legal());
0125:                showElapsed(start, "TestHiragana");
0126:            }
0127:
0128:            public void TestKatakana() throws IOException {
0129:                long start = System.currentTimeMillis();
0130:                new Test("Latin-Katakana").test("[a-zA-Z]", KATAKANA, "["
0131:                        + KATAKANA_ITERATION + HALFWIDTH_KATAKANA + "]", this ,
0132:                        new Legal());
0133:                showElapsed(start, "TestKatakana");
0134:            }
0135:
0136:            public void TestJamo() throws IOException {
0137:                long start = System.currentTimeMillis();
0138:                new Test("Latin-Jamo").test("[a-zA-Z]",
0139:                        "[\u1100-\u1112 \u1161-\u1175 \u11A8-\u11C2]", "",
0140:                        this , new LegalJamo());
0141:                showElapsed(start, "TestJamo");
0142:            }
0143:
0144:            /*
0145:             SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7,
0146:             LCount = 19, VCount = 21, TCount = 28,
0147:             NCount = VCount * TCount,   // 588
0148:             SCount = LCount * NCount,   // 11172
0149:             LLimit = LBase + LCount,    // 1113
0150:             VLimit = VBase + VCount,    // 1176
0151:             TLimit = TBase + TCount,    // 11C3
0152:             SLimit = SBase + SCount;    // D7A4
0153:             */
0154:
0155:            public void TestHangul() throws IOException {
0156:                long start = System.currentTimeMillis();
0157:                Test t = new Test("Latin-Hangul", 5);
0158:                if (getInclusion() < 10)
0159:                    t.setPairLimit(1000);
0160:                t.test("[a-zA-Z]", "[\uAC00-\uD7A4]", "", this , new Legal());
0161:                showElapsed(start, "TestHangul");
0162:            }
0163:
0164:            public void TestHan() throws UnsupportedEncodingException,
0165:                    FileNotFoundException {
0166:                try {
0167:                    UnicodeSet exemplars = LocaleData.getExemplarSet(
0168:                            new ULocale("zh"), 0);
0169:                    // create string with all chars
0170:                    StringBuffer b = new StringBuffer();
0171:                    for (UnicodeSetIterator it = new UnicodeSetIterator(
0172:                            exemplars); it.next();) {
0173:                        UTF16.append(b, it.codepoint);
0174:                    }
0175:                    String source = b.toString();
0176:                    // transform with Han translit
0177:                    Transliterator han = Transliterator
0178:                            .getInstance("Han-Latin");
0179:                    String target = han.transliterate(source);
0180:                    // now verify that there are no Han characters left
0181:                    UnicodeSet allHan = new UnicodeSet("[:han:]");
0182:                    assertFalse(
0183:                            "No Han must be left after Han-Latin transliteration",
0184:                            allHan.containsSome(target));
0185:                    // check the pinyin translit
0186:                    Transliterator pn = Transliterator
0187:                            .getInstance("Latin-NumericPinyin");
0188:                    String target2 = pn.transliterate(target);
0189:                    // verify that there are no marks
0190:                    Transliterator nfc = Transliterator.getInstance("nfc");
0191:                    String nfced = nfc.transliterate(target2);
0192:                    UnicodeSet allMarks = new UnicodeSet("[:mark:]");
0193:                    assertFalse("NumericPinyin must contain no marks", allMarks
0194:                            .containsSome(nfced));
0195:                    // verify roundtrip
0196:                    Transliterator np = pn.getInverse();
0197:                    String target3 = np.transliterate(target);
0198:                    boolean roundtripOK = target3.equals(target);
0199:                    assertTrue("NumericPinyin must roundtrip", roundtripOK);
0200:                    if (!roundtripOK) {
0201:                        String filename = "numeric-pinyin.log.txt";
0202:                        PrintWriter out = new PrintWriter(new BufferedWriter(
0203:                                new OutputStreamWriter(new FileOutputStream(
0204:                                        filename), "UTF8"), 4 * 1024));
0205:                        errln("Creating log file "
0206:                                + new File(filename).getAbsoluteFile());
0207:                        out.println("Pinyin:                " + target);
0208:                        out.println("Pinyin-Numeric-Pinyin: " + target2);
0209:                        out.close();
0210:                    }
0211:                } catch (MissingResourceException ex) {
0212:                    warnln("Could not load the locale data for fetching the exemplar characters.");
0213:                }
0214:            }
0215:
0216:            public void TestSingle() {
0217:                Transliterator t = Transliterator.getInstance("Latin-Greek");
0218:                t.transliterate("\u0061\u0101\u0069");
0219:            }
0220:
0221:            String getGreekSet() {
0222:                // Time bomb
0223:                if (skipIfBeforeICU(3, 6)) {
0224:                    // We temporarily filter against Unicode 4.1, but we only do this
0225:                    // before version 3.5.
0226:                    logln("TestGreek needs to be updated to remove delete the section marked [:Age=4.0:] filter");
0227:                } else {
0228:                    errln("TestGreek needs to be updated to remove delete the [:Age=4.0:] filter ");
0229:                }
0230:                return
0231:                // isICU28() ? "[[\u003B\u00B7[:Greek:]-[\u03D7-\u03EF]]&[:Age=3.2:]]" :
0232:                "[\u003B\u00B7[[:Greek:]&[:Letter:]]-[" + "\u1D26-\u1D2A" + // L&   [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI
0233:                        "\u1D5D-\u1D61" + // Lm   [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI
0234:                        "\u1D66-\u1D6A" + // L&   [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI
0235:                        "\u03D7-\u03EF" + // \N{GREEK KAI SYMBOL}..\N{COPTIC SMALL LETTER DEI}
0236:                        "] & [:Age=4.0:]]";
0237:            }
0238:
0239:            public void TestGreek() throws IOException {
0240:                long start = System.currentTimeMillis();
0241:                new Test("Latin-Greek", 50).test("[a-zA-Z]", getGreekSet(),
0242:                        "[\u00B5\u037A\u03D0-\u03F5\u03F9]", /* roundtrip exclusions */
0243:                        this , new LegalGreek(true));
0244:                showElapsed(start, "TestGreek");
0245:            }
0246:
0247:            public void TestGreekUNGEGN() throws IOException {
0248:                long start = System.currentTimeMillis();
0249:                new Test("Latin-Greek/UNGEGN").test("[a-zA-Z]", getGreekSet(),
0250:                        "[\u00B5\u037A\u03D0-\uFFFF{\u039C\u03C0}]", /* roundtrip exclusions */
0251:                        this , new LegalGreek(false));
0252:                showElapsed(start, "TestGreekUNGEGN");
0253:            }
0254:
0255:            public void Testel() throws IOException {
0256:                long start = System.currentTimeMillis();
0257:                new Test("Latin-el").test("[a-zA-Z]", getGreekSet(),
0258:                        "[\u00B5\u037A\u03D0-\uFFFF{\u039C\u03C0}]", /* roundtrip exclusions */
0259:                        this , new LegalGreek(false));
0260:                showElapsed(start, "Testel");
0261:            }
0262:
0263:            public void TestCyrillic() throws IOException {
0264:                long start = System.currentTimeMillis();
0265:                new Test("Latin-Cyrillic").test(
0266:                        "[a-zA-Z\u0110\u0111\u02BA\u02B9]", "[\u0400-\u045F]",
0267:                        null, this , new Legal());
0268:                showElapsed(start, "TestCyrillic");
0269:            }
0270:
                 // Set pattern of the Arabic-block characters that TestArabic passes to Test.test.
                 static final String ARABIC = "[\u060C\u061B\u061F\u0621\u0627-\u063A\u0641-\u0655\u0660-\u066C\u067E\u0686\u0698\u06A4\u06AD\u06AF\u06CB-\u06CC\u06F0-\u06F9]";
0272:
0273:            public void TestArabic() throws IOException {
0274:                long start = System.currentTimeMillis();
0275:                new Test("Latin-Arabic").test("[a-zA-Z\u02BE\u02BF]", ARABIC,
0276:                        "[a-zA-Z\u02BE\u02BF\u207F]", null, this , new Legal()); //
0277:                showElapsed(start, "TestArabic");
0278:            }
0279:
0280:            public void TestHebrew() throws IOException {
0281:                //      Time bomb
0282:                if (skipIfBeforeICU(3, 6)) {
0283:                    // We temporarily filter against Unicode 4.1, but we only do this
0284:                    // before version 3.5.
0285:                    logln("TestHebrew needs to be updated to remove delete the section marked [:Age=4.0:] filter");
0286:                } else {
0287:                    errln("TestHebrew needs to be updated to remove delete the [:Age=4.0:] filter ");
0288:                }
0289:                long start = System.currentTimeMillis();
0290:                new Test("Latin-Hebrew").test("[a-zA-Z\u02BC\u02BB]",
0291:                        "[[[:hebrew:]-[\u05BD\uFB00-\uFBFF]]& [:Age=4.0:]]",
0292:                        "[\u05F0\u05F1\u05F2]", this , new LegalHebrew());
0293:                showElapsed(start, "TestHebrew");
0294:            }
0295:
0296:            public void TestThai() throws IOException {
0297:                long start = System.currentTimeMillis();
0298:                if (isICUVersionAtLeast(3, 7)) {
0299:                    new Test("Latin-Thai")
0300:                            .test(
0301:                                    "[a-zA-Z\u0142\u1ECD\u00E6\u0131\u0268\u02CC]",
0302:                                    "[\u0E01-\u0E3A\u0E40-\u0E5B]",
0303:                                    "[a-zA-Z\u0142\u1ECD\u00E6\u0131\u0268\u02B9\u02CC]",
0304:                                    null, this , new LegalThai());
0305:                } else {
0306:                    new Test("Latin-Thai")
0307:                            .test(
0308:                                    "[a-zA-Z\u0142\u1ECD\u00E6\u0131\u0268\u02CC]",
0309:                                    "[\u0E01-\u0E3A\u0E40-\u0E5B]",
0310:                                    "[a-zA-Z\u0142\u1ECD\u00E6\u0131\u0268\u02B9\u02CC]",
0311:                                    "[\u0E4F]", this , new LegalThai());
0312:                }
0313:
0314:                showElapsed(start, "TestThai");
0315:            }
0316:
0317:            //----------------------------------
0318:            // Inter-Indic Tests
0319:            //----------------------------------
0320:            public static class LegalIndic extends Legal {
0321:                UnicodeSet vowelSignSet = new UnicodeSet();
0322:
0323:                public LegalIndic() {
0324:                    vowelSignSet
0325:                            .addAll(new UnicodeSet(
0326:                                    "[\u0901\u0902\u0903\u0904\u093e-\u094c\u0962\u0963]")); /* Devanagari */
0327:                    vowelSignSet
0328:                            .addAll(new UnicodeSet(
0329:                                    "[\u0981\u0982\u0983\u09be-\u09cc\u09e2\u09e3\u09D7]")); /* Bengali */
0330:                    vowelSignSet
0331:                            .addAll(new UnicodeSet(
0332:                                    "[\u0a01\u0a02\u0a03\u0a3e-\u0a4c\u0a62\u0a63\u0a70\u0a71]")); /* Gurmukhi */
0333:                    vowelSignSet.addAll(new UnicodeSet(
0334:                            "[\u0a81\u0a82\u0a83\u0abe-\u0acc\u0ae2\u0ae3]")); /* Gujarati */
0335:                    vowelSignSet
0336:                            .addAll(new UnicodeSet(
0337:                                    "[\u0b01\u0b02\u0b03\u0b3e-\u0b4c\u0b62\u0b63\u0b56\u0b57]")); /* Oriya */
0338:                    vowelSignSet
0339:                            .addAll(new UnicodeSet(
0340:                                    "[\u0b81\u0b82\u0b83\u0bbe-\u0bcc\u0be2\u0be3\u0bd7]")); /* Tamil */
0341:                    vowelSignSet
0342:                            .addAll(new UnicodeSet(
0343:                                    "[\u0c01\u0c02\u0c03\u0c3e-\u0c4c\u0c62\u0c63\u0c55\u0c56]")); /* Telugu */
0344:                    vowelSignSet
0345:                            .addAll(new UnicodeSet(
0346:                                    "[\u0c81\u0c82\u0c83\u0cbe-\u0ccc\u0ce2\u0ce3\u0cd5\u0cd6]")); /* Kannada */
0347:                    vowelSignSet
0348:                            .addAll(new UnicodeSet(
0349:                                    "[\u0d01\u0d02\u0d03\u0d3e-\u0d4c\u0d62\u0d63\u0d57]")); /* Malayalam */
0350:                }
0351:
0352:                String avagraha = "\u093d\u09bd\u0abd\u0b3d\u0cbd";
0353:                String nukta = "\u093c\u09bc\u0a3c\u0abc\u0b3c\u0cbc";
0354:                String virama = "\u094d\u09cd\u0a4d\u0acd\u0b4d\u0bcd\u0c4d\u0ccd\u0d4d";
0355:                String sanskritStressSigns = "\u0951\u0952\u0953\u0954\u097d";
0356:                String chandrabindu = "\u0901\u0981\u0A81\u0b01\u0c01";
0357:
0358:                public boolean is(String sourceString) {
0359:                    int cp = sourceString.charAt(0);
0360:
0361:                    // A vowel sign cannot be the first char
0362:                    if (vowelSignSet.contains(cp)) {
0363:                        return false;
0364:                    } else if (avagraha.indexOf(cp) != -1) {
0365:                        return false;
0366:                    } else if (virama.indexOf(cp) != -1) {
0367:                        return false;
0368:                    } else if (nukta.indexOf(cp) != -1) {
0369:                        return false;
0370:                    } else if (sanskritStressSigns.indexOf(cp) != -1) {
0371:                        return false;
0372:                    } else if ((chandrabindu.indexOf(cp) != -1)
0373:                            && (sourceString.length() > 1 && vowelSignSet
0374:                                    .contains(sourceString.charAt(1)))) {
0375:                        return false;
0376:                    }
0377:                    return true;
0378:                }
0379:            }
0380:
                 // Set pattern that TestDevanagariLatin passes to Test.test as its first argument.
                 // Shape: [ [explicit characters and ranges] - [U+E000..U+E080, U+01E2, U+01E3]
                 //          & [[:latin:][:mark:]] ]
                 static String latinForIndic = "[['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD"
                         + "\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F"
                         + "\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148"
                         + "\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0"
                         + "\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u01FB"
                         + "\u0200-\u021B\u021E-\u021F\u0226-\u0233\u0294\u0303-\u0304\u0306\u0314-\u0315"
                         + "\u0325\u040E\u0419\u0439\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7"
                         + "\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03\u1F05"
                         + "\u1F07\u1F09\u1F0B\u1F0D\u1F0F\u1F11\u1F13\u1F15\u1F19\u1F1B\u1F1D\u1F21"
                         + "\u1F23\u1F25\u1F27\u1F29\u1F2B\u1F2D\u1F2F\u1F31\u1F33\u1F35\u1F37\u1F39"
                         + "\u1F3B\u1F3D\u1F3F\u1F41\u1F43\u1F45\u1F49\u1F4B\u1F4D\u1F51\u1F53\u1F55"
                         + "\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63\u1F65\u1F67\u1F69\u1F6B\u1F6D"
                         + "\u1F6F\u1F81\u1F83\u1F85\u1F87\u1F89\u1F8B\u1F8D\u1F8F\u1F91\u1F93\u1F95"
                         + "\u1F97\u1F99\u1F9B\u1F9D\u1F9F\u1FA1\u1FA3\u1FA5\u1FA7\u1FA9\u1FAB\u1FAD"
                         + "\u1FAF-\u1FB1\u1FB8-\u1FB9\u1FD0-\u1FD1\u1FD8-\u1FD9\u1FE0-\u1FE1\u1FE5"
                         + "\u1FE8-\u1FE9\u1FEC\u212A-\u212B\uE04D\uE064]"
                         + "-[\uE000-\uE080 \u01E2\u01E3]& [[:latin:][:mark:]]]";
0398:
0399:            public void TestDevanagariLatin() throws IOException {
0400:                long start = System.currentTimeMillis();
0401:                if (skipIfBeforeICU(2, 8)) {
0402:                    new Test("Latin-DEVANAGARI", 50)
0403:                            .test(
0404:                                    latinForIndic,
0405:                                    "[[:Devanagari:][\u094d][\u0964\u0965] & [:Age=3.2:]]",
0406:                                    "[\u0965]", this , new LegalIndic());
0407:
0408:                } else {
0409:                    if (isICUVersionAtLeast(3, 8)) {
0410:                        // We temporarily filter against Unicode 4.1, but we only do this
0411:                        // before version 3.4.
0412:                        errln("FAIL: TestDevanagariLatin needs to be updated to remove delete the [:Age=4.1:] filter ");
0413:                        return;
0414:                    } else {
0415:                        logln("Warning: TestDevanagariLatin needs to be updated to remove delete the section marked [:Age=4.1:] filter");
0416:                    }
0417:                    new Test("Latin-DEVANAGARI", 50)
0418:                            .test(
0419:                                    latinForIndic,
0420:                                    "[[[:Devanagari:][\u094d][\u0964\u0965]]&[:Age=4.1:]]",
0421:                                    "[\u0965\u0904]", this , new LegalIndic());
0422:                }
0423:                showElapsed(start, "TestDevanagariLatin");
0424:            }
0425:
0426:            private static final String[][] interIndicArray = new String[][] {
0427:                    new String[] {
0428:                            "BENGALI-DEVANAGARI",
0429:                            "[:BENGALI:]",
0430:                            "[:Devanagari:]",
0431:                            "[\u0904\u0951-\u0954\u0943-\u0949\u094a\u0962\u0963\u090D\u090e\u0911\u0912\u0929\u0933\u0934\u0935\u0950\u0958\u0959\u095a\u095b\u095e\u097d]", /*roundtrip exclusions*/
0432:                    },
0433:                    new String[] {
0434:                            "DEVANAGARI-BENGALI",
0435:                            "[:Devanagari:]",
0436:                            "[:BENGALI:]",
0437:                            "[\u09D7\u090D\u090e\u0911\u0912\u0929\u0933\u0934\u0935\u0950\u0958\u0959\u095a\u095b\u095e\u09f0\u09f1\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/
0438:                    },
0439:
0440:                    new String[] {
0441:                            "GURMUKHI-DEVANAGARI",
0442:                            "[:GURMUKHI:]",
0443:                            "[:Devanagari:]",
0444:                            "[\u0904\u0902\u0936\u0933\u0951-\u0954\u0902\u0903\u0943-\u0949\u094a\u0962\u0963\u090B\u090C\u090D\u090e\u0911\u0912\u0934\u0937\u093D\u0950\u0960\u0961\u097d]", /*roundtrip exclusions*/
0445:                    },
0446:                    new String[] {
0447:                            "DEVANAGARI-GURMUKHI",
0448:                            "[:Devanagari:]",
0449:                            "[:GURMUKHI:]",
0450:                            "[\u0A02\u0946\u0A5C\u0951-\u0954\u0A70\u0A71\u090B\u090C\u090D\u090e\u0911\u0912\u0934\u0937\u093D\u0950\u0960\u0961\u0a72\u0a73\u0a74]", /*roundtrip exclusions*/
0451:                    },
0452:
0453:                    new String[] {
0454:                            "GUJARATI-DEVANAGARI",
0455:                            "[:GUJARATI:]",
0456:                            "[:Devanagari:]",
0457:                            "[\u0904\u0946\u094A\u0962\u0963\u0951-\u0954\u0961\u090c\u090e\u0912\u097d]", /*roundtrip exclusions*/
0458:                    },
0459:                    new String[] { "DEVANAGARI-GUJARATI", "[:Devanagari:]",
0460:                            "[:GUJARATI:]",
0461:                            "[\u0951-\u0954\u0961\u090c\u090e\u0912]", /*roundtrip exclusions*/
0462:                    },
0463:
0464:                    new String[] {
0465:                            "ORIYA-DEVANAGARI",
0466:                            "[:ORIYA:]",
0467:                            "[:Devanagari:]",
0468:                            "[\u0904\u0912\u0911\u090D\u090e\u0931\u0943-\u094a\u0962\u0963\u0951-\u0954\u0950\u097d]", /*roundtrip exclusions*/
0469:                    },
0470:                    new String[] {
0471:                            "DEVANAGARI-ORIYA",
0472:                            "[:Devanagari:]",
0473:                            "[:ORIYA:]",
0474:                            "[\u0b5f\u0b56\u0b57\u0b70\u0b71\u0950\u090D\u090e\u0912\u0911\u0931]", /*roundtrip exclusions*/
0475:                    },
0476:
0477:                    new String[] {
0478:                            "Tamil-DEVANAGARI",
0479:                            "[:tamil:]",
0480:                            "[:Devanagari:]",
0481:                            "[\u0901\u0904\u093c\u0943-\u094a\u0951-\u0954\u0962\u0963\u090B\u090C\u090D\u0911\u0916\u0917\u0918\u091B\u091D\u0920\u0921\u0922\u0925\u0926\u0927\u092B\u092C\u092D\u0936\u093d\u0950[\u0958-\u0961]\u097d]", /*roundtrip exclusions*/
0482:                    },
0483:                    new String[] { "DEVANAGARI-Tamil", "[:Devanagari:]",
0484:                            "[:tamil:]", "[\u0bd7]", /*roundtrip exclusions*/
0485:                    },
0486:
0487:                    new String[] {
0488:                            "Telugu-DEVANAGARI",
0489:                            "[:telugu:]",
0490:                            "[:Devanagari:]",
0491:                            "[\u0904\u093c\u0950\u0945\u0949\u0951-\u0954\u0962\u0963\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]\u097d]", /*roundtrip exclusions*/
0492:                    },
0493:                    new String[] {
0494:                            "DEVANAGARI-TELUGU",
0495:                            "[:Devanagari:]",
0496:                            "[:TELUGU:]",
0497:                            "[\u0c55\u0c56\u0950\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]]", /*roundtrip exclusions*/
0498:                    },
0499:
0500:                    new String[] {
0501:                            "KANNADA-DEVANAGARI",
0502:                            "[:KANNADA:]",
0503:                            "[:Devanagari:]",
0504:                            "[\u0901\u0904\u0946\u0950\u0945\u0949\u0951-\u0954\u0962\u0963\u0950\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]\u097d]", /*roundtrip exclusions*/
0505:                    },
0506:                    new String[] {
0507:                            "DEVANAGARI-KANNADA",
0508:                            "[:Devanagari:]",
0509:                            "[:KANNADA:]",
0510:                            "[{\u0cb0\u0cbc}{\u0cb3\u0cbc}\u0cde\u0cd5\u0cd6\u0950\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]]", /*roundtrip exclusions*/
0511:                    },
0512:
0513:                    new String[] {
0514:                            "MALAYALAM-DEVANAGARI",
0515:                            "[:MALAYALAM:]",
0516:                            "[:Devanagari:]",
0517:                            "[\u0901\u0904\u094a\u094b\u094c\u093c\u0950\u0944\u0945\u0949\u0951-\u0954\u0962\u0963\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]\u097d]", /*roundtrip exclusions*/
0518:                    },
0519:                    new String[] {
0520:                            "DEVANAGARI-MALAYALAM",
0521:                            "[:Devanagari:]",
0522:                            "[:MALAYALAM:]",
0523:                            "[\u0d4c\u0d57\u0950\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]]", /*roundtrip exclusions*/
0524:                    },
0525:
0526:                    new String[] {
0527:                            "GURMUKHI-BENGALI",
0528:                            "[:GURMUKHI:]",
0529:                            "[:BENGALI:]",
0530:                            "[\u0982\u09b6\u09e2\u09e3\u09c3\u09c4\u09d7\u098B\u098C\u09B7\u09E0\u09E1\u09F0\u09F1\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/
0531:                    },
0532:                    new String[] {
0533:                            "BENGALI-GURMUKHI",
0534:                            "[:BENGALI:]",
0535:                            "[:GURMUKHI:]",
0536:                            "[\u0A02\u0a5c\u0a47\u0a70\u0a71\u0A33\u0A35\u0A59\u0A5A\u0A5B\u0A5E\u0A72\u0A73\u0A74]", /*roundtrip exclusions*/
0537:                    },
0538:
0539:                    new String[] {
0540:                            "GUJARATI-BENGALI",
0541:                            "[:GUJARATI:]",
0542:                            "[:BENGALI:]",
0543:                            "[\u09d7\u09e2\u09e3\u098c\u09e1\u09f0\u09f1\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/
0544:                    },
0545:                    new String[] {
0546:                            "BENGALI-GUJARATI",
0547:                            "[:BENGALI:]",
0548:                            "[:GUJARATI:]",
0549:                            "[\u0A82\u0a83\u0Ac9\u0Ac5\u0ac7\u0A8D\u0A91\u0AB3\u0AB5\u0ABD\u0AD0]", /*roundtrip exclusions*/
0550:                    },
0551:
0552:                    new String[] {
0553:                            "ORIYA-BENGALI",
0554:                            "[:ORIYA:]",
0555:                            "[:BENGALI:]",
0556:                            "[\u09c4\u09e2\u09e3\u09f0\u09f1\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/
0557:                    },
0558:                    new String[] { "BENGALI-ORIYA", "[:BENGALI:]", "[:ORIYA:]",
0559:                            "[\u0b35\u0b71\u0b5f\u0b56\u0b33\u0b3d]", /*roundtrip exclusions*/
0560:                    },
0561:
0562:                    new String[] {
0563:                            "Tamil-BENGALI",
0564:                            "[:tamil:]",
0565:                            "[:BENGALI:]",
0566:                            "[\u0981\u09bc\u09c3\u09c4\u09e2\u09e3\u09f0\u09f1\u098B\u098C\u0996\u0997\u0998\u099B\u099D\u09A0\u09A1\u09A2\u09A5\u09A6\u09A7\u09AB\u09AC\u09AD\u09B6\u09DC\u09DD\u09DF\u09E0\u09E1\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/
0567:                    },
0568:                    new String[] {
0569:                            "BENGALI-Tamil",
0570:                            "[:BENGALI:]",
0571:                            "[:tamil:]",
0572:                            "[\u0bc6\u0bc7\u0bca\u0B8E\u0B92\u0BA9\u0BB1\u0BB3\u0BB4\u0BB5]", /*roundtrip exclusions*/
0573:                    },
0574:
0575:                    new String[] {
0576:                            "Telugu-BENGALI",
0577:                            "[:telugu:]",
0578:                            "[:BENGALI:]",
0579:                            "[\u09e2\u09e3\u09bc\u09d7\u09f0\u09f1\u09dc\u09dd\u09df\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/
0580:                    },
0581:                    new String[] {
0582:                            "BENGALI-TELUGU",
0583:                            "[:BENGALI:]",
0584:                            "[:TELUGU:]",
0585:                            "[\u0c55\u0c56\u0c47\u0c46\u0c4a\u0C0E\u0C12\u0C31\u0C33\u0C35]", /*roundtrip exclusions*/
0586:                    },
0587:
0588:                    new String[] {
0589:                            "KANNADA-BENGALI",
0590:                            "[:KANNADA:]",
0591:                            "[:BENGALI:]",
0592:                            "[\u0981\u09e2\u09e3\u09bc\u09d7\u09f0\u09f1\u09dc\u09dd\u09df\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/
0593:                    },
0594:                    new String[] {
0595:                            "BENGALI-KANNADA",
0596:                            "[:BENGALI:]",
0597:                            "[:KANNADA:]",
0598:                            "[{\u0cb0\u0cbc}{\u0cb3\u0cbc}\u0cc6\u0cca\u0cd5\u0cd6\u0cc7\u0C8E\u0C92\u0CB1\u0cb3\u0cb5\u0cde]", /*roundtrip exclusions*/
0599:                    },
0600:
0601:                    new String[] {
0602:                            "MALAYALAM-BENGALI",
0603:                            "[:MALAYALAM:]",
0604:                            "[:BENGALI:]",
0605:                            "[\u0981\u09e2\u09e3\u09bc\u09c4\u09f0\u09f1\u09dc\u09dd\u09df\u09f2-\u09fa\u09ce]", /*roundtrip exclusions*/
0606:                    },
0607:                    new String[] { "BENGALI-MALAYALAM", "[:BENGALI:]",
0608:                            "[:MALAYALAM:]",
0609:                            "[\u0d46\u0d4a\u0d47\u0d31-\u0d35\u0d0e\u0d12]", /*roundtrip exclusions*/
0610:                    },
0611:
0612:                    new String[] {
0613:                            "GUJARATI-GURMUKHI",
0614:                            "[:GUJARATI:]",
0615:                            "[:GURMUKHI:]",
0616:                            "[\u0A02\u0ab3\u0ab6\u0A70\u0a71\u0a82\u0a83\u0ac3\u0ac4\u0ac5\u0ac9\u0a5c\u0a72\u0a73\u0a74\u0a8b\u0a8d\u0a91\u0abd]", /*roundtrip exclusions*/
0617:                    },
0618:                    new String[] {
0619:                            "GURMUKHI-GUJARATI",
0620:                            "[:GURMUKHI:]",
0621:                            "[:GUJARATI:]",
0622:                            "[\u0a5c\u0A70\u0a71\u0a72\u0a73\u0a74\u0a82\u0a83\u0a8b\u0a8c\u0a8d\u0a91\u0ab3\u0ab6\u0ab7\u0abd\u0ac3\u0ac4\u0ac5\u0ac9\u0ad0\u0ae0\u0ae1]", /*roundtrip exclusions*/
0623:                    },
0624:
0625:                    new String[] {
0626:                            "ORIYA-GURMUKHI",
0627:                            "[:ORIYA:]",
0628:                            "[:GURMUKHI:]",
0629:                            "[\u0A02\u0a5c\u0a21\u0a47\u0a71\u0b02\u0b03\u0b33\u0b36\u0b43\u0b56\u0b57\u0B0B\u0B0C\u0B37\u0B3D\u0B5F\u0B60\u0B61\u0a35\u0a72\u0a73\u0a74]", /*roundtrip exclusions*/
0630:                    },
0631:                    new String[] {
0632:                            "GURMUKHI-ORIYA",
0633:                            "[:GURMUKHI:]",
0634:                            "[:ORIYA:]",
0635:                            "[\u0a71\u0b02\u0b03\u0b33\u0b36\u0b43\u0b56\u0b57\u0B0B\u0B0C\u0B37\u0B3D\u0B5F\u0B60\u0B61\u0b70\u0b71]", /*roundtrip exclusions*/
0636:                    },
0637:
0638:                    new String[] {
0639:                            "TAMIL-GURMUKHI",
0640:                            "[:TAMIL:]",
0641:                            "[:GURMUKHI:]",
0642:                            "[\u0A01\u0A02\u0a33\u0a36\u0a3c\u0a70\u0a71\u0a47\u0A16\u0A17\u0A18\u0A1B\u0A1D\u0A20\u0A21\u0A22\u0A25\u0A26\u0A27\u0A2B\u0A2C\u0A2D\u0A59\u0A5A\u0A5B\u0A5C\u0A5E\u0A72\u0A73\u0A74]", /*roundtrip exclusions*/
0643:                    },
0644:                    new String[] {
0645:                            "GURMUKHI-TAMIL",
0646:                            "[:GURMUKHI:]",
0647:                            "[:TAMIL:]",
0648:                            "[\u0b82\u0bc6\u0bca\u0bd7\u0bb7\u0bb3\u0b83\u0B8E\u0B92\u0BA9\u0BB1\u0BB4\u0bb6]", /*roundtrip exclusions*/
0649:                    },
0650:
0651:                    new String[] {
0652:                            "TELUGU-GURMUKHI",
0653:                            "[:TELUGU:]",
0654:                            "[:GURMUKHI:]",
0655:                            "[\u0A02\u0a33\u0a36\u0a3c\u0a70\u0a71\u0A59\u0A5A\u0A5B\u0A5C\u0A5E\u0A72\u0A73\u0A74]", /*roundtrip exclusions*/
0656:                    },
0657:                    new String[] {
0658:                            "GURMUKHI-TELUGU",
0659:                            "[:GURMUKHI:]",
0660:                            "[:TELUGU:]",
0661:                            "[\u0c02\u0c03\u0c33\u0c36\u0c44\u0c43\u0c46\u0c4a\u0c56\u0c55\u0C0B\u0C0C\u0C0E\u0C12\u0C31\u0C37\u0C60\u0C61]", /*roundtrip exclusions*/
0662:                    },
0663:                    new String[] {
0664:                            "KANNADA-GURMUKHI",
0665:                            "[:KANNADA:]",
0666:                            "[:GURMUKHI:]",
0667:                            "[\u0A01\u0A02\u0a33\u0a36\u0a3c\u0a70\u0a71\u0A59\u0A5A\u0A5B\u0A5C\u0A5E\u0A72\u0A73\u0A74]", /*roundtrip exclusions*/
0668:                    },
0669:                    new String[] {
0670:                            "GURMUKHI-KANNADA",
0671:                            "[:GURMUKHI:]",
0672:                            "[:KANNADA:]",
0673:                            "[{\u0cb0\u0cbc}{\u0cb3\u0cbc}\u0c82\u0c83\u0cb3\u0cb6\u0cc4\u0cc3\u0cc6\u0cca\u0cd5\u0cd6\u0C8B\u0C8C\u0C8E\u0C92\u0CB1\u0CB7\u0cbd\u0CE0\u0CE1\u0cde]", /*roundtrip exclusions*/
0674:                    },
0675:
0676:                    new String[] {
0677:                            "MALAYALAM-GURMUKHI",
0678:                            "[:MALAYALAM:]",
0679:                            "[:GURMUKHI:]",
0680:                            "[\u0A01\u0A02\u0a4b\u0a4c\u0a33\u0a36\u0a3c\u0a70\u0a71\u0A59\u0A5A\u0A5B\u0A5C\u0A5E\u0A72\u0A73\u0A74]", /*roundtrip exclusions*/
0681:                    },
0682:                    new String[] {
0683:                            "GURMUKHI-MALAYALAM",
0684:                            "[:GURMUKHI:]",
0685:                            "[:MALAYALAM:]",
0686:                            "[\u0d02\u0d03\u0d33\u0d36\u0d43\u0d46\u0d4a\u0d4c\u0d57\u0D0B\u0D0C\u0D0E\u0D12\u0D31\u0D34\u0D37\u0D60\u0D61]", /*roundtrip exclusions*/
0687:                    },
0688:
0689:                    new String[] { "GUJARATI-ORIYA", "[:GUJARATI:]",
0690:                            "[:ORIYA:]",
0691:                            "[\u0b56\u0b57\u0B0C\u0B5F\u0B61\u0b70\u0b71]", /*roundtrip exclusions*/
0692:                    },
0693:                    new String[] {
0694:                            "ORIYA-GUJARATI",
0695:                            "[:ORIYA:]",
0696:                            "[:GUJARATI:]",
0697:                            "[\u0Ac4\u0Ac5\u0Ac9\u0Ac7\u0A8D\u0A91\u0AB5\u0Ad0]", /*roundtrip exclusions*/
0698:                    },
0699:
0700:                    new String[] {
0701:                            "TAMIL-GUJARATI",
0702:                            "[:TAMIL:]",
0703:                            "[:GUJARATI:]",
0704:                            "[\u0A81\u0a8c\u0abc\u0ac3\u0Ac4\u0Ac5\u0Ac9\u0Ac7\u0A8B\u0A8D\u0A91\u0A96\u0A97\u0A98\u0A9B\u0A9D\u0AA0\u0AA1\u0AA2\u0AA5\u0AA6\u0AA7\u0AAB\u0AAC\u0AAD\u0AB6\u0ABD\u0AD0\u0AE0\u0AE1]", /*roundtrip exclusions*/
0705:                    },
0706:                    new String[] {
0707:                            "GUJARATI-TAMIL",
0708:                            "[:GUJARATI:]",
0709:                            "[:TAMIL:]",
0710:                            "[\u0Bc6\u0Bca\u0Bd7\u0B8E\u0B92\u0BA9\u0BB1\u0BB4]", /*roundtrip exclusions*/
0711:                    },
0712:
0713:                    new String[] { "TELUGU-GUJARATI", "[:TELUGU:]",
0714:                            "[:GUJARATI:]",
0715:                            "[\u0abc\u0Ac5\u0Ac9\u0A8D\u0A91\u0ABD\u0Ad0]", /*roundtrip exclusions*/
0716:                    },
0717:                    new String[] {
0718:                            "GUJARATI-TELUGU",
0719:                            "[:GUJARATI:]",
0720:                            "[:TELUGU:]",
0721:                            "[\u0c46\u0c4a\u0c55\u0c56\u0C0C\u0C0E\u0C12\u0C31\u0C61]", /*roundtrip exclusions*/
0722:                    },
0723:
0724:                    new String[] {
0725:                            "KANNADA-GUJARATI",
0726:                            "[:KANNADA:]",
0727:                            "[:GUJARATI:]",
0728:                            "[\u0A81\u0abc\u0Ac5\u0Ac9\u0A8D\u0A91\u0ABD\u0Ad0]", /*roundtrip exclusions*/
0729:                    },
0730:                    new String[] {
0731:                            "GUJARATI-KANNADA",
0732:                            "[:GUJARATI:]",
0733:                            "[:KANNADA:]",
0734:                            "[{\u0cb0\u0cbc}{\u0cb3\u0cbc}\u0cc6\u0cca\u0cd5\u0cd6\u0C8C\u0C8E\u0C92\u0CB1\u0CDE\u0CE1]", /*roundtrip exclusions*/
0735:                    },
0736:
0737:                    new String[] {
0738:                            "MALAYALAM-GUJARATI",
0739:                            "[:MALAYALAM:]",
0740:                            "[:GUJARATI:]",
0741:                            "[\u0A81\u0ac4\u0acb\u0acc\u0abc\u0Ac5\u0Ac9\u0A8D\u0A91\u0ABD\u0Ad0]", /*roundtrip exclusions*/
0742:                    },
0743:                    new String[] {
0744:                            "GUJARATI-MALAYALAM",
0745:                            "[:GUJARATI:]",
0746:                            "[:MALAYALAM:]",
0747:                            "[\u0d46\u0d4a\u0d4c\u0d55\u0d57\u0D0C\u0D0E\u0D12\u0D31\u0D34\u0D61]", /*roundtrip exclusions*/
0748:                    },
0749:
0750:                    new String[] {
0751:                            "TAMIL-ORIYA",
0752:                            "[:TAMIL:]",
0753:                            "[:ORIYA:]",
0754:                            "[\u0B01\u0b3c\u0b43\u0b56\u0B0B\u0B0C\u0B16\u0B17\u0B18\u0B1B\u0B1D\u0B20\u0B21\u0B22\u0B25\u0B26\u0B27\u0B2B\u0B2C\u0B2D\u0B36\u0B3D\u0B5C\u0B5D\u0B5F\u0B60\u0B61\u0b70\u0b71]", /*roundtrip exclusions*/
0755:                    },
0756:                    new String[] {
0757:                            "ORIYA-TAMIL",
0758:                            "[:ORIYA:]",
0759:                            "[:TAMIL:]",
0760:                            "[\u0bc6\u0bca\u0bc7\u0B8E\u0B92\u0BA9\u0BB1\u0BB4\u0BB5]", /*roundtrip exclusions*/
0761:                    },
0762:
0763:                    new String[] {
0764:                            "TELUGU-ORIYA",
0765:                            "[:TELUGU:]",
0766:                            "[:ORIYA:]",
0767:                            "[\u0b3c\u0b57\u0b56\u0B3D\u0B5C\u0B5D\u0B5F\u0b70\u0b71]", /*roundtrip exclusions*/
0768:                    },
0769:                    new String[] {
0770:                            "ORIYA-TELUGU",
0771:                            "[:ORIYA:]",
0772:                            "[:TELUGU:]",
0773:                            "[\u0c44\u0c46\u0c4a\u0c55\u0c47\u0C0E\u0C12\u0C31\u0C35]", /*roundtrip exclusions*/
0774:                    },
0775:
0776:                    new String[] {
0777:                            "KANNADA-ORIYA",
0778:                            "[:KANNADA:]",
0779:                            "[:ORIYA:]",
0780:                            "[\u0B01\u0b3c\u0b57\u0B3D\u0B5C\u0B5D\u0B5F\u0b70\u0b71]", /*roundtrip exclusions*/
0781:                    },
0782:                    new String[] {
0783:                            "ORIYA-KANNADA",
0784:                            "[:ORIYA:]",
0785:                            "[:KANNADA:]",
0786:                            "[{\u0cb0\u0cbc}{\u0cb3\u0cbc}\u0cc4\u0cc6\u0cca\u0cd5\u0cc7\u0C8E\u0C92\u0CB1\u0CB5\u0CDE]", /*roundtrip exclusions*/
0787:                    },
0788:
0789:                    new String[] {
0790:                            "MALAYALAM-ORIYA",
0791:                            "[:MALAYALAM:]",
0792:                            "[:ORIYA:]",
0793:                            "[\u0B01\u0b3c\u0b56\u0B3D\u0B5C\u0B5D\u0B5F\u0b70\u0b71]", /*roundtrip exclusions*/
0794:                    },
0795:                    new String[] {
0796:                            "ORIYA-MALAYALAM",
0797:                            "[:ORIYA:]",
0798:                            "[:MALAYALAM:]",
0799:                            "[\u0D47\u0D46\u0D4a\u0D0E\u0D12\u0D31\u0D34\u0D35]", /*roundtrip exclusions*/
0800:                    },
0801:
0802:                    new String[] { "TELUGU-TAMIL", "[:TELUGU:]", "[:TAMIL:]",
0803:                            "[\u0bd7\u0ba9\u0bb4]", /*roundtrip exclusions*/
0804:                    },
0805:                    new String[] {
0806:                            "TAMIL-TELUGU",
0807:                            "[:TAMIL:]",
0808:                            "[:TELUGU:]",
0809:                            "[\u0C01\u0c43\u0c44\u0c46\u0c47\u0c55\u0c56\u0c66\u0C0B\u0C0C\u0C16\u0C17\u0C18\u0C1B\u0C1D\u0C20\u0C21\u0C22\u0C25\u0C26\u0C27\u0C2B\u0C2C\u0C2D\u0C36\u0C60\u0C61]", /*roundtrip exclusions*/
0810:                    },
0811:
0812:                    new String[] { "KANNADA-TAMIL", "[:KANNADA:]", "[:TAMIL:]",
0813:                            "[\u0bd7\u0bc6\u0ba9\u0bb4]", /*roundtrip exclusions*/
0814:                    },
0815:                    new String[] {
0816:                            "TAMIL-KANNADA",
0817:                            "[:TAMIL:]",
0818:                            "[:KANNADA:]",
0819:                            "[\u0cc3\u0cc4\u0cc6\u0cc7\u0cd5\u0cd6\u0C8B\u0C8C\u0C96\u0C97\u0C98\u0C9B\u0C9D\u0CA0\u0CA1\u0CA2\u0CA5\u0CA6\u0CA7\u0CAB\u0CAC\u0CAD\u0CB6\u0cbc\u0cbd\u0CDE\u0CE0\u0CE1]", /*roundtrip exclusions*/
0820:                    },
0821:
0822:                    new String[] { "MALAYALAM-TAMIL", "[:MALAYALAM:]",
0823:                            "[:TAMIL:]", "[\u0ba9]", /*roundtrip exclusions*/
0824:                    },
0825:                    new String[] {
0826:                            "TAMIL-MALAYALAM",
0827:                            "[:TAMIL:]",
0828:                            "[:MALAYALAM:]",
0829:                            "[\u0d43\u0d12\u0D0B\u0D0C\u0D16\u0D17\u0D18\u0D1B\u0D1D\u0D20\u0D21\u0D22\u0D25\u0D26\u0D27\u0D2B\u0D2C\u0D2D\u0D36\u0D60\u0D61]", /*roundtrip exclusions*/
0830:                    },
0831:
0832:                    new String[] { "KANNADA-TELUGU", "[:KANNADA:]",
0833:                            "[:TELUGU:]", "[\u0C01\u0c3f\u0c46\u0c48\u0c4a]", /*roundtrip exclusions*/
0834:                    },
0835:                    new String[] { "TELUGU-KANNADA", "[:TELUGU:]",
0836:                            "[:KANNADA:]",
0837:                            "[\u0cc8\u0cd5\u0cd6\u0CDE\u0cbc\u0cbd]", /*roundtrip exclusions*/
0838:                    },
0839:
0840:                    new String[] { "MALAYALAM-TELUGU", "[:MALAYALAM:]",
0841:                            "[:TELUGU:]",
0842:                            "[\u0C01\u0c44\u0c4a\u0c4c\u0c4b\u0c55\u0c56]", /*roundtrip exclusions*/
0843:                    },
0844:                    new String[] { "TELUGU-MALAYALAM", "[:TELUGU:]",
0845:                            "[:MALAYALAM:]", "[\u0d4c\u0d57\u0D34]", /*roundtrip exclusions*/
0846:                    },
0847:
0848:                    new String[] {
0849:                            "MALAYALAM-KANNADA",
0850:                            "[:MALAYALAM:]",
0851:                            "[:KANNADA:]",
0852:                            "[\u0cbc\u0cbd\u0cc4\u0cc6\u0cca\u0ccc\u0ccb\u0cd5\u0cd6\u0cDe]", /*roundtrip exclusions*/
0853:                    },
0854:                    new String[] { "Latin-Bengali", latinForIndic,
0855:                            "[[:Bengali:][\u0964\u0965]]",
0856:                            "[\u0965\u09f0-\u09fa\u09ce]", /*roundtrip exclusions*/
0857:                    },
0858:                    new String[] { "Latin-Gurmukhi", latinForIndic,
0859:                            "[[:Gurmukhi:][\u0964\u0965]]",
0860:                            "[\u0a01\u0a02\u0965\u0a72\u0a73\u0a74]", /*roundtrip exclusions*/
0861:                    },
0862:                    new String[] { "Latin-Gujarati", latinForIndic,
0863:                            "[[:Gujarati:][\u0964\u0965]]", "[\u0965]", /*roundtrip exclusions*/
0864:                    },
0865:                    new String[] { "Latin-Oriya", latinForIndic,
0866:                            "[[:Oriya:][\u0964\u0965]]", "[\u0965\u0b70]", /*roundtrip exclusions*/
0867:                    },
0868:                    new String[] { "Latin-Tamil", latinForIndic, "[:Tamil:]",
0869:                            null, /*roundtrip exclusions*/
0870:                    },
0871:                    new String[] { "Latin-Telugu", latinForIndic, "[:Telugu:]",
0872:                            null, /*roundtrip exclusions*/
0873:                    },
0874:                    new String[] { "Latin-Kannada", latinForIndic,
0875:                            "[:Kannada:]", null, /*roundtrip exclusions*/
0876:                    },
0877:                    new String[] { "Latin-Malayalam", latinForIndic,
0878:                            "[:Malayalam:]", null, /*roundtrip exclusions*/
0879:                    }, };
0880:
0881:            public void TestInterIndic() throws Exception {
0882:                long start = System.currentTimeMillis();
0883:                int num = interIndicArray.length;
0884:                if (isQuick()) {
0885:                    logln("Testing only 5 of " + interIndicArray.length
0886:                            + " Skipping rest (use -e for exhaustive)");
0887:                    num = 5;
0888:                }
0889:                if (isICUVersionAtLeast(3, 8)) {
0890:                    // We temporarily filter against Unicode 4.1, but we only do this
0891:                    // before version 3.4.
0892:                    errln("FAIL: TestInterIndic needs to be updated to remove delete the [:Age=4.1:] filter ");
0893:                    return;
0894:                } else {
0895:                    logln("Warning: TestInterIndic needs to be updated to remove delete the section marked [:Age=4.1:] filter");
0896:                }
0897:                for (int i = 0; i < num; i++) {
0898:                    logln("Testing " + interIndicArray[i][0] + " at index " + i);
0899:                    if (skipIfBeforeICU(2, 8)) {
0900:                        new Test(interIndicArray[i][0], 50)
0901:                                .test("[" + interIndicArray[i][1]
0902:                                        + " & [:Age=3.2:]]", "["
0903:                                        + interIndicArray[i][2]
0904:                                        + " & [:Age=3.2:]]",
0905:                                        interIndicArray[i][3], this ,
0906:                                        new LegalIndic());
0907:                    } else {
0908:                        /*TODO: uncomment the line below when the transliterator is fixed
0909:                        new Test(interIndicArray[i][0], 50)
0910:                             .test(interIndicArray[i][1],
0911:                                   interIndicArray[i][2],
0912:                                   interIndicArray[i][3],
0913:                                   this, new LegalIndic());
0914:                         */
0915:                        /* comment lines below  when transliterator is fixed */
0916:                        // start
0917:                        new Test(interIndicArray[i][0], 50).test("["
0918:                                + interIndicArray[i][1] + " &[:Age=4.1:]]", "["
0919:                                + interIndicArray[i][2] + " &[:Age=4.1:]]",
0920:                                interIndicArray[i][3], this , new LegalIndic());
0921:                        //end
0922:                    }
0923:
0924:                }
0925:                showElapsed(start, "TestInterIndic");
0926:            }
0927:
0928:            //---------------
0929:            // End Indic
0930:            //---------------
0931:
0932:            public static class Legal {
0933:                public boolean is(String sourceString) {
0934:                    return true;
0935:                }
0936:            }
0937:
0938:            public static class LegalJamo extends Legal {
0939:                // any initial must be followed by a medial (or initial)
0940:                // any medial must follow an initial (or medial)
0941:                // any final must follow a medial (or final)
0942:
0943:                public boolean is(String sourceString) {
0944:                    try {
0945:                        int t;
0946:                        String decomp = Normalizer.normalize(sourceString,
0947:                                Normalizer.NFD);
0948:                        for (int i = 0; i < decomp.length(); ++i) { // don't worry about surrogates
0949:                            switch (getType(decomp.charAt(i))) {
0950:                            case 0:
0951:                                t = getType(decomp.charAt(i + 1));
0952:                                if (t != 0 && t != 1)
0953:                                    return false;
0954:                                break;
0955:                            case 1:
0956:                                t = getType(decomp.charAt(i - 1));
0957:                                if (t != 0 && t != 1)
0958:                                    return false;
0959:                                break;
0960:                            case 2:
0961:                                t = getType(decomp.charAt(i - 1));
0962:                                if (t != 1 && t != 2)
0963:                                    return false;
0964:                                break;
0965:                            }
0966:                        }
0967:                        return true;
0968:                    } catch (StringIndexOutOfBoundsException e) {
0969:                        return false;
0970:                    }
0971:                }
0972:
0973:                public int getType(char c) {
0974:                    if ('\u1100' <= c && c <= '\u1112')
0975:                        return 0;
0976:                    else if ('\u1161' <= c && c <= '\u1175')
0977:                        return 1;
0978:                    else if ('\u11A8' <= c && c <= '\u11C2')
0979:                        return 2;
0980:                    return -1; // other
0981:                }
0982:            }
0983:
0984:            //static BreakIterator thaiBreak = BreakIterator.getWordInstance(new Locale("th", "TH"));
0985:            // anything is legal except word ending with Logical-order-exception
0986:            public static class LegalThai extends Legal {
0987:                public boolean is(String sourceString) {
0988:                    if (sourceString.length() == 0)
0989:                        return true;
0990:                    char ch = sourceString.charAt(sourceString.length() - 1); // don't worry about surrogates.
0991:                    if (UCharacter.hasBinaryProperty(ch,
0992:                            UProperty.LOGICAL_ORDER_EXCEPTION))
0993:                        return false;
0994:
0995:                    // disallow anything with a wordbreak between
0996:                    /*
0997:                    if (UTF16.countCodePoint(sourceString) <= 1) return true;
0998:                    thaiBreak.setText(sourceString);
0999:                    for (int pos = thaiBreak.first(); pos != BreakIterator.DONE; pos = thaiBreak.next()) {
1000:                        if (pos > 0 && pos < sourceString.length()) {
1001:                            System.out.println("Skipping " + Utility.escape(sourceString));
1002:                            return false;
1003:                        }
1004:                    }
1005:                     */
1006:                    return true;
1007:                }
1008:            }
1009:
1010:            // anything is legal except that Final letters can't be followed by letter; NonFinal must be
1011:            public static class LegalHebrew extends Legal {
1012:                static UnicodeSet FINAL = new UnicodeSet(
1013:                        "[\u05DA\u05DD\u05DF\u05E3\u05E5]");
1014:                static UnicodeSet NON_FINAL = new UnicodeSet(
1015:                        "[\u05DB\u05DE\u05E0\u05E4\u05E6]");
1016:                static UnicodeSet LETTER = new UnicodeSet("[:letter:]");
1017:
1018:                public boolean is(String sourceString) {
1019:                    if (sourceString.length() == 0)
1020:                        return true;
1021:                    // don't worry about surrogates.
1022:                    for (int i = 0; i < sourceString.length(); ++i) {
1023:                        char ch = sourceString.charAt(i);
1024:                        char next = i + 1 == sourceString.length() ? '\u0000'
1025:                                : sourceString.charAt(i);
1026:                        if (FINAL.contains(ch)) {
1027:                            if (LETTER.contains(next))
1028:                                return false;
1029:                        } else if (NON_FINAL.contains(ch)) {
1030:                            if (!LETTER.contains(next))
1031:                                return false;
1032:                        }
1033:                    }
1034:                    return true;
1035:                }
1036:            }
1037:
1038:            public static class LegalGreek extends Legal {
1039:
1040:                boolean full;
1041:
1042:                public LegalGreek(boolean full) {
1043:                    this .full = full;
1044:                }
1045:
1046:                static final char IOTA_SUBSCRIPT = '\u0345';
1047:                static final UnicodeSet breathing = new UnicodeSet(
1048:                        "[\\u0313\\u0314']");
1049:                static final UnicodeSet validSecondVowel = new UnicodeSet(
1050:                        "[\\u03C5\\u03B9\\u03A5\\u0399]");
1051:
1052:                public static boolean isVowel(char c) {
1053:                    return "\u03B1\u03B5\u03B7\u03B9\u03BF\u03C5\u03C9\u0391\u0395\u0397\u0399\u039F\u03A5\u03A9"
1054:                            .indexOf(c) >= 0;
1055:                }
1056:
1057:                public static boolean isRho(char c) {
1058:                    return "\u03C1\u03A1".indexOf(c) >= 0;
1059:                }
1060:
1061:                public boolean is(String sourceString) {
1062:                    try {
1063:                        String decomp = Normalizer.normalize(sourceString,
1064:                                Normalizer.NFD);
1065:
1066:                        // modern is simpler: don't care about anything but a grave
1067:                        if (!full) {
1068:                            //if (sourceString.equals("\u039C\u03C0")) return false;
1069:                            for (int i = 0; i < decomp.length(); ++i) {
1070:                                char c = decomp.charAt(i);
1071:                                // exclude all the accents
1072:                                if (c == '\u0313' || c == '\u0314'
1073:                                        || c == '\u0300' || c == '\u0302'
1074:                                        || c == '\u0342' || c == '\u0345')
1075:                                    return false;
1076:                            }
1077:                            return true;
1078:                        }
1079:
1080:                        // Legal full Greek has breathing marks IFF there is a vowel or RHO at the start
1081:                        // IF it has them, it has exactly one.
1082:                        // IF it starts with a RHO, then the breathing mark must come before the second letter.
1083:                        // IF it starts with a vowel, then it must before the third letter.
1084:                        //  it will only come after the second if of the format [vowel] [no iota subscript!] [upsilon or iota]
1085:                        // Since there are no surrogates in greek, don't worry about them
1086:
1087:                        boolean firstIsVowel = false;
1088:                        boolean firstIsRho = false;
1089:                        boolean noLetterYet = true;
1090:                        int breathingCount = 0;
1091:                        int letterCount = 0;
1092:                        //int breathingPosition = -1;
1093:
1094:                        for (int i = 0; i < decomp.length(); ++i) {
1095:                            char c = decomp.charAt(i);
1096:                            if (UCharacter.isLetter(c)) {
1097:                                ++letterCount;
1098:                                if (firstIsVowel
1099:                                        && !validSecondVowel.contains(c)
1100:                                        && breathingCount == 0)
1101:                                    return false;
1102:                                if (noLetterYet) {
1103:                                    noLetterYet = false;
1104:                                    firstIsVowel = isVowel(c);
1105:                                    firstIsRho = isRho(c);
1106:                                }
1107:                                if (firstIsRho && letterCount == 2
1108:                                        && breathingCount == 0)
1109:                                    return false;
1110:                            }
1111:                            if (c == IOTA_SUBSCRIPT && firstIsVowel
1112:                                    && breathingCount == 0)
1113:                                return false;
1114:                            if (breathing.contains(c)) {
1115:                                // breathingPosition = i;
1116:                                ++breathingCount;
1117:                            }
1118:                        }
1119:
1120:                        if (firstIsVowel || firstIsRho)
1121:                            return breathingCount == 1;
1122:                        return breathingCount == 0;
1123:                    } catch (Throwable t) {
1124:                        System.out.println(t.getClass().getName() + " "
1125:                                + t.getMessage());
1126:                        return true;
1127:                    }
1128:                }
1129:            }
1130:
1131:            static class Test {
1132:
1133:                PrintWriter out;
1134:
1135:                private String transliteratorID;
1136:                private int errorLimit = 500;
1137:                private int errorCount = 0;
1138:                private int pairLimit = 0x10000;
1139:                private int density = 100;
1140:                UnicodeSet sourceRange;
1141:                UnicodeSet targetRange;
1142:                UnicodeSet toSource;
1143:                UnicodeSet toTarget;
1144:                UnicodeSet roundtripExclusions;
1145:
1146:                RoundTripTest log;
1147:                Legal legalSource;
1148:                UnicodeSet badCharacters;
1149:
1150:                /*
1151:                 * create a test for the given script transliterator.
1152:                 */
1153:                Test(String transliteratorID) {
1154:                    this (transliteratorID, 100);
1155:                }
1156:
1157:                Test(String transliteratorID, int dens) {
1158:                    this .transliteratorID = transliteratorID;
1159:                    this .density = dens;
1160:                }
1161:
1162:                public void setErrorLimit(int limit) {
1163:                    errorLimit = limit;
1164:                }
1165:
1166:                public void setPairLimit(int limit) {
1167:                    pairLimit = limit;
1168:                }
1169:
1170:                // Added to do better equality check.
1171:
1172:                public static boolean isSame(String a, String b) {
1173:                    if (a.equals(b))
1174:                        return true;
1175:                    if (a.equalsIgnoreCase(b) && isCamel(a))
1176:                        return true;
1177:                    a = Normalizer.normalize(a, Normalizer.NFD);
1178:                    b = Normalizer.normalize(b, Normalizer.NFD);
1179:                    if (a.equals(b))
1180:                        return true;
1181:                    if (a.equalsIgnoreCase(b) && isCamel(a))
1182:                        return true;
1183:                    return false;
1184:                }
1185:
1186:                /*
1187:                public boolean includesSome(UnicodeSet set, String a) {
1188:                    int cp;
1189:                    for (int i = 0; i < a.length(); i += UTF16.getCharCount(cp)) {
1190:                        cp = UTF16.charAt(a, i);
1191:                        if (set.contains(cp)) return true;
1192:                    }
1193:                    return false;
1194:                }
1195:                 */
1196:
1197:                public static boolean isCamel(String a) {
1198:                    //System.out.println("CamelTest");
1199:                    // see if string is of the form aB; e.g. lower, then upper or title
1200:                    int cp;
1201:                    boolean haveLower = false;
1202:                    for (int i = 0; i < a.length(); i += UTF16.getCharCount(cp)) {
1203:                        cp = UTF16.charAt(a, i);
1204:                        int t = UCharacter.getType(cp);
1205:                        //System.out.println("\t" + t + " " + Integer.toString(cp,16) + " " + UCharacter.getName(cp));
1206:                        switch (t) {
1207:                        case Character.UPPERCASE_LETTER:
1208:                            if (haveLower)
1209:                                return true;
1210:                            break;
1211:                        case Character.TITLECASE_LETTER:
1212:                            if (haveLower)
1213:                                return true;
1214:                            // drop through, since second letter is lower.
1215:                        case Character.LOWERCASE_LETTER:
1216:                            haveLower = true;
1217:                            break;
1218:                        }
1219:                    }
1220:                    //System.out.println("FALSE");
1221:                    return false;
1222:                }
1223:
1224:                static final UnicodeSet okAnyway = new UnicodeSet(
1225:                        "[^[:Letter:]]");
1226:                static final UnicodeSet neverOk = new UnicodeSet("[:Other:]");
1227:
1228:                public void test(String sourceRange, String targetRange,
1229:                        String roundtripExclusions, RoundTripTest log,
1230:                        Legal legalSource) throws java.io.IOException {
1231:                    test(sourceRange, targetRange, sourceRange,
1232:                            roundtripExclusions, log, legalSource);
1233:                }
1234:
1235:                /**
1236:                 * Will test 
1237:                 * that everything in sourceRange maps to targetRange,
1238:                 * that everything in targetRange maps to backtoSourceRange
1239:                 * that everything roundtrips from target -> source -> target, except roundtripExceptions
1240:                 */
1241:                public void test(String sourceRange, String targetRange,
1242:                        String backtoSourceRange, String roundtripExclusions,
1243:                        RoundTripTest log, Legal legalSource)
1244:                        throws java.io.IOException {
1245:
1246:                    this .legalSource = legalSource;
1247:                    this .sourceRange = new UnicodeSet(sourceRange);
1248:                    this .sourceRange.removeAll(neverOk);
1249:
1250:                    this .targetRange = new UnicodeSet(targetRange);
1251:                    this .targetRange.removeAll(neverOk);
1252:
1253:                    this .toSource = new UnicodeSet(backtoSourceRange);
1254:                    this .toSource.addAll(okAnyway);
1255:
1256:                    this .toTarget = new UnicodeSet(targetRange);
1257:                    this .toTarget.addAll(okAnyway);
1258:
1259:                    if (roundtripExclusions != null
1260:                            && roundtripExclusions.length() > 0) {
1261:                        this .roundtripExclusions = new UnicodeSet(
1262:                                roundtripExclusions);
1263:                    } else {
1264:                        this .roundtripExclusions = new UnicodeSet(); // empty
1265:                    }
1266:
1267:                    this .log = log;
1268:
1269:                    log.logln(Utility.escape("Source:  " + this .sourceRange));
1270:                    log.logln(Utility.escape("Target:  " + this .targetRange));
1271:                    log.logln(Utility.escape("Exclude: "
1272:                            + this .roundtripExclusions));
1273:                    if (log.isQuick())
1274:                        log.logln("Abbreviated Test");
1275:
1276:                    badCharacters = new UnicodeSet("[:other:]");
1277:
1278:                    // make a UTF-8 output file we can read with a browser
1279:
1280:                    // note: check that every transliterator transliterates the null string correctly!
1281:
1282:                    // {dlf} reorganize so can run test in protected security environment
1283:                    //              String logFileName = "test_" + transliteratorID.replace('/', '_') + ".html";
1284:
1285:                    //              File lf = new File(logFileName);
1286:                    //              log.logln("Creating log file " + lf.getAbsoluteFile());
1287:
1288:                    //              out = new PrintWriter(new BufferedWriter(new OutputStreamWriter(
1289:                    //                        new FileOutputStream(logFileName), "UTF8"), 4*1024));
1290:
1291:                    ByteArrayOutputStream bast = new ByteArrayOutputStream();
1292:                    out = new PrintWriter(new BufferedWriter(
1293:                            new OutputStreamWriter(bast, "UTF8"), 4 * 1024));
1294:                    //out.write('\uFFEF');    // BOM
1295:                    out
1296:                            .println("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">");
1297:                    out.println("<HTML><HEAD>");
1298:                    out
1299:                            .println("<META content=\"text/html; charset=utf-8\" http-equiv=Content-Type></HEAD>");
1300:                    out
1301:                            .println("<BODY bgcolor='#FFFFFF' style='font-family: Arial Unicode MS'>");
1302:
1303:                    try {
1304:                        test2();
1305:                    } catch (TestTruncated e) {
1306:                        out.println(e.getMessage());
1307:                    }
1308:                    out.println("</BODY></HTML>");
1309:                    out.close();
1310:
1311:                    if (errorCount > 0) {
1312:                        try {
1313:                            String logFileName = "test_"
1314:                                    + transliteratorID.replace('/', '_')
1315:                                    + ".html";
1316:                            File lf = new File(logFileName);
1317:                            log.logln("Creating log file "
1318:                                    + lf.getAbsoluteFile());
1319:                            FileOutputStream fos = new FileOutputStream(lf);
1320:                            fos.write(bast.toByteArray());
1321:                            fos.close();
1322:                            log.errln(transliteratorID
1323:                                    + " errors: "
1324:                                    + errorCount
1325:                                    + (errorCount > errorLimit ? " (at least!)"
1326:                                            : "") + ", see "
1327:                                    + lf.getAbsoluteFile());
1328:                        } catch (SecurityException e) {
1329:                            log
1330:                                    .errln(transliteratorID
1331:                                            + " errors: "
1332:                                            + errorCount
1333:                                            + (errorCount > errorLimit ? " (at least!)"
1334:                                                    : "")
1335:                                            + ", no log provided due to protected test domain");
1336:                        }
1337:                    } else {
1338:                        log.logln(transliteratorID + " ok");
1339:                        //                  new File(logFileName).delete();
1340:                    }
1341:                }
1342:
1343:                // ok if at least one is not equal
1344:                public boolean checkIrrelevants(Transliterator t,
1345:                        String irrelevants) {
1346:                    for (int i = 0; i < irrelevants.length(); ++i) {
1347:                        char c = irrelevants.charAt(i);
1348:                        String cs = UTF16.valueOf(c);
1349:                        String targ = t.transliterate(cs);
1350:                        if (cs.equals(targ))
1351:                            return true;
1352:                    }
1353:                    return false;
1354:                }
1355:
1356:                public void test2() {
1357:
1358:                    Transliterator sourceToTarget = Transliterator
1359:                            .getInstance(transliteratorID);
1360:                    Transliterator targetToSource = sourceToTarget.getInverse();
1361:                    AbbreviatedUnicodeSetIterator usi = new AbbreviatedUnicodeSetIterator();
1362:                    AbbreviatedUnicodeSetIterator usi2 = new AbbreviatedUnicodeSetIterator();
1363:
1364:                    log
1365:                            .logln("Checking that at least one irrevant characters is not NFC'ed");
1366:                    out
1367:                            .println("<h3>Checking that at least one irrevant characters is not NFC'ed</h3>");
1368:
1369:                    String irrelevants = "\u2000\u2001\u2126\u212A\u212B\u2329"; // string is from NFC_NO in the UCD
1370:
1371:                    if (!checkIrrelevants(sourceToTarget, irrelevants)) {
1372:                        logFails("Source-Target, Must not NFC everything");
1373:                    }
1374:                    if (!checkIrrelevants(targetToSource, irrelevants)) {
1375:                        logFails("Target-Source, irrelevants");
1376:                    }
1377:
1378:                    if (EXTRA_TESTS) {
1379:                        log.logln("Checking that toRules works");
1380:                        String rules = "";
1381:                        Transliterator sourceToTarget2;
1382:                        Transliterator targetToSource2;
1383:                        try {
1384:                            rules = sourceToTarget.toRules(false);
1385:                            sourceToTarget2 = Transliterator.createFromRules(
1386:                                    "s2t2", rules, Transliterator.FORWARD);
1387:                            if (PRINT_RULES) {
1388:                                out.println("<h3>Forward Rules:</h3><p>");
1389:                                out.println(TestUtility.replace(rules, "\n",
1390:                                        "<br>\n"));
1391:                                out.println("</p>");
1392:                            }
1393:                            rules = targetToSource.toRules(false);
1394:                            targetToSource2 = Transliterator.createFromRules(
1395:                                    "t2s2", rules, Transliterator.FORWARD);
1396:                            if (PRINT_RULES) {
1397:                                out.println("<h3>Backward Rules:</h3><p>");
1398:                                out.println(TestUtility.replace(rules, "\n",
1399:                                        "<br>\n"));
1400:                                out.println("</p>");
1401:                            }
1402:                        } catch (RuntimeException e) {
1403:                            out.println("<h3>Broken Rules:</h3><p>");
1404:                            out.println(TestUtility.replace(rules, "\n",
1405:                                    "<br>\n"));
1406:                            out.println("</p>");
1407:                            out.flush();
1408:                            throw e;
1409:                        }
1410:
1411:                        out
1412:                                .println("<h3>Roundtrip Exclusions: "
1413:                                        + new UnicodeSet(roundtripExclusions)
1414:                                        + "</h3>");
1415:                        out.flush();
1416:
1417:                        log.logln("Checking that source -> target -> source");
1418:                        out
1419:                                .println("<h3>Checking that source -> target -> source</h3>");
1420:
1421:                        usi.reset(sourceRange);
1422:                        while (usi.next()) {
1423:                            int c = usi.codepoint;
1424:
1425:                            String cs = UTF16.valueOf(c);
1426:                            String targ = sourceToTarget.transliterate(cs);
1427:                            String targ2 = sourceToTarget2.transliterate(cs);
1428:                            if (!targ.equals(targ2)) {
1429:                                logToRulesFails("Source-Target, toRules", cs,
1430:                                        targ, targ2);
1431:                            }
1432:                        }
1433:
1434:                        log.logln("Checking that target -> source -> target");
1435:                        out
1436:                                .println("<h3>Checking that target -> source -> target</h3>");
1437:                        usi.reset(targetRange);
1438:                        while (usi.next()) {
1439:                            int c = usi.codepoint;
1440:
1441:                            String cs = UTF16.valueOf(c);
1442:                            String targ = targetToSource.transliterate(cs);
1443:                            String targ2 = targetToSource2.transliterate(cs);
1444:                            if (!targ.equals(targ2)) {
1445:                                logToRulesFails("Target-Source, toRules", cs,
1446:                                        targ, targ2);
1447:                            }
1448:                        }
1449:                    }
1450:
1451:                    log
1452:                            .logln("Checking that source characters convert to target - Singles");
1453:                    out
1454:                            .println("<h3>Checking that source characters convert to target - Singles</h3>");
1455:
1456:                    UnicodeSet failSourceTarg = new UnicodeSet();
1457:
1458:                    /*
1459:                    for (char c = 0; c < 0xFFFF; ++c) {
1460:                        if (!sourceRange.contains(c)) continue;
1461:                     */
1462:                    usi.reset(sourceRange);
1463:                    while (usi.next()) {
1464:                        int c = usi.codepoint;
1465:
1466:                        String cs = UTF16.valueOf(c);
1467:                        String targ = sourceToTarget.transliterate(cs);
1468:                        if (!toTarget.containsAll(targ)
1469:                                || badCharacters.containsSome(targ)) {
1470:                            String targD = Normalizer.normalize(targ,
1471:                                    Normalizer.NFD);
1472:                            if (!toTarget.containsAll(targD)
1473:                                    || badCharacters.containsSome(targD)) {
1474:                                logWrongScript("Source-Target", cs, targ,
1475:                                        toTarget, badCharacters);
1476:                                failSourceTarg.add(c);
1477:                                continue;
1478:                            }
1479:                        }
1480:
1481:                        String cs2 = Normalizer.normalize(cs, Normalizer.NFD);
1482:                        String targ2 = sourceToTarget.transliterate(cs2);
1483:                        if (!targ.equals(targ2)) {
1484:                            logNotCanonical("Source-Target", cs, targ, cs2,
1485:                                    targ2);
1486:                        }
1487:                    }
1488:
1489:                    log
1490:                            .logln("Checking that source characters convert to target - Doubles");
1491:                    out
1492:                            .println("<h3>Checking that source characters convert to target - Doubles</h3>");
1493:
1494:                    /*
1495:                    for (char c = 0; c < 0xFFFF; ++c) {
1496:                        if (TestUtility.isUnassigned(c) ||
1497:                            !sourceRange.contains(c)) continue;
1498:                        if (failSourceTarg.get(c)) continue;
1499:
1500:                     */
1501:
1502:                    UnicodeSet sourceRangeMinusFailures = new UnicodeSet(
1503:                            sourceRange);
1504:                    sourceRangeMinusFailures.removeAll(failSourceTarg);
1505:
1506:                    boolean quickRt = log.getInclusion() < 10;
1507:
1508:                    usi.reset(sourceRangeMinusFailures, quickRt, density);
1509:
1510:                    while (usi.next()) {
1511:                        int c = usi.codepoint;
1512:
1513:                        /*
1514:                        for (char d = 0; d < 0xFFFF; ++d) {
1515:                            if (TestUtility.isUnassigned(d) ||
1516:                                !sourceRange.contains(d)) continue;
1517:                            if (failSourceTarg.get(d)) continue;
1518:                         */
1519:                        usi2.reset(sourceRangeMinusFailures, quickRt, density);
1520:
1521:                        while (usi2.next()) {
1522:                            int d = usi2.codepoint;
1523:
1524:                            String cs = UTF16.valueOf(c) + UTF16.valueOf(d);
1525:                            String targ = sourceToTarget.transliterate(cs);
1526:                            if (!toTarget.containsAll(targ)
1527:                                    || badCharacters.containsSome(targ)) {
1528:                                String targD = Normalizer.normalize(targ,
1529:                                        Normalizer.NFD);
1530:                                if (!toTarget.containsAll(targD)
1531:                                        || badCharacters.containsSome(targD)) {
1532:                                    logWrongScript("Source-Target", cs, targ,
1533:                                            toTarget, badCharacters);
1534:                                    continue;
1535:                                }
1536:                            }
1537:                            String cs2 = Normalizer.normalize(cs,
1538:                                    Normalizer.NFD);
1539:                            String targ2 = sourceToTarget.transliterate(cs2);
1540:                            if (!targ.equals(targ2)) {
1541:                                logNotCanonical("Source-Target", cs, targ, cs2,
1542:                                        targ2);
1543:                            }
1544:                        }
1545:                    }
1546:
1547:                    log
1548:                            .logln("Checking that target characters convert to source and back - Singles");
1549:                    out
1550:                            .println("<h3>Checking that target characters convert to source and back - Singles</h3>");
1551:
1552:                    UnicodeSet failTargSource = new UnicodeSet();
1553:                    UnicodeSet failRound = new UnicodeSet();
1554:
1555:                    /*for (char c = 0; c < 0xFFFF; ++c) {
1556:                        if (TestUtility.isUnassigned(c) ||
1557:                            !targetRange.contains(c)) continue;
1558:                     */
1559:
1560:                    usi.reset(targetRange);
1561:                    while (usi.next()) {
1562:                        String cs;
1563:                        int c;
1564:                        if (usi.codepoint == UnicodeSetIterator.IS_STRING) {
1565:                            cs = usi.string;
1566:                            c = UTF16.charAt(cs, 0);
1567:                        } else {
1568:                            c = usi.codepoint;
1569:                            cs = UTF16.valueOf(c);
1570:                        }
1571:
1572:                        String targ = targetToSource.transliterate(cs);
1573:                        String reverse = sourceToTarget.transliterate(targ);
1574:
1575:                        if (!toSource.containsAll(targ)
1576:                                || badCharacters.containsSome(targ)) {
1577:                            String targD = Normalizer.normalize(targ,
1578:                                    Normalizer.NFD);
1579:                            if (!toSource.containsAll(targD)
1580:                                    || badCharacters.containsSome(targD)) {
1581:                                /*UnicodeSet temp = */new UnicodeSet()
1582:                                        .addAll(targD);
1583:                                logWrongScript("Target-Source", cs, targ,
1584:                                        toSource, badCharacters);
1585:                                failTargSource.add(cs);
1586:                                continue;
1587:                            }
1588:                        }
1589:                        if (!isSame(cs, reverse)
1590:                                && !roundtripExclusions.contains(c)
1591:                                && !roundtripExclusions.contains(cs)) {
1592:                            logRoundTripFailure(cs, targetToSource.getID(),
1593:                                    targ, sourceToTarget.getID(), reverse);
1594:                            failRound.add(c);
1595:                            continue;
1596:                        }
1597:                        String targ2 = Normalizer.normalize(targ,
1598:                                Normalizer.NFD);
1599:                        String reverse2 = sourceToTarget.transliterate(targ2);
1600:                        if (!reverse.equals(reverse2)) {
1601:                            logNotCanonical("Target-Source", targ, reverse,
1602:                                    targ2, reverse2);
1603:                        }
1604:                    }
1605:
1606:                    log
1607:                            .logln("Checking that target characters convert to source and back - Doubles");
1608:                    out
1609:                            .println("<h3>Checking that target characters convert to source and back - Doubles</h3>");
1610:                    int count = 0;
1611:
1612:                    UnicodeSet targetRangeMinusFailures = new UnicodeSet(
1613:                            targetRange);
1614:                    targetRangeMinusFailures.removeAll(failTargSource);
1615:                    targetRangeMinusFailures.removeAll(failRound);
1616:
1617:                    //char[] buf = new char[4]; // maximum we can have with 2 code points
1618:                    /*
1619:                    for (char c = 0; c < 0xFFFF; ++c) {
1620:                        if (TestUtility.isUnassigned(c) ||
1621:                            !targetRange.contains(c)) continue;
1622:                     */
1623:
1624:                    usi.reset(targetRangeMinusFailures, quickRt, density);
1625:
1626:                    while (usi.next()) {
1627:                        int c = usi.codepoint;
1628:
1629:                        if (++count > pairLimit) {
1630:                            throw new TestTruncated("Test truncated at "
1631:                                    + pairLimit + " x 64k pairs");
1632:                        }
1633:                        //log.log(TestUtility.hex(c));
1634:
1635:                        /*
1636:                        for (char d = 0; d < 0xFFFF; ++d) {
1637:                            if (TestUtility.isUnassigned(d) ||
1638:                                !targetRange.contains(d)) continue;
1639:                         */
1640:                        usi2.reset(targetRangeMinusFailures, quickRt, density);
1641:
1642:                        while (usi2.next()) {
1643:                            int d = usi2.codepoint;
1644:                            if (d < 0)
1645:                                break;
1646:
1647:                            String cs = UTF16.valueOf(c) + UTF16.valueOf(d);
1648:                            String targ = targetToSource.transliterate(cs);
1649:                            String reverse = sourceToTarget.transliterate(targ);
1650:
1651:                            if (!toSource.containsAll(targ) /*&& !failTargSource.contains(c) && !failTargSource.contains(d)*/
1652:                                    || badCharacters.containsSome(targ)) {
1653:                                String targD = Normalizer.normalize(targ,
1654:                                        Normalizer.NFD);
1655:                                if (!toSource.containsAll(targD) /*&& !failTargSource.contains(c) && !failTargSource.contains(d)*/
1656:                                        || badCharacters.containsSome(targD)) {
1657:                                    logWrongScript("Target-Source", cs, targ,
1658:                                            toSource, badCharacters);
1659:                                    continue;
1660:                                }
1661:                            }
1662:                            if (!isSame(cs, reverse) /*&& !failRound.contains(c) && !failRound.contains(d)*/
1663:                                    && !roundtripExclusions.contains(c)
1664:                                    && !roundtripExclusions.contains(d)
1665:                                    && !roundtripExclusions.contains(cs)) {
1666:                                logRoundTripFailure(cs, targetToSource.getID(),
1667:                                        targ, sourceToTarget.getID(), reverse);
1668:                                continue;
1669:                            }
1670:                            String targ2 = Normalizer.normalize(targ,
1671:                                    Normalizer.NFD);
1672:                            String reverse2 = sourceToTarget
1673:                                    .transliterate(targ2);
1674:                            if (!reverse.equals(reverse2)) {
1675:                                logNotCanonical("Target-Source", targ, reverse,
1676:                                        targ2, reverse2);
1677:                            }
1678:                        }
1679:                    }
1680:                    log.logln("");
1681:                }
1682:
1683:                final String info(String s) {
1684:                    StringBuffer result = new StringBuffer();
1685:                    result.append("\u200E").append(s).append("\u200E (")
1686:                            .append(TestUtility.hex(s)).append("/");
1687:                    if (false) { // append age, as a check
1688:                        int cp = 0;
1689:                        for (int i = 0; i < s.length(); i += UTF16
1690:                                .getCharCount(cp)) {
1691:                            cp = UTF16.charAt(s, i);
1692:                            if (i > 0)
1693:                                result.append(", ");
1694:                            result.append(UCharacter.getAge(cp));
1695:                        }
1696:                    }
1697:                    result.append(")");
1698:                    return result.toString();
1699:                }
1700:
1701:                final void logWrongScript(String label, String from, String to,
1702:                        UnicodeSet shouldContainAll,
1703:                        UnicodeSet shouldNotContainAny) {
1704:                    if (++errorCount > errorLimit) {
1705:                        throw new TestTruncated(
1706:                                "Test truncated; too many failures");
1707:                    }
1708:                    String toD = Normalizer.normalize(to, Normalizer.NFD);
1709:                    UnicodeSet temp = new UnicodeSet().addAll(toD);
1710:                    UnicodeSet bad = new UnicodeSet(shouldNotContainAny)
1711:                            .retainAll(temp).addAll(
1712:                                    new UnicodeSet(temp)
1713:                                            .removeAll(shouldContainAll));
1714:
1715:                    out.println("<br>Fail " + label + ": " + info(from)
1716:                            + " => " + info(to) + " " + bad);
1717:                }
1718:
1719:                final void logNotCanonical(String label, String from,
1720:                        String to, String fromCan, String toCan) {
1721:                    if (++errorCount > errorLimit) {
1722:                        throw new TestTruncated(
1723:                                "Test truncated; too many failures");
1724:                    }
1725:                    out.println("<br>Fail (can.equiv) " + label + ": "
1726:                            + info(from) + " => " + info(to) + " -- "
1727:                            + info(fromCan) + " => " + info(toCan) + ")");
1728:                }
1729:
1730:                final void logFails(String label) {
1731:                    if (++errorCount > errorLimit) {
1732:                        throw new TestTruncated(
1733:                                "Test truncated; too many failures");
1734:                    }
1735:                    out.println("<br>Fail (can.equiv)" + label);
1736:                }
1737:
1738:                final void logToRulesFails(String label, String from,
1739:                        String to, String toCan) {
1740:                    if (++errorCount > errorLimit) {
1741:                        throw new TestTruncated(
1742:                                "Test truncated; too many failures");
1743:                    }
1744:                    out.println("<br>Fail " + label + ": " + info(from)
1745:                            + " => " + info(to) + ", " + info(toCan));
1746:                }
1747:
1748:                final void logRoundTripFailure(String from, String toID,
1749:                        String to, String backID, String back) {
1750:                    if (!legalSource.is(from))
1751:                        return; // skip illegals
1752:
1753:                    if (++errorCount > errorLimit) {
1754:                        throw new TestTruncated(
1755:                                "Test truncated; too many failures");
1756:                    }
1757:                    out.println("<br>Fail Roundtrip: " + info(from) + " "
1758:                            + toID + " => " + info(to) + " " + backID + " => "
1759:                            + info(back));
1760:                }
1761:
1762:                /*
1763:                 * Characters to filter for source-target mapping completeness
1764:                 * Typically is base alphabet, minus extended characters
1765:                 * Default is ASCII letters for Latin
1766:                 */
1767:                /*
1768:                public boolean isSource(char c) {
1769:                   if (!sourceRange.contains(c)) return false;
1770:                   return true;
1771:                }
1772:                 */
1773:
1774:                /*
1775:                 * Characters to check for target back to source mapping.
1776:                 * Typically the same as the target script, plus punctuation
1777:                 */
1778:                /*
1779:                public boolean isReceivingSource(char c) {
1780:                   if (!targetRange.contains(c)) return false;
1781:                   return true;
1782:                }
1783:                 */
1784:                /*
1785:                 * Characters to filter for target-source mapping
1786:                 * Typically is base alphabet, minus extended characters
1787:                 */
1788:                /*
1789:                public boolean isTarget(char c) {
1790:                   byte script = TestUtility.getScript(c);
1791:                   if (script != targetScript) return false;
1792:                   if (!TestUtility.isLetter(c)) return false;
1793:                   if (targetRange != null && !targetRange.contains(c)) return false;
1794:                   return true;
1795:                }
1796:                 */
1797:
1798:                /*
1799:                 * Characters to check for target-source mapping
1800:                 * Typically the same as the source script, plus punctuation
1801:                 */
1802:                /*
1803:                public boolean isReceivingTarget(char c) {
1804:                    byte script = TestUtility.getScript(c);
1805:                    return (script == targetScript || script == TestUtility.COMMON_SCRIPT);
1806:                }
1807:
1808:                final boolean isSource(String s) {
1809:                    for (int i = 0; i < s.length(); ++i) {
1810:                        if (!isSource(s.charAt(i))) return false;
1811:                    }
1812:                    return true;
1813:                }
1814:
1815:                final boolean isTarget(String s) {
1816:                    for (int i = 0; i < s.length(); ++i) {
1817:                        if (!isTarget(s.charAt(i))) return false;
1818:                    }
1819:                    return true;
1820:                }
1821:
1822:                final boolean isReceivingSource(String s) {
1823:                    for (int i = 0; i < s.length(); ++i) {
1824:                        if (!isReceivingSource(s.charAt(i))) return false;
1825:                    }
1826:                    return true;
1827:                }
1828:
1829:                final boolean isReceivingTarget(String s) {
1830:                    for (int i = 0; i < s.length(); ++i) {
1831:                        if (!isReceivingTarget(s.charAt(i))) return false;
1832:                    }
1833:                    return true;
1834:                }
1835:                 */
1836:
1837:                static class TestTruncated extends RuntimeException {
1838:                    TestTruncated(String msg) {
1839:                        super (msg);
1840:                    }
1841:                }
1842:            }
1843:
1844:            //  static class TestHangul extends Test {
1845:            //      TestHangul () {
1846:            //          super("Jamo-Hangul", TestUtility.JAMO_SCRIPT, TestUtility.HANGUL_SCRIPT);
1847:            //      }
1848:            //
1849:            //      public boolean isSource(char c) {
1850:            //          if (0x1113 <= c && c <= 0x1160) return false;
1851:            //          if (0x1176 <= c && c <= 0x11F9) return false;
1852:            //          if (0x3131 <= c && c <= 0x318E) return false;
1853:            //          return super.isSource(c);
1854:            //      }
1855:            //  }
1856:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.