Source Code Cross Referenced for UnicodeNormalizerConformanceTest.java in  » Internationalization-Localization » icu4j » com » ibm » icu » dev » test » normalizer » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Internationalization Localization » icu4j » com.ibm.icu.dev.test.normalizer 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*
002:         *******************************************************************************
003:         * Copyright (C) 1996-2005, International Business Machines Corporation and    *
004:         * others. All Rights Reserved.                                                *
005:         *******************************************************************************
006:         */
007:
008:        package com.ibm.icu.dev.test.normalizer;
009:
010:        import java.io.BufferedReader;
011:        import java.io.IOException;
012:
013:        import com.ibm.icu.dev.test.TestFmwk;
014:        import com.ibm.icu.dev.test.TestUtil;
015:        import com.ibm.icu.text.UTF16;
016:        import com.ibm.icu.text.UnicodeSet;
017:
018:        public class UnicodeNormalizerConformanceTest extends TestFmwk {
019:
020:            UnicodeNormalizer normalizer_C, normalizer_D, normalizer_KC,
021:                    normalizer_KD;
022:
023:            public static void main(String[] args) throws Exception {
024:                new UnicodeNormalizerConformanceTest().run(args);
025:            }
026:
027:            public UnicodeNormalizerConformanceTest() {
028:                // Doesn't matter what the string and mode are; we'll change
029:                // them later as needed.
030:                normalizer_C = new UnicodeNormalizer(UnicodeNormalizer.C, true);
031:                normalizer_D = new UnicodeNormalizer(UnicodeNormalizer.D, false);
032:                normalizer_KC = new UnicodeNormalizer(UnicodeNormalizer.KC,
033:                        false);
034:                normalizer_KD = new UnicodeNormalizer(UnicodeNormalizer.KD,
035:                        false);
036:
037:            }
038:
039:            // more interesting conformance test cases, not in the unicode.org NormalizationTest.txt
040:            static String[] moreCases = {
041:            // Markus 2001aug30
042:                    "0061 0332 0308;00E4 0332;0061 0332 0308;00E4 0332;0061 0332 0308; # Markus 0",
043:
044:                    // Markus 2001oct26 - test edge case for iteration: U+0f73.cc==0 but decomposition.lead.cc==129
045:                    "0061 0301 0F73;00E1 0F71 0F72;0061 0F71 0F72 0301;00E1 0F71 0F72;0061 0F71 0F72 0301; # Markus 1" };
046:
047:            /**
048:             * Test the conformance of NewNormalizer to
049:             * http://www.unicode.org/unicode/reports/tr15/conformance/Draft-TestSuite.txt.
050:             * This file must be located at the path specified as TEST_SUITE_FILE.
051:             */
052:            public void TestConformance() throws Exception {
053:                BufferedReader input = null;
054:                String line = null;
055:                String[] fields = new String[5];
056:                StringBuffer buf = new StringBuffer();
057:                int passCount = 0;
058:                int failCount = 0;
059:                UnicodeSet other = new UnicodeSet(0, 0x10ffff);
060:                int c = 0;
061:                try {
062:                    input = TestUtil
063:                            .getDataReader("unicode/NormalizationTest.txt");
064:                    for (int count = 0;; ++count) {
065:                        line = input.readLine();
066:                        if (line == null) {
067:                            //read the extra test cases
068:                            if (count > moreCases.length) {
069:                                count = 0;
070:                            } else if (count == moreCases.length) {
071:                                // all done
072:                                break;
073:                            }
074:                            line = moreCases[count++];
075:                        }
076:                        if (line.length() == 0)
077:                            continue;
078:
079:                        // Expect 5 columns of this format:
080:                        // 1E0C;1E0C;0044 0323;1E0C;0044 0323; # <comments>
081:
082:                        // Skip comments
083:                        if (line.charAt(0) == '#' || line.charAt(0) == '@')
084:                            continue;
085:
086:                        // Parse out the fields
087:                        hexsplit(line, ';', fields, buf);
088:
089:                        // Remove a single code point from the "other" UnicodeSet
090:                        if (fields[0].length() == UTF16.moveCodePointOffset(
091:                                fields[0], 0, 1)) {
092:                            c = UTF16.charAt(fields[0], 0);
093:                            if (0xac20 <= c && c <= 0xd73f) {
094:                                // not an exhaustive test run: skip most Hangul syllables
095:                                if (c == 0xac20) {
096:                                    other.remove(0xac20, 0xd73f);
097:                                }
098:                                continue;
099:                            }
100:                            other.remove(c);
101:                        }
102:                        if (checkConformance(fields, line)) {
103:                            ++passCount;
104:                        } else {
105:                            ++failCount;
106:                        }
107:                        if ((count % 1000) == 999) {
108:                            logln("Line " + (count + 1));
109:                        }
110:                    }
111:                } catch (IOException ex) {
112:                    try {
113:                        input.close();
114:                    } catch (Exception ex2) {
115:                        System.out.print("");
116:                    }
117:                    ex.printStackTrace();
118:                    throw new IllegalArgumentException("Couldn't read file "
119:                            + ex.getClass().getName() + " " + ex.getMessage()
120:                            + " line = " + line);
121:                }
122:
123:                if (failCount != 0) {
124:                    errln("Total: " + failCount + " lines failed, " + passCount
125:                            + " lines passed");
126:                } else {
127:                    logln("Total: " + passCount + " lines passed");
128:                }
129:            }
130:
131:            /**
132:             * Verify the conformance of the given line of the Unicode
133:             * normalization (UTR 15) test suite file.  For each line,
134:             * there are five columns, corresponding to field[0]..field[4].
135:             *
136:             * The following invariants must be true for all conformant implementations
137:             *  c2 == NFC(c1) == NFC(c2) == NFC(c3)
138:             *  c3 == NFD(c1) == NFD(c2) == NFD(c3)
139:             *  c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
140:             *  c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
141:             *
142:             * @param field the 5 columns
143:             * @param line the source line from the test suite file
144:             * @return true if the test passes
145:             */
146:            private boolean checkConformance(String[] field, String line)
147:                    throws Exception {
148:                boolean pass = true;
149:                // StringBuffer buf = new StringBuffer(); // scratch
150:                String out;
151:                int i = 0;
152:                for (i = 0; i < 5; ++i) {
153:                    if (i < 3) {
154:                        out = normalizer_C.normalize(field[i]);
155:                        pass &= assertEqual("C", field[i], out, field[1],
156:                                "c2!=C(c" + (i + 1));
157:
158:                        out = normalizer_D.normalize(field[i]);
159:                        pass &= assertEqual("D", field[i], out, field[2],
160:                                "c3!=D(c" + (i + 1));
161:
162:                    }
163:                    out = normalizer_KC.normalize(field[i]);
164:                    pass &= assertEqual("KC", field[i], out, field[3],
165:                            "c4!=KC(c" + (i + 1));
166:
167:                    out = normalizer_KD.normalize(field[i]);
168:                    pass &= assertEqual("KD", field[i], out, field[4],
169:                            "c5!=KD(c" + (i + 1));
170:
171:                }
172:
173:                if (!pass) {
174:                    errln("FAIL: " + line);
175:                }
176:
177:                return pass;
178:            }
179:
180:            /**
181:             * @param op name of normalization form, e.g., "KC"
182:             * @param s string being normalized
183:             * @param got value received
184:             * @param exp expected value
185:             * @param msg description of this test
186:             * @returns true if got == exp
187:             */
188:            private boolean assertEqual(String op, String s, String got,
189:                    String exp, String msg) {
190:                if (exp.equals(got)) {
191:                    return true;
192:                }
193:                errln(("      " + msg + ") " + op + "(" + s + ")=" + hex(got)
194:                        + ", exp. " + hex(exp)));
195:                return false;
196:            }
197:
198:            /**
199:             * Split a string into pieces based on the given delimiter
200:             * character.  Then, parse the resultant fields from hex into
201:             * characters.  That is, "0040 0400;0C00;0899" -> new String[] {
202:             * "\u0040\u0400", "\u0C00", "\u0899" }.  The output is assumed to
203:             * be of the proper length already, and exactly output.length
204:             * fields are parsed.  If there are too few an exception is
205:             * thrown.  If there are too many the extras are ignored.
206:             *
207:             * @param buf scratch buffer
208:             */
209:            private static void hexsplit(String s, char delimiter,
210:                    String[] output, StringBuffer buf) {
211:                int i;
212:                int pos = 0;
213:                for (i = 0; i < output.length; ++i) {
214:                    int delim = s.indexOf(delimiter, pos);
215:                    if (delim < 0) {
216:                        throw new IllegalArgumentException("Missing field in "
217:                                + s);
218:                    }
219:                    // Our field is from pos..delim-1.
220:                    buf.setLength(0);
221:
222:                    String toHex = s.substring(pos, delim);
223:                    pos = delim;
224:                    int index = 0;
225:                    int len = toHex.length();
226:                    while (index < len) {
227:                        if (toHex.charAt(index) == ' ') {
228:                            index++;
229:                        } else {
230:                            int spacePos = toHex.indexOf(' ', index);
231:                            if (spacePos == -1) {
232:                                appendInt(buf, toHex.substring(index, len), s);
233:                                spacePos = len;
234:                            } else {
235:                                appendInt(buf,
236:                                        toHex.substring(index, spacePos), s);
237:                            }
238:                            index = spacePos + 1;
239:                        }
240:                    }
241:
242:                    if (buf.length() < 1) {
243:                        throw new IllegalArgumentException("Empty field " + i
244:                                + " in " + s);
245:                    }
246:                    output[i] = buf.toString();
247:                    ++pos; // Skip over delim
248:                }
249:            }
250:
251:            public static void appendInt(StringBuffer buf, String strToHex,
252:                    String s) {
253:                int hex = Integer.parseInt(strToHex, 16);
254:                if (hex < 0) {
255:                    throw new IllegalArgumentException("Out of range hex "
256:                            + hex + " in " + s);
257:                } else if (hex > 0xFFFF) {
258:                    buf.append((char) ((hex >> 10) + 0xd7c0));
259:                    buf.append((char) ((hex & 0x3ff) | 0xdc00));
260:                } else {
261:                    buf.append((char) hex);
262:                }
263:            }
264:
265:            // Specific tests for debugging.  These are generally failures
266:            // taken from the conformance file, but culled out to make
267:            // debugging easier.  These can be eliminated without affecting
268:            // coverage.
269:
270:            public void _hideTestCase6() throws Exception {
271:                _testOneLine("0385;0385;00A8 0301;0020 0308 0301;0020 0308 0301;");
272:            }
273:
274:            public void _testOneLine(String line) throws Exception {
275:                String[] fields = new String[5];
276:                StringBuffer buf = new StringBuffer();
277:                // Parse out the fields
278:                hexsplit(line, ';', fields, buf);
279:                checkConformance(fields, line);
280:            }
281:
282:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.