Source Code Cross Referenced for WriteCharts.java in » Internationalization-Localization » icu4j » com » ibm » icu » dev » test » translit » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation

1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI

Java

Java Tutorial

Illustrator Tutorials

GIMP Tutorials

C# / C Sharp

C# / CSharp Tutorial

C# / CSharp Open Source

SQL Server / T-SQL Tutorial

Oracle PL / SQL

Oracle PL/SQL Tutorial

Flash / Flex / ActionScript

VBA / Excel / Access / Word

XML

XML Tutorial

Microsoft Office PowerPoint 2007 Tutorial

Microsoft Office Excel 2007 Tutorial

Microsoft Office Word 2007 Tutorial

Java Source Code / Java Documentation » Internationalization Localization » icu4j » com.ibm.icu.dev.test.translit

Source Cross Referenced Class Diagram Java Document (Java Doc)

001:        /*
002:         *******************************************************************************
003:         * Copyright (C) 1996-2005, International Business Machines Corporation and    *
004:         * others. All Rights Reserved.                                                *
005:         *******************************************************************************
006:         */
007:
008:        package com.ibm.icu.dev.test.translit;
009:
010:        import com.ibm.icu.lang.*;
011:        import com.ibm.icu.text.*;
012:        import java.util.*;
013:        import java.io.*;
014:
015:        public class WriteCharts {
016:            public static void main(String[] args) throws IOException {
017:                if (false) {
018:                    printSet("[[\u0000-\u007E \u30A1-\u30FC \uFF61-\uFF9F\u3001\u3002][:Katakana:][:Mark:]]");
019:                }
020:                String testSet = "";
021:                if (args.length == 0)
022:                    args = getAllScripts();
023:                for (int i = 0; i < args.length; ++i) {
024:                    // Enumeration enum = Transliterator.getAvailableIDs();
025:                    if (args[i].startsWith("[")) {
026:                        testSet = args[i];
027:                    } else {
028:                        print(testSet, args[i]);
029:                        testSet = "";
030:                    }
031:                }
032:            }
033:
034:            public static void printSet(String source) {
035:                UnicodeSet s = new UnicodeSet(source);
036:                System.out.println("Printout for '" + source + "'");
037:                int count = s.getRangeCount();
038:                for (int i = 0; i < count; ++i) {
039:                    int start = s.getRangeStart(i);
040:                    int end = s.getRangeEnd(i);
041:                    System.out.println(Integer.toString(start, 16) + ".."
042:                            + Integer.toString(end, 16));
043:                }
044:            }
045:
046:            public static String[] getAllScripts() {
047:                Set set = new TreeSet();
048:                int scripts[];
049:                Enumeration sources = Transliterator.getAvailableSources();
050:                while (sources.hasMoreElements()) {
051:                    String source = (String) sources.nextElement();
052:                    scripts = UScript.getCode(source);
053:                    if (scripts == null) {
054:                        System.out.println("[Skipping " + source + "]");
055:                        continue;
056:                    }
057:                    int sourceScript = scripts[0];
058:                    System.out.println("Source: " + source + ";\tScripts: "
059:                            + showScripts(scripts));
060:                    Enumeration targets = Transliterator
061:                            .getAvailableTargets(source);
062:                    while (targets.hasMoreElements()) {
063:                        String target = (String) targets.nextElement();
064:                        scripts = UScript.getCode(target);
065:                        if (scripts == null
066:                                || priority(scripts[0]) < priority(sourceScript)) {
067:                            // skip doing both directions
068:                            System.out.println("[Skipping '" + source + "-"
069:                                    + target + "']");
070:                            continue;
071:                        }
072:                        System.out.println("\tTarget: " + target
073:                                + ";\tScripts: " + showScripts(scripts));
074:                        Enumeration variants = Transliterator
075:                                .getAvailableVariants(source, target);
076:                        while (variants.hasMoreElements()) {
077:                            String variant = (String) variants.nextElement();
078:                            String id = source + "-" + target;
079:                            if (variant.length() != 0) {
080:                                id += "/" + variant;
081:                                if (false) {
082:                                    System.out
083:                                            .println("SKIPPING VARIANT, SINCE IT CURRENTLY BREAKS!\t"
084:                                                    + id);
085:                                    continue;
086:                                }
087:                            }
088:                            System.out.println("\t\t\t\tAdding: '" + id + "'");
089:                            set.add(id);
090:                        }
091:                    }
092:                }
093:                String[] results = new String[set.size()];
094:                set.toArray(results);
095:                return results;
096:            }
097:
098:            static public int priority(int script) {
099:                if (script == UScript.LATIN)
100:                    return -2;
101:                return script;
102:            }
103:
104:            public static String showScripts(int[] scripts) {
105:                StringBuffer results = new StringBuffer();
106:                for (int i = 0; i < scripts.length; ++i) {
107:                    if (i != 0)
108:                        results.append(", ");
109:                    results.append(UScript.getName(scripts[i]));
110:                }
111:                return results.toString();
112:            }
113:
114:            public static void print(String testSet, String rawId)
115:                    throws IOException {
116:                System.out.println("Processing " + rawId);
117:                Transliterator t = Transliterator.getInstance(rawId);
118:                String id = t.getID();
119:
120:                // clean up IDs. Ought to be API for getting source, target, variant
121:                int minusPos = id.indexOf('-');
122:                String source = id.substring(0, minusPos);
123:                String target = id.substring(minusPos + 1);
124:                int slashPos = target.indexOf('/');
125:                if (slashPos >= 0)
126:                    target = target.substring(0, slashPos);
127:
128:                // check that the source is a script
129:                if (testSet.equals("")) {
130:                    int[] scripts = UScript.getCode(source);
131:                    if (scripts == null) {
132:                        System.out.println("FAILED: "
133:                                + Transliterator.getDisplayName(id)
134:                                + " does not have a script as the source");
135:                        return;
136:                    } else {
137:                        testSet = "[:" + source + ":]";
138:                        if (source.equalsIgnoreCase("katakana")) {
139:                            testSet = "[" + testSet + "\u30FC]";
140:                            printSet(testSet);
141:                        }
142:                    }
143:                }
144:                UnicodeSet sourceSet = new UnicodeSet(testSet);
145:
146:                // check that the target is a script
147:                int[] scripts = UScript.getCode(target);
148:                if (scripts == null) {
149:                    target = "[:Latin:]";
150:                } else {
151:                    target = "[:" + target + ":]";
152:                }
153:                UnicodeSet targetSet = new UnicodeSet(target);
154:
155:                Transliterator inverse = t.getInverse();
156:
157:                //Transliterator hex = Transliterator.getInstance("Any-Hex");
158:
159:                // iterate through script
160:                System.out.println("Transliterating "
161:                        + sourceSet.toPattern(true) + " with "
162:                        + Transliterator.getDisplayName(id));
163:
164:                UnicodeSet leftOverSet = new UnicodeSet(targetSet);
165:                UnicodeSet privateUse = new UnicodeSet("[:private use:]");
166:
167:                Map map = new TreeMap();
168:
169:                UnicodeSet targetSetPlusAnyways = new UnicodeSet(targetSet);
170:                targetSetPlusAnyways.addAll(okAnyway);
171:
172:                UnicodeSet sourceSetPlusAnyways = new UnicodeSet(sourceSet);
173:                sourceSetPlusAnyways.addAll(okAnyway);
174:
175:                UnicodeSetIterator usi = new UnicodeSetIterator(sourceSet);
176:
177:                while (usi.next()) {
178:                    int j = usi.codepoint;
179:                    /*
180:                    int count = sourceSet.getRangeCount();
181:                    for (int i = 0; i < count; ++i) {
182:                    int end = sourceSet.getRangeEnd(i);
183:                    for (int j = sourceSet.getRangeStart(i); j <= end; ++j) {
184:                     */
185:                    // String flag = "";
186:                    String ss = UTF16.valueOf(j);
187:                    String ts = t.transliterate(ss);
188:                    char group = 0;
189:                    if (!targetSetPlusAnyways.containsAll(ts)) {
190:                        group |= 1;
191:                    }
192:                    if (UTF16.countCodePoint(ts) == 1) {
193:                        leftOverSet.remove(UTF16.charAt(ts, 0));
194:                    }
195:                    String rt = inverse.transliterate(ts);
196:                    if (!sourceSetPlusAnyways.containsAll(rt)) {
197:                        group |= 2;
198:                    } else if (!ss.equals(rt)) {
199:                        group |= 4;
200:                    }
201:
202:                    if (!privateUse.containsNone(ts)
203:                            || !privateUse.containsNone(rt)) {
204:                        group |= 16;
205:                    }
206:
207:                    map.put(group
208:                            + UCharacter.toLowerCase(Normalizer.normalize(ss,
209:                                    Normalizer.NFKD)) + "\u0000" + ss,
210:                            "<td class='s'>" + ss + "<br><tt>" + hex(ss)
211:                                    + "</tt></td><td class='t'>" + ts
212:                                    + "<br><tt>" + hex(ts)
213:                                    + "</tt></td><td class='r'>" + rt
214:                                    + "<br><tt>" + hex(rt) + "</tt></td>");
215:
216:                    // Check Duals
217:                    /*
218:                    int maxDual = 200;
219:                    dual:
220:                    for (int i2 = 0; i2 < count; ++i2) {
221:                        int end2 = sourceSet.getRangeEnd(i2);
222:                        for (int j2 = sourceSet.getRangeStart(i2); j2 <= end; ++j2) {
223:                            String ss2 = UTF16.valueOf(j2);
224:                            String ts2 = t.transliterate(ss2);
225:                            String rt2 = inverse.transliterate(ts2);
226:                            
227:                            String ss12 = ss + ss2;
228:                            String ts12 = t.transliterate(ss + ss12);
229:                            String rt12 = inverse.transliterate(ts12);
230:                            if (ts12.equals(ts + ts2) && rt12.equals(rt + rt2)) continue;   
231:                            if (--maxDual < 0) break dual;
232:                            
233:                            // transliteration of whole differs from that of parts
234:                            group = 0x100;
235:                            map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ss12, Normalizer.DECOMP_COMPAT, 0))
236:                                    + "\u0000" + ss12, 
237:                                "<td class='s'>" + ss12 + "<br><tt>" + hex(ss12)
238:                                    + "</tt></td><td class='t'>" + ts12 + "<br><tt>" + hex(ts12)
239:                                    + "</tt></td><td class='r'>" + rt12 + "<br><tt>" + hex(rt12) + "</tt></td>" );
240:                        }
241:                    }
242:                     */
243:                    //}
244:                }
245:
246:                leftOverSet.remove(0x0100, 0x02FF); // remove extended & IPA
247:
248:                /*int count = leftOverSet.getRangeCount();
249:                for (int i = 0; i < count; ++i) {
250:                    int end = leftOverSet.getRangeEnd(i);
251:                    for (int j = leftOverSet.getRangeStart(i); j <= end; ++j) {
252:                 */
253:
254:                usi.reset(leftOverSet);
255:                while (usi.next()) {
256:                    int j = usi.codepoint;
257:
258:                    String ts = UTF16.valueOf(j);
259:                    // String decomp = Normalizer.normalize(ts, Normalizer.DECOMP_COMPAT, 0);
260:                    // if (!decomp.equals(ts)) continue;
261:
262:                    String rt = inverse.transliterate(ts);
263:                    // String flag = "";
264:                    char group = 0x80;
265:
266:                    if (!sourceSetPlusAnyways.containsAll(rt)) {
267:                        group |= 8;
268:                    }
269:                    if (!privateUse.containsNone(rt)) {
270:                        group |= 16;
271:                    }
272:
273:                    map.put(group
274:                            + UCharacter.toLowerCase(Normalizer.normalize(ts,
275:                                    Normalizer.NFKD)) + ts,
276:                            "<td class='s'>-</td><td class='t'>" + ts
277:                                    + "<br><tt>" + hex(ts)
278:                                    + "</tt></td><td class='r'>" + rt
279:                                    + "<br><tt>" + hex(rt) + "</tt></td>");
280:                    //}
281:                }
282:
283:                // make file name and open
284:                File f = new File("transliteration/chart_"
285:                        + id.replace('/', '_') + ".html");
286:                String filename = f.getCanonicalFile().toString();
287:                PrintWriter out = new PrintWriter(new OutputStreamWriter(
288:                        new FileOutputStream(filename), "UTF-8"));
289:                //out.print('\uFEFF'); // BOM
290:
291:                System.out.println("Writing " + filename);
292:
293:                try {
294:                    out
295:                            .println("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">");
296:                    out.println("<HTML><HEAD>");
297:                    out
298:                            .println("<META content=\"text/html; charset=utf-8\" http-equiv=Content-Type></HEAD>");
299:                    out
300:                            .println("<link rel='stylesheet' href='http://www.unicode.org/charts/uca/charts.css' type='text/css'>");
301:
302:                    out.println("<BODY>");
303:                    out.println("<h1>Transliteration Samples for '"
304:                            + Transliterator.getDisplayName(id) + "'</h1>");
305:                    out
306:                            .println("<p>This file illustrates the transliterations of "
307:                                    + Transliterator.getDisplayName(id) + ".");
308:                    out
309:                            .println("The samples are mechanically generated, and only include single characters");
310:                    out
311:                            .println("from the source set. Thus it will <i>not</i> contain examples where the transliteration");
312:                    out
313:                            .println("depends on the context around the character. For a more detailed -- and interactive -- example, see the");
314:                    out
315:                            .println("<a href='http://www.ibm.com/software/globalization/icu/demo/transform'>Transliteration Demo</a></p><hr>");
316:
317:                    // set up the headers
318:                    int columnCount = 3;
319:                    String headerBase = "<th>Source</th><th>Target</th><th>Return</th>";
320:                    String headers = headerBase;
321:                    for (int i = columnCount - 1; i > 0; --i) {
322:                        if (i != columnCount - 1)
323:                            headers += "<th>&nbsp;</th>";
324:                        headers += headerBase;
325:                    }
326:
327:                    String tableHeader = "<p><table border='1'><tr>" + headers
328:                            + "</tr>";
329:                    String tableFooter = "</table></p>";
330:                    out.println("<h2>Round Trip</h2>");
331:                    out.println(tableHeader);
332:
333:                    Iterator it = map.keySet().iterator();
334:                    char lastGroup = 0;
335:                    int count = 0;
336:                    int column = 0;
337:                    while (it.hasNext()) {
338:                        String key = (String) it.next();
339:                        char group = key.charAt(0);
340:                        if (group != lastGroup || count++ > 50) {
341:                            lastGroup = group;
342:                            count = 0;
343:                            if (column != 0) {
344:                                out.println("</tr>");
345:                                column = 0;
346:                            }
347:                            out.println(tableFooter);
348:
349:                            // String title = "";
350:                            if ((group & 0x100) != 0)
351:                                out.println("<hr><h2>Duals</h2>");
352:                            else if ((group & 0x80) != 0)
353:                                out.println("<hr><h2>Completeness</h2>");
354:                            else
355:                                out.println("<hr><h2>Round Trip</h2>");
356:                            if ((group & 16) != 0)
357:                                out
358:                                        .println("<h3>Errors: Contains Private Use Characters</h3>");
359:                            if ((group & 8) != 0)
360:                                out
361:                                        .println("<h3>Possible Errors: Return not in Source Set</h3>");
362:                            if ((group & 4) != 0)
363:                                out
364:                                        .println("<h3>One-Way Mapping: Return not equal to Source</h3>");
365:                            if ((group & 2) != 0)
366:                                out
367:                                        .println("<h3>Errors: Return not in Source Set</h3>");
368:                            if ((group & 1) != 0)
369:                                out
370:                                        .println("<h3>Errors: Target not in Target Set</h3>");
371:
372:                            out.println(tableHeader);
373:                            column = 0;
374:                        }
375:                        String value = (String) map.get(key);
376:                        if (column++ == 0)
377:                            out.print("<tr>");
378:                        else
379:                            out.print("<th>&nbsp;</th>");
380:                        out.println(value);
381:                        if (column == 3) {
382:                            out.println("</tr>");
383:                            column = 0;
384:                        }
385:                    }
386:                    if (column != 0) {
387:                        out.println("</tr>");
388:                        column = 0;
389:                    }
390:                    out.println(tableFooter + "</BODY></HTML>");
391:
392:                } finally {
393:                    out.close();
394:                }
395:            }
396:
397:            public static String hex(String s) {
398:                int cp;
399:                StringBuffer results = new StringBuffer();
400:                for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
401:                    cp = UTF16.charAt(s, i);
402:                    if (i != 0)
403:                        results.append(' ');
404:                    results.append(Integer.toHexString(cp));
405:                }
406:                return results.toString().toUpperCase();
407:            }
408:
409:            static final UnicodeSet okAnyway = new UnicodeSet("[^[:Letter:]]");
410:
411:            /*
412:            // tests whether a string is in a set. Also checks for Common and Inherited
413:            public static boolean isIn(String s, UnicodeSet set) {
414:                int cp;
415:                for (int i = 0; i < s.length(); i += UTF16.getCharCount(i)) {
416:                    cp = UTF16.charAt(s, i);
417:                    if (set.contains(cp)) continue;
418:                    if (okAnyway.contains(cp)) continue;
419:                    return false;
420:                }
421:                return true;
422:            }
423:             */
424:
425:        }

www.java2java.com | Contact Us

All other trademarks are property of their respective owners.