001: /**
002: *******************************************************************************
003: * Copyright (C) 2002-2005, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */package com.ibm.icu.dev.tool.translit;
007:
008: import com.ibm.icu.text.*;
009: import java.util.Hashtable;
010: import java.io.*;
011: import com.ibm.icu.impl.*;
012: import com.ibm.icu.lang.*;
013:
014: /**
015: * @author ram
016: *
017: * To change this generated comment edit the template variable "typecomment":
018: * Window>Preferences>Java>Templates.
019: * To enable and disable the creation of type comments go to
020: * Window>Preferences>Java>Code Generation.7F
021: */
022: public class WriteIndicCharts {
023:
024: public static void main(String[] args) {
025: writeIICharts();
026: }
027:
028: static String header = "<html>\n"
029: + " <head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">"
030: + " Inter-Indic Transliteration Comparison chart"
031: + " </head>\n" + " <body bgcolor=#FFFFFF>\n"
032: + " <table border=1 width=100% >\n"
033: + " <tr>\n"
034: + " <th width=9%>Inter-Indic</th>\n"
035: + " <th width=9%>Latin</th>\n"
036: + " <th width=9%>Devanagari</th>\n"
037: + " <th width=9%>Bengali</th>\n"
038: + " <th width=9%>Gurmukhi</th>\n"
039: + " <th width=9%>Gujarati</th>\n"
040: + " <th width=9%>Oriya</th>\n"
041: + " <th width=9%>Tamil</th>\n"
042: + " <th width=9%>Telugu</th>\n"
043: + " <th width=9%>Kannada</th>\n"
044: + " <th width=9%>Malayalam</th>\n"
045: + " </tr>\n";
046: static String footer = " </table>\n" + " </body>\n"
047: + "</html>\n";
048:
049: static UnicodeSet deva = new UnicodeSet("[:deva:]");
050: static UnicodeSet beng = new UnicodeSet("[:beng:]");
051: static UnicodeSet gujr = new UnicodeSet("[:gujr:]");
052: static UnicodeSet guru = new UnicodeSet("[:guru:]");
053: static UnicodeSet orya = new UnicodeSet("[:orya:]");
054: static UnicodeSet taml = new UnicodeSet("[:taml:]");
055: static UnicodeSet telu = new UnicodeSet("[:telu:]");
056: static UnicodeSet knda = new UnicodeSet("[:knda:]");
057: static UnicodeSet mlym = new UnicodeSet("[:mlym:]");
058: static UnicodeSet inter = new UnicodeSet("[\uE000-\uE082]");
059:
060: public static void writeIICharts() {
061: try {
062: Transliterator t1 = Transliterator
063: .getInstance("InterIndic-Bengali");
064: Transliterator t2 = Transliterator
065: .getInstance("InterIndic-Gurmukhi");
066: Transliterator t3 = Transliterator
067: .getInstance("InterIndic-Gujarati");
068: Transliterator t4 = Transliterator
069: .getInstance("InterIndic-Oriya");
070: Transliterator t5 = Transliterator
071: .getInstance("InterIndic-Tamil");
072: Transliterator t6 = Transliterator
073: .getInstance("InterIndic-Telugu");
074: Transliterator t7 = Transliterator
075: .getInstance("InterIndic-Kannada");
076: Transliterator t8 = Transliterator
077: .getInstance("InterIndic-Malayalam");
078: Transliterator t9 = Transliterator
079: .getInstance("InterIndic-Devanagari");
080: Transliterator t10 = Transliterator
081: .getInstance("InterIndic-Latin");
082: //UnicodeSetIterator sIter = new UnicodeSetIterator(deva);
083:
084: for (int i = 0x00; i <= 0x80; i++) {
085: String[] arr = new String[10];
086: arr[0] = UTF16.valueOf(i + 0xE000);
087: table.put(UTF16.valueOf(i), arr);
088: }
089:
090: OutputStreamWriter os = new OutputStreamWriter(
091: new FileOutputStream("comparison-chart.html"),
092: "UTF-8");
093:
094: os.write(header);
095:
096: writeIICharts(t9, 0x0900, 1);
097: writeIICharts(t1, 0x0980, 2);
098: writeIICharts(t2, 0x0A00, 3);
099: writeIICharts(t3, 0x0A80, 4);
100: writeIICharts(t4, 0x0B00, 5);
101: writeIICharts(t5, 0x0B80, 6);
102: writeIICharts(t6, 0x0c00, 7);
103: writeIICharts(t7, 0x0C80, 8);
104: writeIICharts(t8, 0x0D00, 9);
105:
106: for (int i = 0x00; i <= 0x80; i++) {
107: String[] temp = (String[]) table.get(UTF16.valueOf(i));
108: boolean write = false;
109: for (int k = 1; k < temp.length && temp[k] != null; k++) {
110: if (UCharacter.getExtendedName(
111: UTF16.charAt(temp[k], 0)).indexOf(
112: "unassigned") < 0
113: || temp[k].indexOf(":UNASSIGNED") < 0) {
114: write = true;
115: }
116: }
117: if (write) {
118: os.write(" <tr>\n");
119: for (int j = 0; j < temp.length; j++) {
120: if (temp[j] != null) {
121: boolean fallback = false;
122: boolean unassigned = false;
123: boolean unmapped = false;
124: boolean consumed = false;
125: String str = temp[j];
126:
127: if (temp[j].indexOf(":FALLBACK") >= 0) {
128: str = temp[j].substring(0, temp[j]
129: .indexOf(":"));
130: fallback = true;
131: // os.write(" <td bgcolor=#FFFF00 align=center title=\""++"\">"+str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
132: }
133: if (temp[j].indexOf(":UNASSIGNED") >= 0) {
134: str = temp[j].substring(0, temp[j]
135: .indexOf(":"));
136: unassigned = true;
137: }
138:
139: if (temp[j].indexOf(":UNMAPPED") >= 0) {
140: str = temp[j].substring(0, temp[j]
141: .indexOf(":"));
142: unmapped = true;
143: }
144: if (temp[j].indexOf(":CONSUMED") >= 0) {
145: str = temp[j].substring(0, temp[j]
146: .indexOf(":"));
147: consumed = true;
148: }
149:
150: String name;
151: StringBuffer nameBuf = new StringBuffer();
152: for (int f = 0; f < str.length(); f++) {
153: if (f > 0) {
154: nameBuf.append("+");
155: }
156: nameBuf.append(UCharacter
157: .getExtendedName(UTF16.charAt(
158: str, f)));
159: }
160: name = nameBuf.toString();
161: if (fallback) {
162:
163: if (UCharacter.getExtendedName(
164: UTF16.charAt(str, 0)).indexOf(
165: "unassigned") > 0) {
166: os
167: .write(" <td width=9% bgcolor=#BBBBFF align=center title=\""
168: + name
169: + "\">"
170: + " <br><tt>"
171: + Utility.hex(str)
172: + "</tt>"
173: + "</td>\n");
174: } else {
175: os
176: .write(" <td width=9% bgcolor=#BBBBFF align=center title=\""
177: + name
178: + "\">"
179: + str
180: + "<br><tt>"
181: + Utility.hex(str)
182: + "</tt>"
183: + "</td>\n");
184: }
185: } else if (unmapped) {
186: os
187: .write(" <td bgcolor=#FF9999 align=center title=\""
188: + name
189: + "\">"
190: + " <br><tt>"
191: + Utility.hex(str)
192: + "</tt>" + "</td>\n");
193: } else if (unassigned) {
194: if (UCharacter.getExtendedName(
195: UTF16.charAt(str, 0)).indexOf(
196: "unassigned") > 0) {
197: os
198: .write(" <td width=9% bgcolor=#00FFFF align=center title=\""
199: + name
200: + "\">"
201: + " <br><tt>"
202: + Utility.hex(str)
203: + "</tt>"
204: + "</td>\n");
205: } else {
206: os
207: .write(" <td width=9% bgcolor=#00FFFF align=center title=\""
208: + name
209: + "\">"
210: + str
211: + "<br><tt>"
212: + Utility.hex(str)
213: + "</tt>"
214: + "</td>\n");
215: }
216: } else if (consumed) {
217: if (UCharacter.getExtendedName(
218: UTF16.charAt(str, 0)).indexOf(
219: "unassigned") > 0) {
220: os
221: .write(" <td width=9% bgcolor=#FFFF55 align=center title=\""
222: + name
223: + "\">"
224: + " <br><tt>"
225: + Utility.hex(str)
226: + "</tt>"
227: + "</td>\n");
228: } else {
229: os
230: .write(" <td width=9% bgcolor=#FFFF55 align=center title=\""
231: + ""
232: + "\">"
233: + " <br><tt>"
234: + Utility.hex(str)
235: + "</tt>"
236: + "</td>\n");
237: }
238: } else if (name.indexOf("private") != -1) {
239: String s = t10.transliterate(str);
240: os
241: .write(" <td width=9% bgcolor=#FFBBBB align=center title=\""
242: + name
243: + "\">"
244: + " <br><tt>"
245: + Utility.hex(str)
246: + "</tt>" + "</td>\n");
247: if (!s.equals(str)) {
248: os
249: .write(" <td width=9% bgcolor=#CCEEDD align=center>"
250: + s + "</td>");
251: } else {
252: os
253: .write(" <td width=9% bgcolor=#CCEEDD align=center> </td>");
254: }
255: } else {
256: os
257: .write(" <td width=9% align=center title=\""
258: + name
259: + "\">"
260: + str
261: + "<br><tt>"
262: + Utility.hex(str)
263: + "</tt>" + "</td>\n");
264: }
265: } else {
266: os
267: .write(" <td width=9% > </td>\n");
268: }
269: }
270: os.write(" </tr>\n");
271: }
272: }
273: os.write(footer);
274: os.close();
275: } catch (Exception e) {
276: e.printStackTrace();
277: }
278: }
279:
280: public static void writeCharts() {
281: try {
282: Transliterator t1 = Transliterator
283: .getInstance("InterIndic-Bengali");
284: Transliterator t2 = Transliterator
285: .getInstance("InterIndic-Gurmukhi");
286: Transliterator t3 = Transliterator
287: .getInstance("InterIndic-Gujarati");
288: Transliterator t4 = Transliterator
289: .getInstance("InterIndic-Oriya");
290: Transliterator t5 = Transliterator
291: .getInstance("InterIndic-Tamil");
292: Transliterator t6 = Transliterator
293: .getInstance("InterIndic-Telugu");
294: Transliterator t7 = Transliterator
295: .getInstance("InterIndic-Kannada");
296: Transliterator t8 = Transliterator
297: .getInstance("InterIndic-Malayalam");
298: Transliterator t9 = Transliterator
299: .getInstance("InterIndic-Devanagari");
300:
301: //UnicodeSetIterator sIter = new UnicodeSetIterator(deva);
302:
303: for (int i = 0x0900; i <= 0x097F; i++) {
304: String[] arr = new String[10];
305: arr[0] = UTF16.valueOf((i & 0xFF) + 0xE000);
306: table.put(UTF16.valueOf(i), arr);
307: }
308:
309: OutputStreamWriter os = new OutputStreamWriter(
310: new FileOutputStream("comparison-chart.html"),
311: "UTF-8");
312:
313: os.write(header);
314: /*
315: writeCharts(t1,beng,1);
316: writeCharts(t2,guru,2);
317: writeCharts(t3,gujr,3);
318: writeCharts(t4,orya,4);
319: writeCharts(t5,taml,5);
320: writeCharts(t6,telu,6);
321: writeCharts(t7,knda,7);
322: writeCharts(t8,mlym,8);
323: */
324: /*
325: writeCharts(t9,0x0900,1);
326: writeCharts(t1,0x0980,2);
327: writeCharts(t2,0x0A00,3);
328: writeCharts(t3,0x0A80,4);
329: writeCharts(t4,0x0B00,5);
330: writeCharts(t5,0x0B80,6);
331: writeCharts(t6,0x0c00,7);
332: writeCharts(t7,0x0C80,8);
333: writeCharts(t8,0x0D00,9);
334: */
335: writeIICharts(t9, 0x0900, 1);
336: writeIICharts(t1, 0x0980, 2);
337: writeIICharts(t2, 0x0A00, 3);
338: writeIICharts(t3, 0x0A80, 4);
339: writeIICharts(t4, 0x0B00, 5);
340: writeIICharts(t5, 0x0B80, 6);
341: writeIICharts(t6, 0x0c00, 7);
342: writeIICharts(t7, 0x0C80, 8);
343: writeIICharts(t8, 0x0D00, 9);
344: for (int i = 0x0900; i <= 0x097F; i++) {
345: String[] temp = (String[]) table.get(UTF16.valueOf(i));
346: boolean write = false;
347: for (int k = 1; k < temp.length; k++) {
348: if (UCharacter.getExtendedName(
349: UTF16.charAt(temp[k], 0)).indexOf(
350: "unassigned") < 0) {
351: write = true;
352: }
353: }
354: if (write) {
355: os.write(" <tr>\n");
356: for (int j = 0; j < temp.length; j++) {
357: if (temp[j] != null) {
358: boolean fallback = false;
359: String str = temp[j];
360:
361: if (temp[j].indexOf(":FALLBACK") >= 0) {
362: str = temp[j].substring(0, temp[j]
363: .indexOf(":"));
364: fallback = true;
365: // os.write(" <td bgcolor=#FFFF00 align=center title=\""++"\">"+str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
366: }
367: String name = UCharacter
368: .getExtendedName(UTF16.charAt(str,
369: 0));
370: if (fallback) {
371: os
372: .write(" <td bgcolor=#BBBBFF align=center title=\""
373: + name
374: + "\">"
375: + str
376: + "<br><tt>"
377: + Utility.hex(str)
378: + "</tt>" + "</td>\n");
379: } else if (name.indexOf("unassigned") != -1) {
380: os
381: .write(" <td bgcolor=#CCCCCC align=center title=\""
382: + name
383: + "\">"
384: + " <br><tt>"
385: + Utility.hex(str)
386: + "</tt>" + "</td>\n");
387: } else if (name.indexOf("private") != -1) {
388:
389: os
390: .write(" <td bgcolor=#FFBBBB align=center title=\""
391: + name
392: + "\">"
393: + " <br><tt>"
394: + Utility.hex(str)
395: + "</tt>" + "</td>\n");
396:
397: } else {
398: os
399: .write(" <td align=center title=\""
400: + name
401: + "\">"
402: + str
403: + "<br><tt>"
404: + Utility.hex(str)
405: + "</tt>" + "</td>\n");
406: }
407: } else {
408: os.write(" <td> </td>\n");
409: }
410: }
411: os.write(" </tr>\n");
412: }
413: }
414: os.write(footer);
415: os.close();
416: } catch (Exception e) {
417: e.printStackTrace();
418: }
419: }
420:
421: static Hashtable table = new Hashtable();
422:
423: static String getKey(int cp) {
424: int delta = cp & 0xFF;
425: delta -= (delta > 0x7f) ? 0x80 : 0;
426: //delta+=0x0900;
427: return UTF16.valueOf(delta);
428: }
429:
430: public static void writeCharts(Transliterator trans, int start,
431: int index) {
432:
433: Transliterator inverse = trans.getInverse();
434: for (int i = 0; i <= 0x7f; i++) {
435: String cp = UTF16.valueOf(start + i);
436: String s1 = inverse.transliterate(cp);
437: String s2 = trans.transliterate(s1);
438:
439: String[] arr = (String[]) table.get(getKey(start + i));
440: if (cp.equals(s2)) {
441: arr[index] = s1;
442: } else {
443: arr[index] = s1 + ":FALLBACK";
444: }
445: }
446: }
447:
448: public static void writeIICharts(Transliterator trans, int start,
449: int index) {
450:
451: Transliterator inverse = trans.getInverse();
452: UnicodeSetIterator iter = new UnicodeSetIterator(inter);
453:
454: while (iter.next()) {
455: String cp = UTF16.valueOf(iter.codepoint);
456: String s1 = trans.transliterate(cp);
457: String s2 = inverse.transliterate(s1);
458: String[] arr = (String[]) table.get(UTF16
459: .valueOf(iter.codepoint & 0xFF));
460: if (cp.equals(s1)) {
461: arr[index] = UTF16.valueOf(start
462: + (((byte) iter.codepoint) & 0xFF))
463: + ":UNASSIGNED";
464: } else if (cp.equals(s2)) {
465: arr[index] = s1;
466: } else if (s1.equals(s2)) {
467: if (s1.equals("")) {
468: arr[index] = UTF16.valueOf(start
469: + (((byte) iter.codepoint) & 0xFF))
470: + ":CONSUMED";
471: } else {
472: arr[index] = s1 + ":FALLBACK";
473: }
474: } else {
475: if (s2.equals("")) {
476: arr[index] = UTF16.valueOf(start
477: + (((byte) iter.codepoint) & 0xFF))
478: + ":CONSUMED";
479: } else {
480: arr[index] = s1 + ":FALLBACK";
481: }
482: }
483: }
484: }
485:
486: public static void writeCharts(Transliterator trans,
487: UnicodeSet target, int index) {
488: UnicodeSetIterator tIter = new UnicodeSetIterator(target);
489: Transliterator inverse = trans.getInverse();
490: while (tIter.next()) {
491: String cp = UTF16.valueOf(tIter.codepoint);
492: String s1 = inverse.transliterate(cp);
493: String s2 = trans.transliterate(s1);
494:
495: String[] arr = (String[]) table
496: .get(getKey(tIter.codepoint));
497: if (cp.equals(s2)) {
498: arr[index] = cp;
499: } else {
500: arr[index] = cp + ":FALLBACK";
501: }
502: }
503: }
504: }
|