001: /*
002: **********************************************************************
003: * Copyright (c) 2001, International Business Machines
004: * Corporation and others. All Rights Reserved.
005: **********************************************************************
006: * Date Name Description
007: * 11/29/2001 aliu Creation.
008: * 06/26/2002 aliu Moved to com.ibm.icu.dev.tool.translit
009: **********************************************************************
010: */
011: package com.ibm.icu.dev.tool.translit;
012:
013: import java.util.*;
014: import com.ibm.icu.dev.tool.translit.UnicodeSetClosure;
015: import java.io.*;
016: import com.ibm.icu.text.*;
017:
018: /**
019: * Class that generates source set information for a transliterator.
020: *
021: * To run, use:
022: *
023: * java com.ibm.icu.dev.tool.translit.SourceSet Latin-Katakana NFD lower
024: *
025: * Output is produced in the command console, and a file with more detail is also written.
026: *
027: * To see if it works, use:
028: *
029: * java com.ibm.icu.dev.test.translit.TransliteratorTest -v -nothrow TestIncrementalProgress
030: *
031: * and
032: *
033: * java com.ibm.icu.dev.demo.translit.Demo
034: */
035: public class SourceSet {
036:
037: public static void main(String[] args) throws IOException {
038: if (args.length == 0) {
039: // Compute and display the source sets for all system
040: // transliterators.
041: for (Enumeration e = Transliterator.getAvailableIDs(); e
042: .hasMoreElements();) {
043: String ID = (String) e.nextElement();
044: showSourceSet(ID, Normalizer.NONE, false);
045: }
046: } else {
047: // Usage: ID [NFKD | NFD] [lower]
048: Normalizer.Mode m = Normalizer.NONE;
049: boolean lowerFirst = false;
050: if (args.length >= 2) {
051: if (args[1].equalsIgnoreCase("NFD")) {
052: m = Normalizer.NFD;
053: } else if (args[1].equalsIgnoreCase("NFKD")) {
054: m = Normalizer.NFKD;
055: } else {
056: usage();
057: }
058: }
059: if (args.length >= 3) {
060: if (args[2].equalsIgnoreCase("lower")) {
061: lowerFirst = true;
062: } else {
063: usage();
064: }
065: }
066: if (args.length > 3) {
067: usage();
068: }
069: showSourceSet(args[0], m, lowerFirst);
070: }
071: }
072:
073: static void showSourceSet(String ID, Normalizer.Mode m,
074: boolean lowerFirst) throws IOException {
075: File f = new File("UnicodeSetClosure.txt");
076: String filename = f.getCanonicalFile().toString();
077: out = new PrintWriter(new OutputStreamWriter(
078: new FileOutputStream(filename), "UTF-8"));
079: out.print('\uFEFF'); // BOM
080: System.out.println();
081: System.out.println("Writing " + filename);
082: Transliterator t = Transliterator.getInstance(ID);
083: showSourceSetAux(t, m, lowerFirst, true);
084: showSourceSetAux(t.getInverse(), m, lowerFirst, false);
085: out.close();
086: }
087:
088: static PrintWriter out;
089:
090: static void showSourceSetAux(Transliterator t, Normalizer.Mode m,
091: boolean lowerFirst, boolean forward) throws IOException {
092: UnicodeSet sourceSet = t.getSourceSet();
093: if (m != Normalizer.NONE || lowerFirst) {
094: UnicodeSetClosure.close(sourceSet, m, lowerFirst);
095: }
096: System.out
097: .println(t.getID() + ": " + sourceSet.toPattern(true));
098: out.println("# MINIMAL FILTER GENERATED FOR: " + t.getID()
099: + (forward ? "" : " REVERSE"));
100: out.println(":: " + (forward ? "" : "( ")
101: + sourceSet.toPattern(true) + (forward ? "" : " )")
102: + " ;");
103: out.println("# Unicode: " + sourceSet.toPattern(false));
104: out.println();
105: }
106:
107: static void usage() {
108: System.err.println("Usage: ID [ NFD|NFKD [lower] ]");
109: System.exit(1);
110: }
111: }
|