001: /**
002: *******************************************************************************
003: * Copyright (C) 2001-2004, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */package com.ibm.icu.dev.tool.translit;
007:
008: import com.ibm.icu.text.*;
009: import java.io.*;
010:
011: /**
012: * A command-line interface to the ICU4J transliterators.
013: * @author Alan Liu
014: */
015: public class Trans {
016:
017: public static void main(String[] args) throws Exception {
018: boolean isHTML = false;
019: int pos = 0;
020:
021: String transName = null; // first untagged string is this
022: String inText = null; // all other untagged strings are this
023: String inName = null;
024: String outName = null;
025:
026: while (pos < args.length) {
027: if (args[pos].equals("-html")) {
028: isHTML = true;
029: } else if (args[pos].equals("-i")) {
030: if (++pos == args.length)
031: usage();
032: inName = args[pos];
033: } else if (args[pos].equals("-o")) {
034: if (++pos == args.length)
035: usage();
036: outName = args[pos];
037: } else if (transName == null) {
038: transName = args[pos];
039: } else {
040: if (inText == null) {
041: inText = args[pos];
042: } else {
043: inText = inText + " " + args[pos];
044: }
045: }
046: ++pos;
047: }
048:
049: if (inText != null && inName != null) {
050: usage();
051: }
052:
053: Transliterator trans = Transliterator.getInstance(transName);
054: BufferedReader in = null;
055: if (inName != null) {
056: in = new BufferedReader(new InputStreamReader(
057: new FileInputStream(inName), "UTF8"));
058: }
059: PrintWriter out = null;
060: if (outName != null) {
061: out = new PrintWriter(new OutputStreamWriter(
062: new FileOutputStream(outName), "UTF8"));
063: } else {
064: out = new PrintWriter(System.out);
065: }
066: trans(trans, inText, in, out, isHTML);
067: out.close();
068: }
069:
070: static void trans(Transliterator trans, String inText,
071: BufferedReader in, PrintWriter out, boolean isHTML)
072: throws IOException {
073: boolean inTag = false; // If true, we are within a <tag>
074: for (;;) {
075: String line = null;
076: if (inText != null) {
077: line = inText;
078: inText = null;
079: } else if (in != null) {
080: line = in.readLine();
081: }
082: if (line == null) {
083: break;
084: }
085: if (isHTML) {
086: // Pass tags between < and > unchanged
087: StringBuffer buf = new StringBuffer();
088: int right = -1;
089: if (inTag) {
090: right = line.indexOf('>');
091: if (right < 0) {
092: right = line.length() - 1;
093: }
094: buf.append(line.substring(0, right + 1));
095: if (DEBUG)
096: System.out.println("*S:"
097: + line.substring(0, right + 1));
098: inTag = false;
099: }
100: for (;;) {
101: int left = line.indexOf('<', right + 1);
102: if (left < 0) {
103: if (right < line.length() - 1) {
104: buf.append(trans.transliterate(line
105: .substring(right + 1)));
106: if (DEBUG)
107: System.out.println("T:"
108: + line.substring(right + 1));
109: }
110: break;
111: }
112: // Append transliterated segment right+1..left-1
113: buf.append(trans.transliterate(line.substring(
114: right + 1, left)));
115: if (DEBUG)
116: System.out.println("T:"
117: + line.substring(right + 1, left));
118: right = line.indexOf('>', left + 1);
119: if (right < 0) {
120: inTag = true;
121: buf.append(line.substring(left));
122: if (DEBUG)
123: System.out.println("S:"
124: + line.substring(left));
125: break;
126: }
127: buf.append(line.substring(left, right + 1));
128: if (DEBUG)
129: System.out.println("S:"
130: + line.substring(left, right + 1));
131: }
132: line = buf.toString();
133: } else {
134: line = trans.transliterate(line);
135: }
136: out.println(line);
137: }
138: }
139:
140: static final boolean DEBUG = false;
141:
142: /**
143: * Emit usage and die.
144: */
145: static void usage() {
146: System.out
147: .println("Usage: java com.ibm.icu.dev.tool.translit.Trans [-html] <trans> ( <input> | -i <infile>) [ -o <outfile> ]");
148: System.out.println("<trans> Name of transliterator");
149: System.out.println("<input> Text to transliterate");
150: System.out.println("<infile> Name of input file");
151: System.out.println("<outfile> Name of output file");
152: System.out
153: .println("-html Only transliterate text outside of <tags>");
154: System.out
155: .println("Input may come from the command line or a file.\n");
156: System.out.println("Ouput may go to stdout or a file.\n");
157: System.exit(0);
158: }
159: }
|