001: /**
002: *******************************************************************************
003: * Copyright (C) 2001-2005, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */package com.ibm.icu.dev.demo.translit;
007:
008: import com.ibm.icu.lang.*;
009: import com.ibm.icu.text.*;
010: import java.util.*;
011:
012: public class AnyTransliterator extends Transliterator {
013:
014: static final boolean DEBUG = false;
015: private String targetName;
016: private RunIterator it;
017: private Position run;
018:
/**
 * Creates a transliterator whose ID is "Any-" followed by the target name.
 *
 * @param targetName name of the target (script, optionally with a variant)
 *                   that every run is converted to
 * @param filter     filter handed to the Transliterator superclass; may be null
 * @param it         iterator used to split the input into runs
 */
public AnyTransliterator(String targetName, UnicodeFilter filter,
        RunIterator it) {
    super("Any-" + targetName, filter);
    // Scratch position that handleTransliterate fills in for each run.
    this.run = new Position();
    this.it = it;
    this.targetName = targetName;
}
026:
/**
 * Creates a transliterator that splits its input with a fresh
 * ScriptRunIterator.
 *
 * @param targetName name of the target that every run is converted to
 * @param filter     filter handed to the Transliterator superclass; may be null
 */
public AnyTransliterator(String targetName, UnicodeFilter filter) {
    this(targetName, filter, new ScriptRunIterator());
}
030:
031: static private Transliterator hex = Transliterator
032: .getInstance("[^\\u0020-\\u007E] hex");
033:
034: protected void handleTransliterate(Replaceable text,
035: Position offsets, boolean isIncremental) {
036: if (DEBUG) {
037: System.out.println("- handleTransliterate "
038: + hex.transliterate(text.toString()) + ", "
039: + toString(offsets));
040: }
041: it.reset(text, offsets);
042:
043: while (it.next(run)) {
044: if (targetName.equalsIgnoreCase(it.getName())) {
045: if (DEBUG)
046: System.out.println("Skipping identical: "
047: + targetName);
048: run.start = run.limit; // show we processed
049: continue; // skip if same
050: }
051:
052: Transliterator t;
053: String id = it.getName() + '-' + targetName;
054: try {
055: t = Transliterator.getInstance(id);
056: } catch (IllegalArgumentException ex) {
057: if (DEBUG)
058: System.out.println("Couldn't find: " + id
059: + ", Trying Latin as Pivot");
060: id = it.getName() + "-Latin; Latin-" + targetName;
061: try {
062: t = Transliterator.getInstance(id);
063: } catch (IllegalArgumentException ex2) {
064: if (DEBUG)
065: System.out.println("Couldn't find: " + id);
066: continue;
067: }
068: }
069: // TODO catch error later!!
070:
071: if (DEBUG) {
072: System.out.println(t.getID());
073: System.out.println("input: "
074: + hex.transliterate(text.toString()) + ", "
075: + toString(run));
076: }
077:
078: if (isIncremental && it.atEnd()) {
079: t.transliterate(text, run);
080: } else {
081: t.finishTransliteration(text, run);
082: }
083: // adjust the offsets in line with the changes
084: it.adjust(run.limit);
085:
086: if (DEBUG) {
087: System.out.println("output: "
088: + hex.transliterate(text.toString()) + ", "
089: + toString(run));
090: }
091: }
092:
093: // show how far we got!
094: it.getExpanse(offsets);
095: if (run.start == run.limit)
096: offsets.start = offsets.limit;
097: else
098: offsets.start = run.start;
099: if (DEBUG) {
100: System.out.println("+ handleTransliterate: " + ", "
101: + toString(offsets));
102: System.out.println();
103: }
104: }
105:
106: // should be method on Position
107: public static String toString(Position offsets) {
108: return "[cs: " + offsets.contextStart + ", s: " + offsets.start
109: + ", l: " + offsets.limit + ", cl: "
110: + offsets.contextLimit + "]";
111: }
112:
113: public interface RunIterator {
114: public void reset(Replaceable text, Position expanse);
115:
116: public void getExpanse(Position run);
117:
118: public void reset();
119:
120: public boolean next(Position run);
121:
122: public void getCurrent(Position run);
123:
124: public String getName();
125:
126: public void adjust(int newCurrentLimit);
127:
128: public boolean atEnd();
129: }
130:
/**
 * Returns a series of ranges corresponding to scripts. They will be of the form:
 * <pre>
 * ccccSScSSccccTTcTcccc - where c is common, S is the first script and T is the second
 * |           |         - first run
 *          |          | - second run
 * </pre>
 * That is, the runs will overlap. The reason for this is so that a transliterator can
 * consider common characters both before and after the scripts.
 * The only time that contextStart != start is for the first run
 * (the context is the start context of the entire expanse).
 * The only time that contextLimit != limit is for the last run
 * (the context is the end context of the entire expanse).
 */
143: public static class ScriptRunIterator implements RunIterator {
144: private Replaceable text;
145: private Position expanse = new Position();
146: private Position current = new Position();
147: private int script;
148: private boolean done = true;
149:
150: public void reset(Replaceable text, Position expanse) {
151: set(this .expanse, expanse);
152: this .text = text;
153: reset();
154: }
155:
156: public void reset() {
157: done = false;
158: //this.expanse = expanse;
159: script = UScript.INVALID_CODE;
160: // set up first range to be empty, at beginning
161: current.contextStart = expanse.contextStart;
162: current.start = current.limit = current.contextLimit = expanse.start;
163: }
164:
165: public boolean next(Position run) {
166: if (done)
167: return false;
168: if (DEBUG) {
169: System.out.println("+cs: " + current.contextStart
170: + ", s: " + current.start + ", l: "
171: + current.limit + ", cl: "
172: + current.contextLimit);
173: }
174: // reset start context run to the last end
175: current.start = current.limit;
176:
177: // Phase 1. Backup the START value through COMMON until we get to expanse.start or a real script.
178: int i, cp;
179: int limit = expanse.start;
180: for (i = current.start; i > limit; i -= UTF16
181: .getCharCount(cp)) {
182: cp = text.char32At(i);
183: int script = UScript.getScript(cp);
184: if (script != UScript.COMMON
185: && script != UScript.INHERITED)
186: break;
187: }
188: current.start = i;
189: current.contextStart = (i == limit) ? expanse.contextStart
190: : i; // extend at start
191:
192: // PHASE 2. Move up the LIMIT value through COMMON or single script until we get to expanse.limit
193: int lastScript = UScript.COMMON;
194: //int veryLastScript = UScript.COMMON;
195: limit = expanse.limit;
196: for (i = current.limit; i < limit; i += UTF16
197: .getCharCount(cp)) {
198: cp = text.char32At(i);
199: int script = UScript.getScript(cp);
200: if (script == UScript.INHERITED)
201: script = UScript.COMMON;
202: if (script != UScript.COMMON) {
203: // if we find a real script:
204: // if we already had a script, bail
205: // otherwise set our script
206: if (lastScript == UScript.COMMON)
207: lastScript = script;
208: else if (lastScript != script)
209: break;
210: }
211: }
212: current.limit = i;
213: current.contextLimit = (i == limit) ? expanse.contextLimit
214: : i; // extend at end
215: done = (i == limit);
216: script = lastScript;
217:
218: if (DEBUG) {
219: System.out.println("-cs: " + current.contextStart
220: + ", s: " + current.start + ", l: "
221: + current.limit + ", cl: "
222: + current.contextLimit);
223: }
224:
225: set(run, current);
226: return true;
227: }
228:
229: // SHOULD BE METHOD ON POSITION
230: public static void set(Position run, Position current) {
231: run.contextStart = current.contextStart;
232: run.start = current.start;
233: run.limit = current.limit;
234: run.contextLimit = current.contextLimit;
235: }
236:
237: public boolean atEnd() {
238: return current.limit == expanse.limit;
239: }
240:
241: public void getCurrent(Position run) {
242: set(run, current);
243: }
244:
245: public void getExpanse(Position run) {
246: set(run, expanse);
247: }
248:
249: public String getName() {
250: return UScript.getName(script);
251: }
252:
253: public void adjust(int newCurrentLimit) {
254: if (expanse == null) {
255: throw new IllegalArgumentException(
256: "Must reset() before calling");
257: }
258: int delta = newCurrentLimit - current.limit;
259: current.limit += delta;
260: current.contextLimit += delta;
261: expanse.limit += delta;
262: expanse.contextLimit += delta;
263: }
264:
265: // register Any-Script for every script.
266:
267: private static Set scriptList = new HashSet();
268:
269: public static void registerAnyToScript() {
270: synchronized (scriptList) {
271: Enumeration sources = Transliterator
272: .getAvailableSources();
273: while (sources.hasMoreElements()) {
274: String source = (String) sources.nextElement();
275: if (source.equals("Any"))
276: continue; // to keep from looping
277:
278: Enumeration targets = Transliterator
279: .getAvailableTargets(source);
280: while (targets.hasMoreElements()) {
281: String target = (String) targets.nextElement();
282: if (UScript.getCode(target) == null)
283: continue; // SKIP unless we have a script (or locale)
284: if (scriptList.contains(target))
285: continue; // already encountered
286: scriptList.add(target); // otherwise add for later testing
287:
288: Set variantSet = add(new TreeSet(),
289: Transliterator.getAvailableVariants(
290: source, target));
291: if (variantSet.size() < 2) {
292: AnyTransliterator at = new AnyTransliterator(
293: target, null);
294: DummyFactory.add(at.getID(), at);
295: } else {
296: Iterator variants = variantSet.iterator();
297: while (variants.hasNext()) {
298: String variant = (String) variants
299: .next();
300: AnyTransliterator at = new AnyTransliterator(
301: (variant.length() > 0) ? target
302: + "/" + variant
303: : target, null);
304: DummyFactory.add(at.getID(), at);
305: }
306: }
307: }
308: }
309: }
310: }
311:
312: static class DummyFactory implements Transliterator.Factory {
313: static DummyFactory singleton = new DummyFactory();
314: static HashMap m = new HashMap();
315:
316: // Since Transliterators are immutable, we don't have to clone on set & get
317: static void add(String ID, Transliterator t) {
318: m.put(ID, t);
319: System.out.println("Registering: " + ID + ", "
320: + t.toRules(true));
321: Transliterator.registerFactory(ID, singleton);
322: }
323:
324: public Transliterator getInstance(String ID) {
325: return (Transliterator) m.get(ID);
326: }
327: }
328:
329: // Nice little Utility for converting Enumeration to collection
330: static Set add(Set s, Enumeration enumeration) {
331: while (enumeration.hasMoreElements()) {
332: s.add(enumeration.nextElement());
333: }
334: return s;
335: }
336:
337: }
338: }
|