Source Code Cross Referenced for CompoundTransliterator.java in » Internationalization-Localization » icu4j » com » ibm » icu » text » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Internationalization Localization » icu4j » com.ibm.icu.text
Source Cross Referenced Class Diagram Java Document (Java Doc)
001:        /*
002:         *******************************************************************************
003:         * Copyright (C) 1996-2005, International Business Machines Corporation and    *
004:         * others. All Rights Reserved.                                                *
005:         *******************************************************************************
006:         */
007:        package com.ibm.icu.text;
008:
009:        import com.ibm.icu.impl.Utility;
010:        import com.ibm.icu.impl.UtilityExtensions;
011:        import java.util.Vector;
012:
013:        /**
014:         * A transliterator that is composed of two or more other
015:         * transliterator objects linked together.  For example, if one
016:         * transliterator transliterates from script A to script B, and
017:         * another transliterates from script B to script C, the two may be
018:         * combined to form a new transliterator from A to C.
019:         *
020:         * <p>Composed transliterators may not behave as expected.  For
021:         * example, inverses may not combine to form the identity
022:         * transliterator.  See the class documentation for {@link
023:         * Transliterator} for details.
024:         *
025:         * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
026:         *
027:         * @author Alan Liu
028:         * @internal
029:         */
030:        class CompoundTransliterator extends Transliterator {
031:
// The component transliterators of this chain.  Filled in by the
// constructors (directly, or through init()) and applied in array
// order by handleTransliterate().
private Transliterator[] trans;

// Count handed to the (Vector, int) constructor; every other
// constructor leaves it at zero.  toRules() reads it to decide
// whether to emit the global filter line and "::Null;" separators.
private int numAnonymousRBTs = 0;

// Copyright notice.  Not referenced by any code in this class.
private static final String COPYRIGHT = "\u00A9 IBM Corporation 1999-2001. All rights reserved.";
037:
038:            /**
039:             * Constructs a new compound transliterator given an array of
040:             * transliterators.  The array of transliterators may be of any
041:             * length, including zero or one, however, useful compound
042:             * transliterators have at least two components.
043:             * @param transliterators array of <code>Transliterator</code>
044:             * objects
045:             * @param filter the filter.  Any character for which
046:             * <tt>filter.contains()</tt> returns <tt>false</tt> will not be
047:             * altered by this transliterator.  If <tt>filter</tt> is
048:             * <tt>null</tt> then no filtering is applied.
049:             * @internal
050:             */
051:            public CompoundTransliterator(Transliterator[] transliterators,
052:                    UnicodeFilter filter) {
053:                super (joinIDs(transliterators), filter);
054:                trans = new Transliterator[transliterators.length];
055:                System.arraycopy(transliterators, 0, trans, 0, trans.length);
056:                computeMaximumContextLength();
057:            }
058:
/**
 * Builds an unfiltered compound transliterator out of an array of
 * component transliterators.  Equivalent to calling the two-argument
 * constructor with a <code>null</code> filter.
 *
 * @param transliterators the components, in the order in which they
 * are to be applied; any length is accepted, but useful chains have
 * at least two entries
 * @internal
 */
public CompoundTransliterator(Transliterator[] transliterators) {
    this(transliterators, (UnicodeFilter) null);
}
071:
072:            /**
073:             * Constructs a new compound transliterator.
074:             * @param ID compound ID
075:             * @param direction either Transliterator.FORWARD or Transliterator.REVERSE
076:             * @param filter a global filter for this compound transliterator
077:             * or null
078:             * @internal
079:             */
080:            public CompoundTransliterator(String ID, int direction,
081:                    UnicodeFilter filter) {
082:                super (ID, filter);
083:                init(ID, direction, true);
084:            }
085:
/**
 * Builds a compound transliterator from a compound ID string,
 * without a filter.
 *
 * @param ID compound ID
 * @param direction either Transliterator.FORWARD or Transliterator.REVERSE
 * @internal
 */
public CompoundTransliterator(String ID, int direction) {
    this(ID, direction, (UnicodeFilter) null);
}
095:
/**
 * Builds a FORWARD compound transliterator from a compound ID
 * string, without a filter.
 *
 * @param ID compound ID
 * @internal
 */
public CompoundTransliterator(String ID) {
    // The two-argument overload supplies the null filter.
    this(ID, FORWARD);
}
104:
/**
 * Package-private constructor that wraps a vector of
 * transliterators, recording zero anonymous rule-based components.
 * The object is created with an empty ID; giving it a proper ID is
 * the caller's job.
 *
 * @param list the component transliterators, in FORWARD order
 */
CompoundTransliterator(Vector list) {
    this(list, 0);
}
113:
114:            CompoundTransliterator(Vector list, int numAnonymousRBTs) {
115:                super ("", null);
116:                trans = null;
117:                init(list, FORWARD, false);
118:                this .numAnonymousRBTs = numAnonymousRBTs;
119:                // assume caller will fixup ID
120:            }
121:
122:            /**
123:             * Finish constructing a transliterator: only to be called by
124:             * constructors.  Before calling init(), set trans and filter to NULL.
125:             * @param id the id containing ';'-separated entries
126:             * @param direction either FORWARD or REVERSE
127:             * @param idSplitPoint the index into id at which the
128:             * splitTrans should be inserted, if there is one, or
129:             * -1 if there is none.
130:             * @param splitTrans a transliterator to be inserted
131:             * before the entry at offset idSplitPoint in the id string.  May be
132:             * NULL to insert no entry.
133:             * @param fixReverseID if TRUE, then reconstruct the ID of reverse
134:             * entries by calling getID() of component entries.  Some constructors
135:             * do not require this because they apply a facade ID anyway.
136:             */
137:            private void init(String id, int direction, boolean fixReverseID) {
138:                // assert(trans == 0);
139:
140:                Vector list = new Vector();
141:                UnicodeSet[] compoundFilter = new UnicodeSet[1];
142:                StringBuffer regenID = new StringBuffer();
143:                if (!TransliteratorIDParser.parseCompoundID(id, direction,
144:                        regenID, list, compoundFilter)) {
145:                    throw new IllegalArgumentException("Invalid ID " + id);
146:                }
147:
148:                TransliteratorIDParser.instantiateList(list);
149:
150:                init(list, direction, fixReverseID);
151:
152:                if (compoundFilter[0] != null) {
153:                    setFilter(compoundFilter[0]);
154:                }
155:            }
156:
157:            /**
158:             * Finish constructing a transliterator: only to be called by
159:             * constructors.  Before calling init(), set trans and filter to NULL.
160:             * @param list a vector of transliterator objects to be adopted.  It
161:             * should NOT be empty.  The list should be in declared order.  That
162:             * is, it should be in the FORWARD order; if direction is REVERSE then
163:             * the list order will be reversed.
164:             * @param direction either FORWARD or REVERSE
165:             * @param fixReverseID if TRUE, then reconstruct the ID of reverse
166:             * entries by calling getID() of component entries.  Some constructors
167:             * do not require this because they apply a facade ID anyway.
168:             */
169:            private void init(Vector list, int direction, boolean fixReverseID) {
170:                // assert(trans == 0);
171:
172:                // Allocate array
173:                int count = list.size();
174:                trans = new Transliterator[count];
175:
176:                // Move the transliterators from the vector into an array.
177:                // Reverse the order if necessary.
178:                int i;
179:                for (i = 0; i < count; ++i) {
180:                    int j = (direction == FORWARD) ? i : count - 1 - i;
181:                    trans[i] = (Transliterator) list.elementAt(j);
182:                }
183:
184:                // If the direction is UTRANS_REVERSE then we may need to fix the
185:                // ID.
186:                if (direction == REVERSE && fixReverseID) {
187:                    StringBuffer newID = new StringBuffer();
188:                    for (i = 0; i < count; ++i) {
189:                        if (i > 0) {
190:                            newID.append(ID_DELIM);
191:                        }
192:                        newID.append(trans[i].getID());
193:                    }
194:                    setID(newID.toString());
195:                }
196:
197:                computeMaximumContextLength();
198:            }
199:
200:            /**
201:             * Return the IDs of the given list of transliterators, concatenated
202:             * with ';' delimiting them.  Equivalent to the perlish expression
203:             * join(';', map($_.getID(), transliterators).
204:             */
205:            private static String joinIDs(Transliterator[] transliterators) {
206:                StringBuffer id = new StringBuffer();
207:                for (int i = 0; i < transliterators.length; ++i) {
208:                    if (i > 0) {
209:                        id.append(';');
210:                    }
211:                    id.append(transliterators[i].getID());
212:                }
213:                return id.toString();
214:            }
215:
216:            /**
217:             * Returns the number of transliterators in this chain.
218:             * @return number of transliterators in this chain.
219:             * @internal
220:             */
221:            public int getCount() {
222:                return trans.length;
223:            }
224:
225:            /**
226:             * Returns the transliterator at the given index in this chain.
227:             * @param index index into chain, from 0 to <code>getCount() - 1</code>
228:             * @return transliterator at the given index
229:             * @internal
230:             */
231:            public Transliterator getTransliterator(int index) {
232:                return trans[index];
233:            }
234:
235:            /**
236:             * Append c to buf, unless buf is empty or buf already ends in c.
237:             */
238:            private static void _smartAppend(StringBuffer buf, char c) {
239:                if (buf.length() != 0 && buf.charAt(buf.length() - 1) != c) {
240:                    buf.append(c);
241:                }
242:            }
243:
244:            /**
245:             * Override Transliterator:
246:             * Create a rule string that can be passed to createFromRules()
247:             * to recreate this transliterator.
248:             * @param escapeUnprintable if TRUE then convert unprintable
249:             * character to their hex escape representations, \\uxxxx or
250:     * \\Uxxxxxxxx.  Unprintable characters are those other than
251:     * U+000A, U+0020..U+007E.
252:     * @return the rule string
253:     * @internal
254:     */
255:            public String toRules(boolean escapeUnprintable) {
256:                // We do NOT call toRules() on our component transliterators, in
257:                // general.  If we have several rule-based transliterators, this
258:                // yields a concatenation of the rules -- not what we want.  We do
259:                // handle compound RBT transliterators specially -- those for which
260:                // compoundRBTIndex >= 0.  For the transliterator at compoundRBTIndex,
261:                // we do call toRules() recursively.
262:                StringBuffer rulesSource = new StringBuffer();
263:                if (numAnonymousRBTs >= 1 && getFilter() != null) {
264:                    // If we are a compound RBT and if we have a global
265:                    // filter, then emit it at the top.
266:                    rulesSource.append("::").append(
267:                            getFilter().toPattern(escapeUnprintable)).append(
268:                            ID_DELIM);
269:                }
270:                for (int i = 0; i < trans.length; ++i) {
271:                    String rule;
272:
273:                    // Anonymous RuleBasedTransliterators (inline rules and
274:                    // ::BEGIN/::END blocks) are given IDs that begin with
275:                    // "%Pass": use toRules() to write all the rules to the output
276:                    // (and insert "::Null;" if we have two in a row)
277:                    if (trans[i].getID().startsWith("%Pass")) {
278:                        rule = trans[i].toRules(escapeUnprintable);
279:                        if (numAnonymousRBTs > 1 && i > 0
280:                                && trans[i - 1].getID().startsWith("%Pass"))
281:                            rule = "::Null;" + rule;
282:
283:                        // we also use toRules() on CompoundTransliterators (which we
284:                        // check for by looking for a semicolon in the ID)-- this gets
285:                        // the list of their child transliterators output in the right
286:                        // format
287:                    } else if (trans[i].getID().indexOf(';') >= 0) {
288:                        rule = trans[i].toRules(escapeUnprintable);
289:
290:                        // for everything else, use baseToRules()
291:                    } else {
292:                        rule = trans[i].baseToRules(escapeUnprintable);
293:                    }
294:                    _smartAppend(rulesSource, '\n');
295:                    rulesSource.append(rule);
296:                    _smartAppend(rulesSource, ID_DELIM);
297:                }
298:                return rulesSource.toString();
299:            }
300:
301:            /**
302:             * Return the set of all characters that may be modified by this
303:             * Transliterator, ignoring the effect of our filter.
304:             * @internal
305:             */
306:            protected UnicodeSet handleGetSourceSet() {
307:                UnicodeSet set = new UnicodeSet();
308:                for (int i = 0; i < trans.length; ++i) {
309:                    set.addAll(trans[i].getSourceSet());
310:                    // Take the example of Hiragana-Latin.  This is really
311:                    // Hiragana-Katakana; Katakana-Latin.  The source set of
312:                    // these two is roughly [:Hiragana:] and [:Katakana:].
313:                    // But the source set for the entire transliterator is
314:                    // actually [:Hiragana:] ONLY -- that is, the first
315:                    // non-empty source set.
316:
317:                    // This is a heuristic, and not 100% reliable.
318:                    if (!set.isEmpty()) {
319:                        break;
320:                    }
321:                }
322:                return set;
323:            }
324:
325:            /**
326:             * Returns the set of all characters that may be generated as
327:             * replacement text by this transliterator.
328:             * @internal
329:             */
330:            public UnicodeSet getTargetSet() {
331:                UnicodeSet set = new UnicodeSet();
332:                for (int i = 0; i < trans.length; ++i) {
333:                    // This is a heuristic, and not 100% reliable.
334:                    set.addAll(trans[i].getTargetSet());
335:                }
336:                return set;
337:            }
338:
339:            /**
340:             * Implements {@link Transliterator#handleTransliterate}.
341:             * @internal
342:             */
343:            protected void handleTransliterate(Replaceable text,
344:                    Position index, boolean incremental) {
345:                /* Call each transliterator with the same start value and
346:                 * initial cursor index, but with the limit index as modified
347:                 * by preceding transliterators.  The cursor index must be
348:                 * reset for each transliterator to give each a chance to
349:                 * transliterate the text.  The initial cursor index is known
350:                 * to still point to the same place after each transliterator
351:                 * is called because each transliterator will not change the
352:                 * text between start and the initial value of cursor.
353:                 *
354:                 * IMPORTANT: After the first transliterator, each subsequent
355:                 * transliterator only gets to transliterate text committed by
356:                 * preceding transliterators; that is, the cursor (output
357:                 * value) of transliterator i becomes the limit (input value)
358:                 * of transliterator i+1.  Finally, the overall limit is fixed
359:                 * up before we return.
360:                 *
361:                 * Assumptions we make here:
362:                 * (1) contextStart <= start <= limit <= contextLimit <= text.length()
363:                 * (2) start <= start' <= limit'  ;cursor doesn't move back
364:                 * (3) start <= limit'            ;text before cursor unchanged
365:                 * - start' is the value of start after calling handleKT
366:                 * - limit' is the value of limit after calling handleKT
367:                 */
368:
369:                /**
370:                 * Example: 3 transliterators.  This example illustrates the
371:                 * mechanics we need to implement.  C, S, and L are the contextStart,
372:                 * start, and limit.  gl is the globalLimit.  contextLimit is
373:                 * equal to limit throughout.
374:                 *
375:                 * 1. h-u, changes hex to Unicode
376:                 *
377:                 *    4  7  a  d  0      4  7  a
378:                 *    abc/u0061/u    =>  abca/u
379:                 *    C  S       L       C   S L   gl=f->a
380:                 *
381:                 * 2. upup, changes "x" to "XX"
382:                 *
383:                 *    4  7  a       4  7  a
384:                 *    abca/u    =>  abcAA/u
385:                 *    C  SL         C    S
386:                 *                       L    gl=a->b
387:                 * 3. u-h, changes Unicode to hex
388:                 *
389:                 *    4  7  a        4  7  a  d  0  3
390:                 *    abcAA/u    =>  abc/u0041/u0041/u
391:                 *    C  S L         C              S
392:                 *                                  L   gl=b->15
393:                 * 4. return
394:                 *
395:                 *    4  7  a  d  0  3
396:                 *    abc/u0041/u0041/u
397:                 *    C S L
398:                 */
399:
400:                if (trans.length < 1) {
401:                    index.start = index.limit;
402:                    return; // Short circuit for empty compound transliterators
403:                }
404:
405:                // compoundLimit is the limit value for the entire compound
406:                // operation.  We overwrite index.limit with the previous
407:                // index.start.  After each transliteration, we update
408:                // compoundLimit for insertions or deletions that have happened.
409:                int compoundLimit = index.limit;
410:
411:                // compoundStart is the start for the entire compound
412:                // operation.
413:                int compoundStart = index.start;
414:
415:                int delta = 0; // delta in length
416:
417:                StringBuffer log = null;
418:                if (DEBUG) {
419:                    log = new StringBuffer("CompoundTransliterator{" + getID()
420:                            + (incremental ? "}i: IN=" : "}: IN="));
421:                    UtilityExtensions.formatInput(log, text, index);
422:                    System.out.println(Utility.escape(log.toString()));
423:                }
424:
425:                // Give each transliterator a crack at the run of characters.
426:                // See comments at the top of the method for more detail.
427:                for (int i = 0; i < trans.length; ++i) {
428:                    index.start = compoundStart; // Reset start
429:                    int limit = index.limit;
430:
431:                    if (index.start == index.limit) {
432:                        // Short circuit for empty range
433:                        if (DEBUG) {
434:                            System.out.println("CompoundTransliterator["
435:                                    + i
436:                                    + ".."
437:                                    + (trans.length - 1)
438:                                    + (incremental ? "]i: " : "]: ")
439:                                    + UtilityExtensions
440:                                            .formatInput(text, index)
441:                                    + " (NOTHING TO DO)");
442:                        }
443:                        break;
444:                    }
445:
446:                    if (DEBUG) {
447:                        log.setLength(0);
448:                        log.append("CompoundTransliterator[" + i + "="
449:                                + trans[i].getID()
450:                                + (incremental ? "]i: " : "]: "));
451:                        UtilityExtensions.formatInput(log, text, index);
452:                    }
453:
454:                    trans[i].filteredTransliterate(text, index, incremental);
455:
456:                    // In a properly written transliterator, start == limit after
457:                    // handleTransliterate() returns when incremental is false.
458:                    // Catch cases where the subclass doesn't do this, and throw
459:                    // an exception.  (Just pinning start to limit is a bad idea,
460:                    // because what's probably happening is that the subclass
461:                    // isn't transliterating all the way to the end, and it should
462:                    // in non-incremental mode.)
463:                    if (!incremental && index.start != index.limit) {
464:                        throw new RuntimeException(
465:                                "ERROR: Incomplete non-incremental transliteration by "
466:                                        + trans[i].getID());
467:                    }
468:
469:                    if (DEBUG) {
470:                        log.append(" => ");
471:                        UtilityExtensions.formatInput(log, text, index);
472:                        System.out.println(Utility.escape(log.toString()));
473:                    }
474:
475:                    // Cumulative delta for insertions/deletions
476:                    delta += index.limit - limit;
477:
478:                    if (incremental) {
479:                        // In the incremental case, only allow subsequent
480:                        // transliterators to modify what has already been
481:                        // completely processed by prior transliterators.  In the
482:                        // non-incrmental case, allow each transliterator to
483:                        // process the entire text.
484:                        index.limit = index.start;
485:                    }
486:                }
487:
488:                compoundLimit += delta;
489:
490:                // Start is good where it is -- where the last transliterator left
491:                // it.  Limit needs to be put back where it was, modulo
492:                // adjustments for deletions/insertions.
493:                index.limit = compoundLimit;
494:
495:                if (DEBUG) {
496:                    log.setLength(0);
497:                    log.append("CompoundTransliterator{" + getID()
498:                            + (incremental ? "}i: OUT=" : "}: OUT="));
499:                    UtilityExtensions.formatInput(log, text, index);
500:                    System.out.println(Utility.escape(log.toString()));
501:                }
502:            }
503:
504:            /**
505:             * Compute and set the length of the longest context required by this transliterator.
506:             * This is <em>preceding</em> context.
507:             */
508:            private void computeMaximumContextLength() {
509:                int max = 0;
510:                for (int i = 0; i < trans.length; ++i) {
511:                    int len = trans[i].getMaximumContextLength();
512:                    if (len > max) {
513:                        max = len;
514:                    }
515:                }
516:                setMaximumContextLength(max);
517:            }
518:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.