Source Code Cross Referenced for AnyTransliterator.java in  » Internationalization-Localization » icu4j » com » ibm » icu » text » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Internationalization Localization » icu4j » com.ibm.icu.text 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*
002:         *****************************************************************
003:         * Copyright (c) 2002-2006, International Business Machines Corporation
004:         * and others.  All Rights Reserved.
005:         *****************************************************************
006:         * Date        Name        Description
007:         * 06/06/2002  aliu        Creation.
008:         *****************************************************************
009:         */
010:        package com.ibm.icu.text;
011:
012:        import com.ibm.icu.lang.UScript;
013:        import java.lang.Math;
014:        import java.util.Enumeration;
015:        import java.util.HashSet;
016:        import java.util.HashMap;
017:        import java.util.Map;
018:        import java.util.MissingResourceException;
019:
020:        /**
021:         * A transliterator that translates multiple input scripts to a single
022:         * output script.  It is named Any-T or Any-T/V, where T is the target
023:         * and V is the optional variant.  The target T is a script.
024:         *
025:         * <p>An AnyTransliterator partitions text into runs of the same
026:         * script, together with adjacent COMMON or INHERITED characters.
027:         * After determining the script of each run, it transliterates from
028:         * that script to the given target/variant.  It does so by
029:         * instantiating a transliterator from the source script to the
030:         * target/variant.  If a run consists only of the target script,
031:         * COMMON, or INHERITED characters, then the run is not changed.
032:         *
033:         * <p>At startup, all possible AnyTransliterators are registered with
034:         * the system, as determined by examining the registered script
035:         * transliterators.
036:         *
037:         * @since ICU 2.2
038:         * @author Alan Liu
039:         */
040:        class AnyTransliterator extends Transliterator {
041:
042:            //------------------------------------------------------------
043:            // Constants
044:
045:            static final char TARGET_SEP = '-';
046:            static final char VARIANT_SEP = '/';
047:            static final String ANY = "Any";
048:            static final String NULL_ID = "Null";
049:            static final String LATIN_PIVOT = "-Latin;Latin-";
050:
051:            /**
052:             * Cache mapping UScriptCode values to Transliterator*.
053:             */
054:            private Map cache;
055:
056:            /**
057:             * The target or target/variant string.
058:             */
059:            private String target;
060:
061:            /**
062:             * The target script code.  Never USCRIPT_INVALID_CODE.
063:             */
064:            private int targetScript;
065:
066:            /**
067:             * Implements {@link Transliterator#handleTransliterate}.
068:             */
069:            protected void handleTransliterate(Replaceable text, Position pos,
070:                    boolean isIncremental) {
071:                int allStart = pos.start;
072:                int allLimit = pos.limit;
073:
074:                ScriptRunIterator it = new ScriptRunIterator(text,
075:                        pos.contextStart, pos.contextLimit);
076:
077:                while (it.next()) {
078:                    // Ignore runs in the ante context
079:                    if (it.limit <= allStart)
080:                        continue;
081:
082:                    // Try to instantiate transliterator from it.scriptCode to
083:                    // our target or target/variant
084:                    Transliterator t = getTransliterator(it.scriptCode);
085:
086:                    if (t == null) {
087:                        // We have no transliterator.  Do nothing, but keep
088:                        // pos.start up to date.
089:                        pos.start = it.limit;
090:                        continue;
091:                    }
092:
093:                    // If the run end is before the transliteration limit, do
094:                    // a non-incremental transliteration.  Otherwise do an
095:                    // incremental one.
096:                    boolean incremental = isIncremental
097:                            && (it.limit >= allLimit);
098:
099:                    pos.start = Math.max(allStart, it.start);
100:                    pos.limit = Math.min(allLimit, it.limit);
101:                    int limit = pos.limit;
102:                    t.filteredTransliterate(text, pos, incremental);
103:                    int delta = pos.limit - limit;
104:                    allLimit += delta;
105:                    it.adjustLimit(delta);
106:
107:                    // We're done if we enter the post context
108:                    if (it.limit >= allLimit)
109:                        break;
110:                }
111:
112:                // Restore limit.  pos.start is fine where the last transliterator
113:                // left it, or at the end of the last run.
114:                pos.limit = allLimit;
115:            }
116:
117:            /**
118:             * Private constructor
119:             * @param id the ID of the form S-T or S-T/V, where T is theTarget
120:             * and V is theVariant.  Must not be empty.
121:             * @param theTarget the target name.  Must not be empty, and must
122:             * name a script corresponding to theTargetScript.
123:             * @param theVariant the variant name, or the empty string if
124:             * there is no variant
125:             * @param theTargetScript the script code corresponding to
126:             * theTarget.
127:             */
128:            private AnyTransliterator(String id, String theTarget,
129:                    String theVariant, int theTargetScript) {
130:                super (id, null);
131:                targetScript = theTargetScript;
132:                cache = new HashMap();
133:
134:                target = theTarget;
135:                if (theVariant.length() > 0) {
136:                    target = theTarget + VARIANT_SEP + theVariant;
137:                }
138:            }
139:
140:            /**
141:             * Returns a transliterator from the given source to our target or
142:             * target/variant.  Returns NULL if the source is the same as our
143:             * target script, or if the source is USCRIPT_INVALID_CODE.
144:             * Caches the result and returns the same transliterator the next
145:             * time.  The caller does NOT own the result and must not delete
146:             * it.
147:             */
148:            private Transliterator getTransliterator(int source) {
149:                if (source == targetScript || source == UScript.INVALID_CODE) {
150:                    return null;
151:                }
152:
153:                Integer key = new Integer(source);
154:                Transliterator t = (Transliterator) cache.get(key);
155:                if (t == null) {
156:                    String sourceName = UScript.getName(source);
157:                    String id = sourceName + TARGET_SEP + target;
158:
159:                    try {
160:                        t = Transliterator.getInstance(id, FORWARD);
161:                    } catch (RuntimeException e) {
162:                    }
163:                    if (t == null) {
164:
165:                        // Try to pivot around Latin, our most common script
166:                        id = sourceName + LATIN_PIVOT + target;
167:                        try {
168:                            t = Transliterator.getInstance(id, FORWARD);
169:                        } catch (RuntimeException e) {
170:                        }
171:                    }
172:
173:                    if (t != null) {
174:                        cache.put(key, t);
175:                    }
176:                }
177:
178:                return t;
179:            }
180:
181:            /**
182:             * Registers standard transliterators with the system.  Called by
183:             * Transliterator during initialization.  Scan all current targets
184:             * and register those that are scripts T as Any-T/V.
185:             */
186:            static void register() {
187:
188:                HashSet seen = new HashSet();
189:
190:                for (Enumeration s = Transliterator.getAvailableSources(); s
191:                        .hasMoreElements();) {
192:                    String source = (String) s.nextElement();
193:
194:                    // Ignore the "Any" source
195:                    if (source.equalsIgnoreCase(ANY))
196:                        continue;
197:
198:                    for (Enumeration t = Transliterator
199:                            .getAvailableTargets(source); t.hasMoreElements();) {
200:                        String target = (String) t.nextElement();
201:
202:                        // Only process each target once
203:                        if (seen.contains(target))
204:                            continue;
205:                        seen.add(target);
206:
207:                        // Get the script code for the target.  If not a script, ignore.
208:                        int targetScript = scriptNameToCode(target);
209:                        if (targetScript == UScript.INVALID_CODE)
210:                            continue;
211:
212:                        for (Enumeration v = Transliterator
213:                                .getAvailableVariants(source, target); v
214:                                .hasMoreElements();) {
215:                            String variant = (String) v.nextElement();
216:
217:                            String id;
218:                            id = TransliteratorIDParser.STVtoID(ANY, target,
219:                                    variant);
220:                            AnyTransliterator trans = new AnyTransliterator(id,
221:                                    target, variant, targetScript);
222:                            Transliterator.registerInstance(trans);
223:                            Transliterator.registerSpecialInverse(target,
224:                                    NULL_ID, false);
225:                        }
226:                    }
227:                }
228:            }
229:
230:            /**
231:             * Return the script code for a given name, or
232:             * UScript.INVALID_CODE if not found.
233:             */
234:            private static int scriptNameToCode(String name) {
235:                try {
236:                    int[] codes = UScript.getCode(name);
237:                    return codes != null ? codes[0] : UScript.INVALID_CODE;
238:                } catch (MissingResourceException e) {
239:                    return UScript.INVALID_CODE;
240:                }
241:            }
242:
243:            //------------------------------------------------------------
244:            // ScriptRunIterator
245:
246:            /**
247:             * Returns a series of ranges corresponding to scripts. They will be
248:             * of the form:
249:             *
250:             * ccccSScSSccccTTcTcccc   - c = common, S = first script, T = second
251:             * |            |          - first run (start, limit)
252:             *          |           |  - second run (start, limit)
253:             *
254:             * That is, the runs will overlap. The reason for this is so that a
255:             * transliterator can consider common characters both before and after
256:             * the scripts.
257:             */
258:            private static class ScriptRunIterator {
259:
260:                private Replaceable text;
261:                private int textStart;
262:                private int textLimit;
263:
264:                /**
265:                 * The code of the current run, valid after next() returns.  May
266:                 * be UScript.INVALID_CODE if and only if the entire text is
267:                 * COMMON/INHERITED.
268:                 */
269:                public int scriptCode;
270:
271:                /**
272:                 * The start of the run, inclusive, valid after next() returns.
273:                 */
274:                public int start;
275:
276:                /**
277:                 * The end of the run, exclusive, valid after next() returns.
278:                 */
279:                public int limit;
280:
281:                /**
282:                 * Constructs a run iterator over the given text from start
283:                 * (inclusive) to limit (exclusive).
284:                 */
285:                public ScriptRunIterator(Replaceable text, int start, int limit) {
286:                    this .text = text;
287:                    this .textStart = start;
288:                    this .textLimit = limit;
289:                    this .limit = start;
290:                }
291:
292:                /**
293:                 * Returns TRUE if there are any more runs.  TRUE is always
294:                 * returned at least once.  Upon return, the caller should
295:                 * examine scriptCode, start, and limit.
296:                 */
297:                public boolean next() {
298:                    int ch;
299:                    int s;
300:
301:                    scriptCode = UScript.INVALID_CODE; // don't know script yet
302:                    start = limit;
303:
304:                    // Are we done?
305:                    if (start == textLimit) {
306:                        return false;
307:                    }
308:
309:                    // Move start back to include adjacent COMMON or INHERITED
310:                    // characters
311:                    while (start > textStart) {
312:                        ch = text.char32At(start - 1); // look back
313:                        s = UScript.getScript(ch);
314:                        if (s == UScript.COMMON || s == UScript.INHERITED) {
315:                            --start;
316:                        } else {
317:                            break;
318:                        }
319:                    }
320:
321:                    // Move limit ahead to include COMMON, INHERITED, and characters
322:                    // of the current script.
323:                    while (limit < textLimit) {
324:                        ch = text.char32At(limit); // look ahead
325:                        s = UScript.getScript(ch);
326:                        if (s != UScript.COMMON && s != UScript.INHERITED) {
327:                            if (scriptCode == UScript.INVALID_CODE) {
328:                                scriptCode = s;
329:                            } else if (s != scriptCode) {
330:                                break;
331:                            }
332:                        }
333:                        ++limit;
334:                    }
335:
336:                    // Return TRUE even if the entire text is COMMON / INHERITED, in
337:                    // which case scriptCode will be UScript.INVALID_CODE.
338:                    return true;
339:                }
340:
341:                /**
342:                 * Adjusts internal indices for a change in the limit index of the
343:                 * given delta.  A positive delta means the limit has increased.
344:                 */
345:                public void adjustLimit(int delta) {
346:                    limit += delta;
347:                    textLimit += delta;
348:                }
349:            }
350:        }
351:
352:        //eof
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.