Source Code Cross Referenced for TransliterationRuleSet.java in » Internationalization-Localization » icu4j » com » ibm » icu » text » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation

1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Sevlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI

Java

Java Tutorial

Illustrator Tutorials

GIMP Tutorials

C# / C Sharp

C# / CSharp Tutorial

C# / CSharp Open Source

SQL Server / T-SQL Tutorial

Oracle PL / SQL

Oracle PL/SQL Tutorial

Flash / Flex / ActionScript

VBA / Excel / Access / Word

XML

XML Tutorial

Microsoft Office PowerPoint 2007 Tutorial

Microsoft Office Excel 2007 Tutorial

Microsoft Office Word 2007 Tutorial

Java Source Code / Java Documentation » Internationalization Localization » icu4j » com.ibm.icu.text

Source Cross Referenced Class Diagram Java Document (Java Doc)

001:        /*
002:         *******************************************************************************
003:         * Copyright (C) 1996-2004, International Business Machines Corporation and    *
004:         * others. All Rights Reserved.                                                *
005:         *******************************************************************************
006:         */
007:        package com.ibm.icu.text;
008:
009:        import java.util.*;
010:        import com.ibm.icu.impl.UtilityExtensions;
011:
012:        /**
013:         * A set of rules for a <code>RuleBasedTransliterator</code>.  This set encodes
014:         * the transliteration in one direction from one set of characters or short
015:         * strings to another.  A <code>RuleBasedTransliterator</code> consists of up to
016:         * two such sets, one for the forward direction, and one for the reverse.
017:         *
018:         * <p>A <code>TransliterationRuleSet</code> has one important operation, that of
019:         * finding a matching rule at a given point in the text.  This is accomplished
020:         * by the <code>findMatch()</code> method.
021:         *
022:         * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
023:         *
024:         * @author Alan Liu
025:         */
026:        class TransliterationRuleSet {
027:            /**
028:             * Vector of rules, in the order added.
029:             */
030:            private Vector ruleVector;
031:
032:            /**
033:             * Length of the longest preceding context
034:             */
035:            private int maxContextLength;
036:
037:            /**
038:             * Sorted and indexed table of rules.  This is created by freeze() from
039:             * the rules in ruleVector.  rules.length >= ruleVector.size(), and the
040:             * references in rules[] are aliases of the references in ruleVector.
041:             * A single rule in ruleVector is listed one or more times in rules[].
042:             */
043:            private TransliterationRule[] rules;
044:
045:            /**
046:             * Index table.  For text having a first character c, compute x = c&0xFF.
047:             * Now use rules[index[x]..index[x+1]-1].  This index table is created by
048:             * freeze().
049:             */
050:            private int[] index;
051:
052:            private static final String COPYRIGHT = "\u00A9 IBM Corporation 1999-2001. All rights reserved.";
053:
054:            /**
055:             * Construct a new empty rule set.
056:             */
057:            public TransliterationRuleSet() {
058:                ruleVector = new Vector();
059:                maxContextLength = 0;
060:            }
061:
062:            /**
063:             * Return the maximum context length.
064:             * @return the length of the longest preceding context.
065:             */
066:            public int getMaximumContextLength() {
067:                return maxContextLength;
068:            }
069:
070:            /**
071:             * Add a rule to this set.  Rules are added in order, and order is
072:             * significant.
073:             * @param rule the rule to add
074:             */
075:            public void addRule(TransliterationRule rule) {
076:                ruleVector.addElement(rule);
077:                int len;
078:                if ((len = rule.getAnteContextLength()) > maxContextLength) {
079:                    maxContextLength = len;
080:                }
081:
082:                rules = null;
083:            }
084:
085:            /**
086:             * Close this rule set to further additions, check it for masked rules,
087:             * and index it to optimize performance.
088:             * @exception IllegalArgumentException if some rules are masked
089:             */
090:            public void freeze() {
091:                /* Construct the rule array and index table.  We reorder the
092:                 * rules by sorting them into 256 bins.  Each bin contains all
093:                 * rules matching the index value for that bin.  A rule
094:                 * matches an index value if string whose first key character
095:                 * has a low byte equal to the index value can match the rule.
096:                 *
097:                 * Each bin contains zero or more rules, in the same order
098:                 * they were found originally.  However, the total rules in
099:                 * the bins may exceed the number in the original vector,
100:                 * since rules that have a variable as their first key
101:                 * character will generally fall into more than one bin.
102:                 *
103:                 * That is, each bin contains all rules that either have that
104:                 * first index value as their first key character, or have
105:                 * a set containing the index value as their first character.
106:                 */
107:                int n = ruleVector.size();
108:                index = new int[257]; // [sic]
109:                Vector v = new Vector(2 * n); // heuristic; adjust as needed
110:
111:                /* Precompute the index values.  This saves a LOT of time.
112:                 */
113:                int[] indexValue = new int[n];
114:                for (int j = 0; j < n; ++j) {
115:                    TransliterationRule r = (TransliterationRule) ruleVector
116:                            .elementAt(j);
117:                    indexValue[j] = r.getIndexValue();
118:                }
119:                for (int x = 0; x < 256; ++x) {
120:                    index[x] = v.size();
121:                    for (int j = 0; j < n; ++j) {
122:                        if (indexValue[j] >= 0) {
123:                            if (indexValue[j] == x) {
124:                                v.addElement(ruleVector.elementAt(j));
125:                            }
126:                        } else {
127:                            // If the indexValue is < 0, then the first key character is
128:                            // a set, and we must use the more time-consuming
129:                            // matchesIndexValue check.  In practice this happens
130:                            // rarely, so we seldom tread this code path.
131:                            TransliterationRule r = (TransliterationRule) ruleVector
132:                                    .elementAt(j);
133:                            if (r.matchesIndexValue(x)) {
134:                                v.addElement(r);
135:                            }
136:                        }
137:                    }
138:                }
139:                index[256] = v.size();
140:
141:                /* Freeze things into an array.
142:                 */
143:                rules = new TransliterationRule[v.size()];
144:                v.copyInto(rules);
145:
146:                StringBuffer errors = null;
147:
148:                /* Check for masking.  This is MUCH faster than our old check,
149:                 * which was each rule against each following rule, since we
150:                 * only have to check for masking within each bin now.  It's
151:                 * 256*O(n2^2) instead of O(n1^2), where n1 is the total rule
152:                 * count, and n2 is the per-bin rule count.  But n2<<n1, so
153:                 * it's a big win.
154:                 */
155:                for (int x = 0; x < 256; ++x) {
156:                    for (int j = index[x]; j < index[x + 1] - 1; ++j) {
157:                        TransliterationRule r1 = rules[j];
158:                        for (int k = j + 1; k < index[x + 1]; ++k) {
159:                            TransliterationRule r2 = rules[k];
160:                            if (r1.masks(r2)) {
161:                                if (errors == null) {
162:                                    errors = new StringBuffer();
163:                                } else {
164:                                    errors.append("\n");
165:                                }
166:                                errors.append("Rule " + r1 + " masks " + r2);
167:                            }
168:                        }
169:                    }
170:                }
171:
172:                if (errors != null) {
173:                    throw new IllegalArgumentException(errors.toString());
174:                }
175:            }
176:
177:            /**
178:             * Transliterate the given text with the given UTransPosition
179:             * indices.  Return TRUE if the transliteration should continue
180:             * or FALSE if it should halt (because of a U_PARTIAL_MATCH match).
181:             * Note that FALSE is only ever returned if isIncremental is TRUE.
182:             * @param text the text to be transliterated
183:             * @param pos the position indices, which will be updated
184:             * @param incremental if TRUE, assume new text may be inserted
185:             * at index.limit, and return FALSE if thre is a partial match.
186:             * @return TRUE unless a U_PARTIAL_MATCH has been obtained,
187:             * indicating that transliteration should stop until more text
188:             * arrives.
189:             */
190:            public boolean transliterate(Replaceable text,
191:                    Transliterator.Position pos, boolean incremental) {
192:                int indexByte = text.char32At(pos.start) & 0xFF;
193:                for (int i = index[indexByte]; i < index[indexByte + 1]; ++i) {
194:                    int m = rules[i].matchAndReplace(text, pos, incremental);
195:                    switch (m) {
196:                    case UnicodeMatcher.U_MATCH:
197:                        if (Transliterator.DEBUG) {
198:                            System.out.println((incremental ? "Rule.i: match "
199:                                    : "Rule: match ")
200:                                    + rules[i].toRule(true)
201:                                    + " => "
202:                                    + UtilityExtensions.formatInput(text, pos));
203:                        }
204:                        return true;
205:                    case UnicodeMatcher.U_PARTIAL_MATCH:
206:                        if (Transliterator.DEBUG) {
207:                            System.out
208:                                    .println((incremental ? "Rule.i: partial match "
209:                                            : "Rule: partial match ")
210:                                            + rules[i].toRule(true)
211:                                            + " => "
212:                                            + UtilityExtensions.formatInput(
213:                                                    text, pos));
214:                        }
215:                        return false;
216:                    }
217:                }
218:                // No match or partial match from any rule
219:                pos.start += UTF16.getCharCount(text.char32At(pos.start));
220:                if (Transliterator.DEBUG) {
221:                    System.out.println((incremental ? "Rule.i: no match => "
222:                            : "Rule: no match => ")
223:                            + UtilityExtensions.formatInput(text, pos));
224:                }
225:                return true;
226:            }
227:
228:            /**
229:             * Create rule strings that represents this rule set.
230:             */
231:            String toRules(boolean escapeUnprintable) {
232:                int i;
233:                int count = ruleVector.size();
234:                StringBuffer ruleSource = new StringBuffer();
235:                for (i = 0; i < count; ++i) {
236:                    if (i != 0) {
237:                        ruleSource.append('\n');
238:                    }
239:                    TransliterationRule r = (TransliterationRule) ruleVector
240:                            .elementAt(i);
241:                    ruleSource.append(r.toRule(escapeUnprintable));
242:                }
243:                return ruleSource.toString();
244:            }
245:
246:            /**
247:             * Return the set of all characters that may be modified (getTarget=false)
248:             * or emitted (getTarget=true) by this set.
249:             */
250:            UnicodeSet getSourceTargetSet(boolean getTarget) {
251:                UnicodeSet set = new UnicodeSet();
252:                int count = ruleVector.size();
253:                for (int i = 0; i < count; ++i) {
254:                    TransliterationRule r = (TransliterationRule) ruleVector
255:                            .elementAt(i);
256:                    if (getTarget) {
257:                        r.addTargetSetTo(set);
258:                    } else {
259:                        r.addSourceSetTo(set);
260:                    }
261:                }
262:                return set;
263:            }
264:        }

www.java2java.com | Contact Us

All other trademarks are property of their respective owners.