001: /*
002: *
003: * %W% %E%
004: *
005: * Portions Copyright 2000-2006 Sun Microsystems, Inc. All Rights
006: * Reserved. Use is subject to license terms.
007: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
008: *
009: * This program is free software; you can redistribute it and/or
010: * modify it under the terms of the GNU General Public License version
011: * 2 only, as published by the Free Software Foundation.
012: *
013: * This program is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of
015: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
016: * General Public License version 2 for more details (a copy is
017: * included at /legal/license.txt).
018: *
019: * You should have received a copy of the GNU General Public License
020: * version 2 along with this work; if not, write to the Free Software
021: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
022: * 02110-1301 USA
023: *
024: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
025: * Clara, CA 95054 or visit www.sun.com if you need additional
026: * information or have any questions.
027: */
028:
029: /*
030: * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
031: * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
032: *
033: * The original version of this source code and documentation is copyrighted
034: * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
035: * materials are provided under terms of a License Agreement between Taligent
036: * and Sun. This technology is protected by multiple US and International
037: * patents. This notice and attribution to Taligent may not be removed.
038: * Taligent is a registered trademark of Taligent, Inc.
039: *
040: */
041:
042: package java.text;
043:
044: import java.util.Vector;
045: import sun.text.CompactIntArray;
046: import sun.text.IntHashtable;
047:
048: /**
049: * This class contains the static state of a RuleBasedCollator: The various
050: * tables that are used by the collation routines. Several RuleBasedCollators
051: * can share a single RBCollationTables object, easing memory requirements and
052: * improving performance.
053: */
054: final class RBCollationTables {
055: //===========================================================================================
056: // The following diagram shows the data structure of the RBCollationTables object.
057: // Suppose we have the rule, where 'o-umlaut' is the unicode char 0x00F6.
058: // "a, A < b, B < c, C, ch, cH, Ch, CH < d, D ... < o, O; 'o-umlaut'/E, 'O-umlaut'/E ...".
059: // What the rule says is, sorts 'ch'ligatures and 'c' only with tertiary difference and
060: // sorts 'o-umlaut' as if it's always expanded with 'e'.
061: //
062: // mapping table contracting list expanding list
063: // (contains all unicode char
064: // entries) ___ ____________ _________________________
065: // ________ +>|_*_|->|'c' |v('c') | +>|v('o')|v('umlaut')|v('e')|
066: // |_\u0001_|-> v('\u0001') | |_:_| |------------| | |-------------------------|
067: // |_\u0002_|-> v('\u0002') | |_:_| |'ch'|v('ch')| | | : |
068: // |____:___| | |_:_| |------------| | |-------------------------|
069: // |____:___| | |'cH'|v('cH')| | | : |
070: // |__'a'___|-> v('a') | |------------| | |-------------------------|
071: // |__'b'___|-> v('b') | |'Ch'|v('Ch')| | | : |
072: // |____:___| | |------------| | |-------------------------|
073: // |____:___| | |'CH'|v('CH')| | | : |
074: // |___'c'__|---------------- ------------ | |-------------------------|
075: // |____:___| | | : |
076: // |o-umlaut|---------------------------------------- |_________________________|
077: // |____:___|
078: //
079: // Noted by Helena Shih on 6/23/97
080: //============================================================================================
081:
082: public RBCollationTables(String rules, int decmp)
083: throws ParseException {
084: this .rules = rules;
085:
086: RBTableBuilder builder = new RBTableBuilder(new BuildAPI());
087: builder.build(rules, decmp); // this object is filled in through
088: // the BuildAPI object
089: }
090:
091: final class BuildAPI {
092: /**
093: * Private constructor. Prevents anyone else besides RBTableBuilder
094: * from gaining direct access to the internals of this class.
095: */
096: private BuildAPI() {
097: }
098:
099: /**
100: * This function is used by RBTableBuilder to fill in all the members of this
101: * object. (Effectively, the builder class functions as a "friend" of this
102: * class, but to avoid changing too much of the logic, it carries around "shadow"
103: * copies of all these variables until the end of the build process and then
104: * copies them en masse into the actual tables object once all the construction
105: * logic is complete. This function does that "copying en masse".
106: * @param f2ary The value for frenchSec (the French-secondary flag)
107: * @param swap The value for SE Asian swapping rule
108: * @param map The collator's character-mapping table (the value for mapping)
109: * @param cTbl The collator's contracting-character table (the value for contractTable)
110: * @param eTbl The collator's expanding-character table (the value for expandTable)
111: * @param cFlgs The hash table of characters that participate in contracting-
112: * character sequences (the value for contractFlags)
113: * @param mso The value for maxSecOrder
114: * @param mto The value for maxTerOrder
115: */
116: void fillInTables(boolean f2ary, boolean swap,
117: CompactIntArray map, Vector cTbl, Vector eTbl,
118: IntHashtable cFlgs, short mso, short mto) {
119: frenchSec = f2ary;
120: seAsianSwapping = swap;
121: mapping = map;
122: contractTable = cTbl;
123: expandTable = eTbl;
124: contractFlags = cFlgs;
125: maxSecOrder = mso;
126: maxTerOrder = mto;
127: }
128: }
129:
130: /**
131: * Gets the table-based rules for the collation object.
132: * @return returns the collation rules that the table collation object
133: * was created from.
134: */
135: public String getRules() {
136: return rules;
137: }
138:
139: public boolean isFrenchSec() {
140: return frenchSec;
141: }
142:
143: public boolean isSEAsianSwapping() {
144: return seAsianSwapping;
145: }
146:
147: // ==============================================================
148: // internal (for use by CollationElementIterator)
149: // ==============================================================
150:
151: /**
152: * Get the entry of hash table of the contracting string in the collation
153: * table.
154: * @param ch the starting character of the contracting string
155: */
156: Vector getContractValues(char ch) {
157: int index = mapping.elementAt(ch);
158: return getContractValues(index - CONTRACTCHARINDEX);
159: }
160:
161: Vector getContractValues(int index) {
162: if (index >= 0) {
163: return (Vector) contractTable.elementAt(index);
164: } else // not found
165: {
166: return null;
167: }
168: }
169:
170: /**
171: * Returns true if this character appears anywhere in a contracting
172: * character sequence. (Used by CollationElementIterator.setOffset().)
173: */
174: boolean usedInContractSeq(char c) {
175: return contractFlags.get(c) == 1;
176: }
177:
178: /**
179: * Return the maximum length of any expansion sequences that end
180: * with the specified comparison order.
181: *
182: * @param order a collation order returned by previous or next.
183: * @return the maximum length of any expansion seuences ending
184: * with the specified order.
185: *
186: * @see CollationElementIterator#getMaxExpansion
187: */
188: int getMaxExpansion(int order) {
189: int result = 1;
190:
191: if (expandTable != null) {
192: // Right now this does a linear search through the entire
193: // expandsion table. If a collator had a large number of expansions,
194: // this could cause a performance problem, but in practise that
195: // rarely happens
196: for (int i = 0; i < expandTable.size(); i++) {
197: int[] valueList = (int[]) expandTable.elementAt(i);
198: int length = valueList.length;
199:
200: if (length > result && valueList[length - 1] == order) {
201: result = length;
202: }
203: }
204: }
205:
206: return result;
207: }
208:
209: /**
210: * Get the entry of hash table of the expanding string in the collation
211: * table.
212: * @param idx the index of the expanding string value list
213: */
214: final int[] getExpandValueList(int order) {
215: return (int[]) expandTable.elementAt(order - EXPANDCHARINDEX);
216: }
217:
218: /**
219: * Get the comarison order of a character from the collation table.
220: * @return the comparison order of a character.
221: */
222: int getUnicodeOrder(char ch) {
223: return mapping.elementAt(ch);
224: }
225:
226: short getMaxSecOrder() {
227: return maxSecOrder;
228: }
229:
230: short getMaxTerOrder() {
231: return maxTerOrder;
232: }
233:
234: /**
235: * Reverse a string.
236: */
237: static void reverse(StringBuffer result, int from, int to) {
238: int i = from;
239: char swap;
240:
241: int j = to - 1;
242: while (i < j) {
243: swap = result.charAt(i);
244: result.setCharAt(i, result.charAt(j));
245: result.setCharAt(j, swap);
246: i++;
247: j--;
248: }
249: }
250:
251: final static int getEntry(Vector list, String name, boolean fwd) {
252: for (int i = 0; i < list.size(); i++) {
253: EntryPair pair = (EntryPair) list.elementAt(i);
254: if (pair.fwd == fwd && pair.entryName.equals(name)) {
255: return i;
256: }
257: }
258: return UNMAPPED;
259: }
260:
261: // ==============================================================
262: // constants
263: // ==============================================================
264: final static int EXPANDCHARINDEX = 0x7E000000; // Expand index follows
265: final static int CONTRACTCHARINDEX = 0x7F000000; // contract indexes follow
266: final static int UNMAPPED = 0xFFFFFFFF;
267:
268: final static int PRIMARYORDERMASK = 0xffff0000;
269: final static int SECONDARYORDERMASK = 0x0000ff00;
270: final static int TERTIARYORDERMASK = 0x000000ff;
271: final static int PRIMARYDIFFERENCEONLY = 0xffff0000;
272: final static int SECONDARYDIFFERENCEONLY = 0xffffff00;
273: final static int PRIMARYORDERSHIFT = 16;
274: final static int SECONDARYORDERSHIFT = 8;
275:
276: // ==============================================================
277: // instance variables
278: // ==============================================================
279: private String rules = null;
280: private boolean frenchSec = false;
281: private boolean seAsianSwapping = false;
282:
283: private CompactIntArray mapping = null;
284: private Vector contractTable = null;
285: private Vector expandTable = null;
286: private IntHashtable contractFlags = null;
287:
288: private short maxSecOrder = 0;
289: private short maxTerOrder = 0;
290: }
|