001: /*
002: *******************************************************************************
003: * Copyright (C) 2002-2004, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */
007:
008: package com.ibm.icu.dev.tool.localeconverter;
009:
010: import java.util.*;
011:
012: class PosixCollationBuilder {
013: private static final int MAX_WEIGHTS = 4;
014: private static final int MAX_COMPOSITION = 4;
015: private static int nextCharNumber = 1;
016: private Hashtable weightSymbols = new Hashtable();
017: private Hashtable contractions = new Hashtable();
018: private Hashtable rules = new Hashtable();
019: private CollationRule lastRule = null;
020: private PosixCharMap map;
021: private SortedVector order;
022: private static int FIRST_WEIGHT_CHAR = 0x0000F7FF;
023: private int nextWeightChar = FIRST_WEIGHT_CHAR;
024: private CollationRule ignoreRule; //rule for the collating-symbol IGNORE
025:
026: public class CollationRule {
027: int charNumber;
028: String value;
029: int nextWeight = 0;
030: String[] weightSource = new String[MAX_WEIGHTS];
031: int weight[][] = null;
032: StringBuffer source = new StringBuffer();
033:
034: public CollationRule(String symbol) {
035: charNumber = nextCharNumber++;
036: value = symbol;
037: for (int i = 0; i < MAX_WEIGHTS; i++) {
038: weightSource[i] = symbol;
039: }
040: source.append(map.backmapValue(symbol));
041: source.append("\t\t");
042: }
043:
044: private CollationRule(CollationRule other, int composition) {
045: charNumber = other.charNumber;
046: value = other.value;
047: nextWeight = other.nextWeight;
048: for (int i = 0; i < MAX_WEIGHTS; i++) {
049: String source = other.weightSource[i];
050: if (source.length() > composition) {
051: weightSource[i] = "" + source.charAt(composition);
052: } else {
053: weightSource[i] = value;
054: }
055: }
056: }
057:
058: //HexToUnicodeTransliterator myTranslit = new HexToUnicodeTransliterator("<U###0>");
059: public void addWeight(String symbol) {
060: // ReplaceableString tSymbol = new ReplaceableString(symbol);
061: // myTranslit.transliterate(tSymbol);
062: //limit the size of a single weight
063: symbol = unescape(symbol);
064: if (symbol.length() > MAX_COMPOSITION) {
065: System.err
066: .println("WARNING: Weights of composition greater than "
067: + MAX_COMPOSITION + " were truncated.");
068: symbol = symbol.substring(0, MAX_COMPOSITION);
069: }
070: //limit the number of weights
071: if (nextWeight < MAX_WEIGHTS) {
072: if (nextWeight > 0) {
073: source.append(";");
074: }
075: for (int i = 0; i < symbol.length(); i++) {
076: source.append(map.backmapValue(""
077: + symbol.charAt(i)));
078: }
079: weightSource[nextWeight++] = symbol;
080: weight = null;
081: }
082: }
083:
084: public int compare(CollationRule other) {
085: if (other == null)
086: return compare(ignoreRule);
087: resolveWeights();
088: other.resolveWeights();
089: int compareSize = Math.min(getSize(), other.getSize());
090: for (int j = 0; j < compareSize; j++) {
091: for (int i = 0; i < MAX_WEIGHTS; i++) {
092: int diff = weight[j][i] - other.weight[j][i];
093: if (diff < 0) {
094: return -(i + 1);
095: }
096: if (diff > 0) {
097: return i + 1;
098: }
099: }
100: }
101: return getSize() - other.getSize();
102: }
103:
104: public boolean isMultiWeight() {
105: return getSize() > 1;
106: }
107:
108: public int getSize() {
109: int size = 0;
110: for (int i = 1; i < weightSource.length; i++) {
111: size = Math.max(size, weightSource[i].length());
112: }
113: return size;
114: }
115:
116: public CollationRule getComponent(int ndx) {
117: return new CollationRule(this , ndx);
118: }
119:
120: public String getValue() {
121: return value;
122: }
123:
124: public String getSymbol() {
125: String newValue = isContraction();
126: if (newValue != null) {
127: return newValue;
128: } else {
129: newValue = isWeightSymbol();
130: if (newValue != null) {
131: return newValue;
132: } else {
133: return value;
134: }
135: }
136: }
137:
138: public String getSource() {
139: return source.toString();
140: }
141:
142: private String isContraction() {
143: return (String) contractions.get(value);
144: }
145:
146: private String isWeightSymbol() {
147: return (String) weightSymbols.get(value);
148: }
149:
150: public CollationRule seeksToRule() {
151: CollationRule comp;
152: if (getSize() <= 1) {
153: comp = this ; //save an object creation
154: } else {
155: comp = getComponent(0);
156: }
157: int ndx = order.indexOf(comp);
158: if (ndx == 0) {
159: return this ;
160: } else {
161: CollationRule exp;
162: do {
163: exp = (CollationRule) order.elementAt(ndx--);
164: } while (ndx > 0 && exp.getSize() > 1);
165: return exp;
166: }
167: }
168:
169: public String getExpansion() {
170: if (getSize() <= 1) {
171: return null;
172: } else {
173: StringBuffer expansion = new StringBuffer();
174: for (int j = 0; j < getSize(); j++) {
175: CollationRule comp = getComponent(j);
176: int ndx = order.indexOf(comp);
177: CollationRule exp;
178: do {
179: exp = (CollationRule) order.elementAt(ndx--);
180: } while (ndx >= 0 && exp.getSize() > 1);
181: expansion.append(exp.getSymbol());
182: }
183: return expansion.toString();
184: }
185: }
186:
187: public String toString() {
188: return source.toString();
189: /* resolveWeights();
190: StringBuffer buf = new StringBuffer();
191: buf.append(charNumber);
192: buf.append(' ');
193: buf.append(value);
194: buf.append(' ');
195: buf.append(getSymbol());
196: buf.append(' ');
197: buf.append((isWeightSymbol() != null)?"W":" ");
198: buf.append(' ');
199: for (int i = 0; i < MAX_WEIGHTS; i++) {
200: buf.append(weightSource[i]);
201: buf.append(' ');
202: }
203: for (int i = 0; i < getSize(); i++) {
204: buf.append("[ ");
205: for (int j = 0; j < MAX_WEIGHTS; j++) {
206: int w = weight[i][j];
207: buf.append(w);
208: buf.append(' ');
209: }
210: buf.append(']');
211: }
212: return buf.toString();
213: */
214: }
215:
216: private void resolveWeights() {
217: if (weight == null) {
218: weight = new int[MAX_COMPOSITION][MAX_WEIGHTS];
219: for (int j = 0; j < MAX_WEIGHTS; j++) {
220: String symbol = weightSource[j];
221: if (symbol.length() <= 1) {
222: weight[0][j] = ordinalityOf(symbol);
223: } else {
224: for (int i = 0; i < symbol.length(); i++) {
225: char c = symbol.charAt(i);
226: weight[i][j] = ordinalityOf("" + c);
227: }
228: }
229: }
230: }
231: }
232: }
233:
234: public PosixCollationBuilder(PosixCharMap map) {
235: this .map = map;
236: String ignoreSymbol = defineWeightSymbol("IGNORE");
237: ignoreRule = new CollationRule(ignoreSymbol);
238: rules.put(ignoreSymbol, ignoreRule);
239: lastRule = ignoreRule;
240: //{{INIT_CONTROLS
241: //}}
242: }
243:
244: public String defineWeightSymbol(String symbol) {
245: order = null;
246: String c = nextFreeWeightChar();
247: map.defineMapping(symbol, c);
248: weightSymbols.put(c, symbol);
249: weightSymbols.put(symbol, c);
250: return c;
251: }
252:
253: public String defineContraction(String symbol, String value) {
254: order = null;
255: String c = nextFreeWeightChar();
256: map.defineMapping(symbol, c);
257: contractions.put(c, value);
258: return c;
259: }
260:
261: private String nextFreeWeightChar() {
262: String result = "";
263: String mappedSource;
264: do {
265: result = "" + (char) nextWeightChar--;
266: mappedSource = map.backmapValue(result);
267: } while (result != mappedSource);
268: return result;
269: }
270:
271: /**
272: * unescape a string in the format <U####>
273: */
274: public static String unescape(String src) {
275: StringBuffer result = new StringBuffer();
276: int maxDig = 4;
277: if (src == null) {
278: return src;
279: }
280: int srcLen = src.length();
281: for (int i = 0; i < srcLen; i++) {
282: char c = src.charAt(i);
283: if (c == '<') {
284: if (srcLen > i + 1) {
285: char c2 = src.charAt(++i);
286: if (c2 == 'U' && (i + maxDig + 1) < srcLen) {
287: i++;
288: if (src.charAt(i + maxDig) == '>') {
289: String subStr = src
290: .substring(i, i + maxDig);
291: try {
292: Integer val = Integer.valueOf(subStr,
293: 16);
294: result.append((char) val.intValue());
295:
296: } catch (NumberFormatException ex) {
297: result.append(c);
298: result.append(c2);
299: result.append(subStr);
300: }
301: i += maxDig;
302: continue;
303: } else {
304: result.append(c);
305: result.append(c2);
306: result.append(src.charAt(i));
307: System.err
308: .println("WARNING: The escape sequence is not terminated at "
309: + i + " in string: " + src);
310: continue;
311: }
312:
313: } else {
314: result.append(c);
315: result.append(c2);
316: continue;
317: }
318: }
319: }
320: result.append(c);
321: }
322: return result.toString();
323: }
324:
325: public int ordinalityOf(String symbol) {
326: // HexToUnicodeTransliterator newTranslit = new HexToUnicodeTransliterator();
327: // ReplaceableString tSymbol = new ReplaceableString(symbol);
328: // newTranslit.transliterate(tSymbol);
329: symbol = unescape(symbol);
330: CollationRule w = (CollationRule) rules.get(symbol);
331: if (w != null) {
332: return w.charNumber;
333: } else {
334: System.err.print("ERROR: Weight symbol not found: ");
335: for (int i = 0; i < symbol.length(); i++) {
336: char c = symbol.charAt(i);
337: System.err.print("\\u");
338: System.err.print(HEX_DIGIT[(c & 0x0F000) >> 12]); // HEX_DIGIT works for octal
339: System.err.print(HEX_DIGIT[(c & 0x0F00) >> 8]); // HEX_DIGIT works for octal
340: System.err.print(HEX_DIGIT[(c & 0x00F0) >> 4]);
341: System.err.println(HEX_DIGIT[(c & 0x000F)]);
342: }
343: System.err
344: .println(" Weight given maximum possible value.");
345: return Integer.MAX_VALUE;
346: }
347: }
348:
349: // HexToUnicodeTransliterator myTranslit = new HexToUnicodeTransliterator("<U###0>");
350: public void addRule(String symbol) {
351: // ReplaceableString tSymbol = new ReplaceableString(symbol);
352: // myTranslit.transliterate(tSymbol);
353: symbol = unescape(symbol);
354: if (symbol.length() > 1) {
355: System.err.println("WARNING: Undefined element '" + symbol
356: + "'. collating-symbol generated.");
357: symbol = defineWeightSymbol(symbol);
358: }
359:
360: order = null;
361: lastRule = new CollationRule(symbol);
362: rules.put(symbol, lastRule);
363: }
364:
365: public void addRule(CollationRule rule) {
366: order = null;
367: lastRule = rule;
368: rules.put(rule.value, rule);
369: }
370:
371: public void addWeight(String weight) {
372: if (weight.length() > 1) {
373: //check to see if it's a bogus weight symbol.
374: weight = map.mapKey(weight);
375: }
376: order = null;
377: lastRule.addWeight(weight);
378: }
379:
380: public Enumeration getRules() {
381: return rules.elements();
382: }
383:
384: public SortedVector getSortOrder() {
385: if (order == null) {
386: order = new SortedVector(new Comparator() {
387: public int compare(final Object i, final Object j) {
388: final CollationRule o1 = (CollationRule) i;
389: final CollationRule o2 = (CollationRule) j;
390: if (o1 != null && o2 != null) {
391: final boolean w1 = o1.isWeightSymbol() != null;
392: final boolean w2 = o2.isWeightSymbol() != null;
393: //sort weights first
394: if (w1 && !w2) {
395: return -1;
396: } else if (!w1 && w2) {
397: return 1;
398: } else {
399: return o1.compare(o2);
400: }
401: }
402: return -1;
403: }
404: });
405: order.addElements(rules.elements());
406: //remove weight symbols from the list
407: int i;
408: for (i = 0; i < order.size(); i++) {
409: CollationRule r = (CollationRule) order.elementAt(i);
410: if (r.isWeightSymbol() == null) {
411: break;
412: }
413: }
414: order.removeElements(0, i);
415: }
416: return order;
417: }
418:
419: static final char[] HEX_DIGIT = { '0', '1', '2', '3', '4', '5',
420: '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
421: //{{DECLARE_CONTROLS
422: //}}
423: }
|