001: /*
002: *******************************************************************************
003: * Copyright (C) 1998-2006, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */
007: package com.ibm.icu.dev.tool.layout;
008:
009: import java.util.*;
010:
011: import com.ibm.icu.lang.UCharacter;
012: import com.ibm.icu.lang.UScript;
013: import com.ibm.icu.lang.UProperty;
014: import com.ibm.icu.text.UnicodeSet;
015: import com.ibm.icu.text.UnicodeSetIterator;
016: import com.ibm.icu.impl.Utility;
017:
018: public class ScriptData extends TagValueData {
019: public static class Record {
020: private int startChar;
021: private int endChar;
022: private int scriptCode;
023:
024: Record() {
025: // nothing?
026: }
027:
028: Record(int theChar, int theScriptCode) {
029: this (theChar, theChar, theScriptCode);
030: }
031:
032: Record(int theStartChar, int theEndChar, int theScriptCode) {
033: startChar = theStartChar;
034: endChar = theEndChar;
035: scriptCode = theScriptCode;
036: }
037:
038: public int startChar() {
039: return startChar;
040: }
041:
042: public int endChar() {
043: return endChar;
044: }
045:
046: public int scriptCode() {
047: return scriptCode;
048: }
049:
050: public int compareTo(Record that) {
051: return this .startChar - that.startChar;
052: }
053:
054: public String toString() {
055: return "[" + Utility.hex(startChar, 6) + ".."
056: + Utility.hex(endChar, 6) + ", "
057: + UScript.getShortName(scriptCode).toLowerCase()
058: + "ScriptCode]";
059: }
060: }
061:
062: // TODO: Exceptions could be generated algorithmically
063: private static class TagException {
064: private String icuTag;
065: private String otTag;
066:
067: public TagException(String icu, String ot) {
068: icuTag = icu;
069: otTag = ot;
070: }
071:
072: public String getICUTag() {
073: return icuTag;
074: }
075:
076: public String getOTTag() {
077: return otTag;
078: }
079: }
080:
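// TODO: these are the scripts whose four-letter short name is longer than
// their long name, so the short name repeats its last character; the OT tag
// could be derived by replacing the repeated characters with spaces.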
/**
 * Tags that must not be used exactly as ICU spells them. Each entry maps
 * the lowercased ICU short name to the tag reported by {@code getTag} and
 * {@code makeTag}. Replacement tags are always four characters long,
 * padded on the right with spaces.
 */
private ScriptData.TagException exceptions[] = {
    new ScriptData.TagException("laoo", "lao "),
    new ScriptData.TagException("nkoo", "nko "), // New code from ISO 15924, not sure this will be OT tag
    new ScriptData.TagException("vaii", "vai "), // New code from ISO 15924, not sure this will be OT tag
    // Two trailing spaces: the tag has to be four characters like the
    // others; a three-character "yi " is not the registered Yi tag.
    new ScriptData.TagException("yiii", "yi  ")
};
087:
088: // TODO: binary search the exceptions list?
089: private String getException(String icu) {
090: for (int i = 0; i < exceptions.length; i += 1) {
091: if (exceptions[i].getICUTag().equals(icu)) {
092: return exceptions[i].getOTTag();
093: }
094: }
095:
096: return icu;
097: }
098:
099: //
100: // Straight insertion sort from Knuth vol. III, pg. 81
101: //
102: private void sort() {
103: for (int j = 1; j < fRecords.length; j += 1) {
104: int i;
105: Record v = fRecords[j];
106:
107: for (i = j - 1; i >= 0; i -= 1) {
108: if (v.compareTo(fRecords[i]) >= 0) {
109: break;
110: }
111:
112: fRecords[i + 1] = fRecords[i];
113: }
114:
115: fRecords[i + 1] = v;
116: }
117: }
118:
119: ScriptData() {
120: int commonScript = UCharacter.getPropertyValueEnum(
121: UProperty.SCRIPT, "COMMON");
122: int scriptCount;
123: Vector rv = new Vector();
124:
125: fMinScript = UCharacter
126: .getIntPropertyMinValue(UProperty.SCRIPT);
127: fMaxScript = UCharacter
128: .getIntPropertyMaxValue(UProperty.SCRIPT);
129: scriptCount = fMaxScript - fMinScript + 1;
130:
131: System.out.println("Collecting script data for " + scriptCount
132: + " scripts...");
133:
134: fScriptNames = new String[scriptCount];
135: fScriptTags = new String[scriptCount];
136:
137: for (int script = fMinScript; script <= fMaxScript; script += 1) {
138: fScriptNames[script - fMinScript] = UScript.getName(script)
139: .toUpperCase();
140: fScriptTags[script - fMinScript] = UScript.getShortName(
141: script).toLowerCase();
142:
143: if (script != commonScript) {
144: UnicodeSet scriptSet = new UnicodeSet("\\p{"
145: + fScriptTags[script - fMinScript] + "}");
146: UnicodeSetIterator it = new UnicodeSetIterator(
147: scriptSet);
148:
149: while (it.nextRange()) {
150: Record record = new Record(it.codepoint,
151: it.codepointEnd, script);
152:
153: rv.addElement(record);
154: }
155: }
156: }
157:
158: fRecords = new Record[rv.size()];
159:
160: for (int i = 0; i < rv.size(); i += 1) {
161: fRecords[i] = (Record) rv.elementAt(i);
162: }
163:
164: System.out.println("Collected " + rv.size()
165: + " records. Sorting...");
166: sort();
167:
168: System.out.println("Done.");
169: }
170:
171: public int getMinValue() {
172: return fMinScript;
173: }
174:
175: public int getMaxValue() {
176: return fMaxScript;
177: }
178:
179: public int getRecordCount() {
180: return fRecords.length;
181: }
182:
183: public String getTag(int value) {
184: if (value >= fMinScript && value <= fMaxScript) {
185: return getException(fScriptTags[value - fMinScript]);
186: }
187:
188: return "zyyx";
189: }
190:
191: public String getTagLabel(int value) {
192: if (value >= fMinScript && value <= fMaxScript) {
193: return fScriptTags[value - fMinScript];
194: }
195:
196: return "zyyx";
197: }
198:
199: public String makeTag(int value) {
200: if (value >= fMinScript && value <= fMaxScript) {
201: String tag = getException(fScriptTags[value - fMinScript]);
202:
203: return TagUtilities.makeTag(tag);
204: } else {
205: return "0x00000000";
206: }
207: }
208:
209: public String getName(int value) {
210: if (value >= fMinScript && value <= fMaxScript) {
211: return fScriptNames[value - fMinScript];
212: }
213:
214: return "COMMON";
215: }
216:
217: public Record getRecord(int index) {
218: if (fRecords != null && index < fRecords.length) {
219: return fRecords[index];
220: }
221:
222: return null;
223: }
224:
225: private int fMinScript;
226: private int fMaxScript;
227: private String fScriptNames[];
228: private String fScriptTags[];
229: private Record fRecords[];
230: }
|