001: /*
002: *******************************************************************************
003: *
004: * Copyright (C) 2004-2005, International Business Machines
005: * Corporation and others. All Rights Reserved.
006: *
007: *******************************************************************************
008: * file name: UBiDiProps.java
009: * encoding: US-ASCII
010: * tab size: 8 (not used)
011: * indentation:4
012: *
013: * created on: 2005jan16
014: * created by: Markus W. Scherer
015: *
016: * Low-level Unicode bidi/shaping properties access.
017: * Java port of ubidi_props.h/.c.
018: */
019:
020: package com.ibm.icu.impl;
021:
022: import java.io.InputStream;
023: import java.io.DataInputStream;
024: import java.io.BufferedInputStream;
025: import java.io.IOException;
026: import java.util.MissingResourceException;
027:
028: import com.ibm.icu.util.VersionInfo;
029: import com.ibm.icu.util.RangeValueIterator;
030:
031: import com.ibm.icu.text.UnicodeSet;
032:
033: import com.ibm.icu.lang.UCharacter;
034: import com.ibm.icu.lang.UProperty;
035:
036: public final class UBiDiProps {
037: // constructors etc. --------------------------------------------------- ***
038:
039: // port of ubidi_openProps()
040: public UBiDiProps() throws IOException {
041: InputStream is = ICUData.getStream(ICUResourceBundle.ICU_BUNDLE
042: + "/" + DATA_FILE_NAME);
043: BufferedInputStream b = new BufferedInputStream(is, 4096 /* data buffer size */);
044: readData(b);
045: b.close();
046: is.close();
047:
048: }
049:
050: private void readData(InputStream is) throws IOException {
051: DataInputStream inputStream = new DataInputStream(is);
052:
053: // read the header
054: unicodeVersion = ICUBinary.readHeader(inputStream, FMT,
055: new IsAcceptable());
056:
057: // read indexes[]
058: int i, count;
059: count = inputStream.readInt();
060: if (count < IX_INDEX_TOP) {
061: throw new IOException("indexes[0] too small in "
062: + DATA_FILE_NAME);
063: }
064: indexes = new int[count];
065:
066: indexes[0] = count;
067: for (i = 1; i < count; ++i) {
068: indexes[i] = inputStream.readInt();
069: }
070:
071: // read the trie
072: trie = new CharTrie(inputStream, null);
073:
074: // read mirrors[]
075: count = indexes[IX_MIRROR_LENGTH];
076: if (count > 0) {
077: mirrors = new int[count];
078: for (i = 0; i < count; ++i) {
079: mirrors[i] = inputStream.readInt();
080: }
081: }
082:
083: // read jgArray[]
084: count = indexes[IX_JG_LIMIT] - indexes[IX_JG_START];
085: jgArray = new byte[count];
086: for (i = 0; i < count; ++i) {
087: jgArray[i] = inputStream.readByte();
088: }
089: }
090:
091: // implement ICUBinary.Authenticate
092: private final class IsAcceptable implements ICUBinary.Authenticate {
093: public boolean isDataVersionAcceptable(byte version[]) {
094: formatVersion = version;
095: return version[0] == 1
096: && version[2] == Trie.INDEX_STAGE_1_SHIFT_
097: && version[3] == Trie.INDEX_STAGE_2_SHIFT_;
098: }
099: }
100:
101: // UBiDiProps singleton
102: private static UBiDiProps gBdp = null;
103:
104: // port of ubidi_getSingleton()
105: public static final synchronized UBiDiProps getSingleton()
106: throws IOException {
107: if (gBdp == null) {
108: gBdp = new UBiDiProps();
109: }
110: return gBdp;
111: }
112:
113: // UBiDiProps dummy singleton
114: private static UBiDiProps gBdpDummy = null;
115:
116: private UBiDiProps(boolean makeDummy) { // ignore makeDummy, only creates a unique signature
117: formatVersion = new byte[] { 1, 0, Trie.INDEX_STAGE_1_SHIFT_,
118: Trie.INDEX_STAGE_2_SHIFT_ };
119: unicodeVersion = new byte[] { 2, 0, 0, 0 };
120: indexes = new int[IX_TOP];
121: indexes[0] = IX_TOP;
122: trie = new CharTrie(0, 0, null); // dummy trie, always returns 0
123: }
124:
125: /**
126: * Get a singleton dummy object, one that works with no real data.
127: * This can be used when the real data is not available.
128: * Using the dummy can reduce checks for available data after an initial failure.
129: * Port of ucase_getDummy().
130: */
131: public static final synchronized UBiDiProps getDummy() {
132: if (gBdpDummy == null) {
133: gBdpDummy = new UBiDiProps(true);
134: }
135: return gBdpDummy;
136: }
137:
138: // set of property starts for UnicodeSet ------------------------------- ***
139:
140: public final void addPropertyStarts(UnicodeSet set) {
141: int i, length;
142: int c, start, limit;
143:
144: byte prev, jg;
145:
146: /* add the start code point of each same-value range of the trie */
147: TrieIterator iter = new TrieIterator(trie);
148: RangeValueIterator.Element element = new RangeValueIterator.Element();
149:
150: while (iter.next(element)) {
151: set.add(element.start);
152: }
153:
154: /* add the code points from the bidi mirroring table */
155: length = indexes[IX_MIRROR_LENGTH];
156: for (i = 0; i < length; ++i) {
157: c = getMirrorCodePoint(mirrors[i]);
158: set.add(c, c + 1);
159: }
160:
161: /* add the code points from the Joining_Group array where the value changes */
162: start = indexes[IX_JG_START];
163: limit = indexes[IX_JG_LIMIT];
164: length = limit - start;
165: prev = 0;
166: for (i = 0; i < length; ++i) {
167: jg = jgArray[i];
168: if (jg != prev) {
169: set.add(start);
170: prev = jg;
171: }
172: ++start;
173: }
174: if (prev != 0) {
175: /* add the limit code point if the last value was not 0 (it is now start==limit) */
176: set.add(limit);
177: }
178:
179: /* add code points with hardcoded properties, plus the ones following them */
180:
181: /* (none right now) */
182: }
183:
184: // property access functions ------------------------------------------- ***
185:
186: public final int getMaxValue(int which) {
187: int max;
188:
189: max = indexes[IX_MAX_VALUES];
190: switch (which) {
191: case UProperty.BIDI_CLASS:
192: return (max & CLASS_MASK);
193: case UProperty.JOINING_GROUP:
194: return (max & MAX_JG_MASK) >> MAX_JG_SHIFT;
195: case UProperty.JOINING_TYPE:
196: return (max & JT_MASK) >> JT_SHIFT;
197: default:
198: return -1; /* undefined */
199: }
200: }
201:
202: public final int getClass(int c) {
203: return getClassFromProps(trie.getCodePointValue(c));
204: }
205:
206: public final boolean isMirrored(int c) {
207: return getFlagFromProps(trie.getCodePointValue(c),
208: IS_MIRRORED_SHIFT);
209: }
210:
211: public final int getMirror(int c) {
212: int props;
213: int delta;
214:
215: props = trie.getCodePointValue(c);
216: delta = ((short) props) >> MIRROR_DELTA_SHIFT;
217: if (delta != ESC_MIRROR_DELTA) {
218: return c + delta;
219: } else {
220: /* look for mirror code point in the mirrors[] table */
221: int m;
222: int i, length;
223: int c2;
224:
225: length = indexes[IX_MIRROR_LENGTH];
226:
227: /* linear search */
228: for (i = 0; i < length; ++i) {
229: m = mirrors[i];
230: c2 = getMirrorCodePoint(m);
231: if (c == c2) {
232: /* found c, return its mirror code point using the index in m */
233: return getMirrorCodePoint(mirrors[getMirrorIndex(m)]);
234: } else if (c < c2) {
235: break;
236: }
237: }
238:
239: /* c not found, return it itself */
240: return c;
241: }
242: }
243:
244: public final boolean isBidiControl(int c) {
245: return getFlagFromProps(trie.getCodePointValue(c),
246: BIDI_CONTROL_SHIFT);
247: }
248:
249: public final boolean isJoinControl(int c) {
250: return getFlagFromProps(trie.getCodePointValue(c),
251: JOIN_CONTROL_SHIFT);
252: }
253:
254: public final int getJoiningType(int c) {
255: return (trie.getCodePointValue(c) & JT_MASK) >> JT_SHIFT;
256: }
257:
258: public final int getJoiningGroup(int c) {
259: int start, limit;
260:
261: start = indexes[IX_JG_START];
262: limit = indexes[IX_JG_LIMIT];
263: if (start <= c && c < limit) {
264: return (int) jgArray[c - start] & 0xff;
265: } else {
266: return UCharacter.JoiningGroup.NO_JOINING_GROUP;
267: }
268: }
269:
270: // data members -------------------------------------------------------- ***
271: private int indexes[];
272: private int mirrors[];
273: private byte jgArray[];
274:
275: private CharTrie trie;
276: private byte formatVersion[];
277: private byte unicodeVersion[];
278:
279: // data format constants ----------------------------------------------- ***
280: private static final String DATA_NAME = "ubidi";
281: private static final String DATA_TYPE = "icu";
282: private static final String DATA_FILE_NAME = DATA_NAME + "."
283: + DATA_TYPE;
284:
285: /* format "BiDi" */
286: private static final byte FMT[] = { 0x42, 0x69, 0x44, 0x69 };
287:
288: /* indexes into indexes[] */
289: private static final int IX_INDEX_TOP = 0;
290: private static final int IX_LENGTH = 1;
291: private static final int IX_TRIE_SIZE = 2;
292: private static final int IX_MIRROR_LENGTH = 3;
293:
294: private static final int IX_JG_START = 4;
295: private static final int IX_JG_LIMIT = 5;
296:
297: private static final int IX_MAX_VALUES = 15;
298: private static final int IX_TOP = 16;
299:
300: // definitions for 16-bit bidi/shaping properties word ----------------- ***
301:
302: /* CLASS_SHIFT=0, *//* bidi class: 5 bits (4..0) */
303: private static final int JT_SHIFT = 5; /* joining type: 3 bits (7..5) */
304:
305: /* private static final int _SHIFT=8, reserved: 2 bits (9..8) */
306:
307: private static final int JOIN_CONTROL_SHIFT = 10;
308: private static final int BIDI_CONTROL_SHIFT = 11;
309:
310: private static final int IS_MIRRORED_SHIFT = 12; /* 'is mirrored' */
311: private static final int MIRROR_DELTA_SHIFT = 13; /* bidi mirroring delta: 3 bits (15..13) */
312:
313: private static final int MAX_JG_SHIFT = 16; /* max JG value in indexes[MAX_VALUES_INDEX] bits 23..16 */
314:
315: private static final int CLASS_MASK = 0x0000001f;
316: private static final int JT_MASK = 0x000000e0;
317:
318: private static final int MAX_JG_MASK = 0x00ff0000;
319:
320: private static final int getClassFromProps(int props) {
321: return props & CLASS_MASK;
322: }
323:
324: private static final boolean getFlagFromProps(int props, int shift) {
325: return ((props >> shift) & 1) != 0;
326: }
327:
328: private static final int ESC_MIRROR_DELTA = -4;
329: private static final int MIN_MIRROR_DELTA = -3;
330: private static final int MAX_MIRROR_DELTA = 3;
331:
332: // definitions for 32-bit mirror table entry --------------------------- ***
333:
334: /* the source Unicode code point takes 21 bits (20..0) */
335: private static final int MIRROR_INDEX_SHIFT = 21;
336: private static final int MAX_MIRROR_INDEX = 0x7ff;
337:
338: private static final int getMirrorCodePoint(int m) {
339: return m & 0x1fffff;
340: }
341:
342: private static final int getMirrorIndex(int m) {
343: return m >>> MIRROR_INDEX_SHIFT;
344: }
345: }
|