001: /*
002: **********************************************************************
003: * Copyright (c) 2002-2006, International Business Machines
004: * Corporation and others. All Rights Reserved.
005: **********************************************************************
006: * Author: Alan Liu
007: * Created: November 5 2002
008: * Since: ICU 2.4
009: **********************************************************************
010: */
011: package com.ibm.icu.impl;
012:
013: import java.io.*;
014:
015: import com.ibm.icu.lang.*;
016:
017: /**
018: * Wrapper for the pnames.icu binary data file. This data file is
019: * imported from icu4c. It contains property and property value
020: * aliases from the UCD files PropertyAliases.txt and
021: * PropertyValueAliases.txt. The file is built by the icu4c tool
022: * genpname. It must be built on an ASCII big-endian platform to be
023: * usable in icu4j.
024: *
025: * This class performs two functions.
026: *
027: * (1) It can import the flat binary data into a tree of usable
028: * objects.
029: *
030: * (2) It provides an API to access the tree of objects.
031: *
032: * Needless to say, this class is tightly coupled to the binary format
033: * of icu4c's pnames.icu file.
034: *
035: * Each time a UPropertyAliases is constructed, the pnames.icu file is
036: * read, parsed, and a data tree assembled. Clients should create one
037: * singleton instance and cache it.
038: *
039: * @author Alan Liu
040: * @since ICU 2.4
041: */
042: public final class UPropertyAliases implements ICUBinary.Authenticate {
043:
044: //----------------------------------------------------------------
045: // Runtime data. This is an unflattened representation of the
046: // data in pnames.icu.
047:
048: /**
049: * Map from property enum value to nameGroupPool[] index
050: */
051: private NonContiguousEnumToShort enumToName;
052:
053: /**
054: * Map from property alias to property enum value
055: */
056: private NameToEnum nameToEnum;
057:
058: /**
059: * Map from property enum value to valueMapArray[] index
060: */
061: private NonContiguousEnumToShort enumToValue;
062:
063: /**
064: * Each entry represents a binary or enumerated property
065: */
066: private ValueMap valueMapArray[];
067:
068: /**
069: * Pool of concatenated integer runs. Each run contains one
070: * or more entries. The last entry of the run is negative.
071: * A zero entry indicates "n/a" in the Property*Aliases.txt.
072: * Each entry is a stringPool[] index.
073: */
074: private short nameGroupPool[];
075:
076: /**
077: * Pool of strings.
078: */
079: private String stringPool[];
080:
081: //----------------------------------------------------------------
082: // Constants
083:
084: /**
085: * Debug flag (not really constant)
086: */
087: private static boolean DEBUG = ICUDebug.enabled("pnames");
088:
089: /**
090: * File format that this class understands.
091: * See icu4c/src/common/propname.h.
092: */
093: private static final byte DATA_FORMAT_ID[] = { 'p', 'n', 'a', 'm' };
094:
095: /**
096: * File version that this class understands.
097: * See icu4c/src/common/propname.h.
098: */
099: private static final byte DATA_FORMAT_VERSION = 1;
100:
101: /**
102: * Name of the datafile
103: */
104: private static final String DATA_FILE_NAME = ICUResourceBundle.ICU_BUNDLE
105: + "/pnames.icu";
106:
107: /**
108: * Buffer size of datafile. The whole file is < 16k.
109: */
110: private static final int DATA_BUFFER_SIZE = 8192;
111:
112: //----------------------------------------------------------------
113: // Constructor
114:
115: /**
116: * Constructs a UPropertyAliases object. The binary file
117: * DATA_FILE_NAME is read from the jar/classpath and unflattened
118: * into member variables of this object.
119: */
120: public UPropertyAliases() throws IOException {
121:
122: // Open the .icu file from the jar/classpath
123: InputStream is = ICUData.getRequiredStream(DATA_FILE_NAME);
124: BufferedInputStream b = new BufferedInputStream(is,
125: DATA_BUFFER_SIZE);
126: // Read and discard Unicode version...
127: /* byte unicodeVersion[] = */ICUBinary.readHeader(b,
128: DATA_FORMAT_ID, this );
129: DataInputStream d = new DataInputStream(b);
130:
131: // Record the origin position of the file. Keep enough around
132: // to seek back to the start of the header.
133: d.mark(256);
134:
135: short enumToName_offset = d.readShort();
136: short nameToEnum_offset = d.readShort();
137: short enumToValue_offset = d.readShort();
138: short total_size = d.readShort();
139: short valueMap_offset = d.readShort();
140: short valueMap_count = d.readShort();
141: short nameGroupPool_offset = d.readShort();
142: short nameGroupPool_count = d.readShort();
143: short stringPool_offset = d.readShort();
144: short stringPool_count = d.readShort();
145:
146: if (DEBUG) {
147: System.out.println("enumToName_offset=" + enumToName_offset
148: + "\n" + "nameToEnum_offset=" + nameToEnum_offset
149: + "\n" + "enumToValue_offset=" + enumToValue_offset
150: + "\n" + "total_size=" + total_size + "\n"
151: + "valueMap_offset=" + valueMap_offset + "\n"
152: + "valueMap_count=" + valueMap_count + "\n"
153: + "nameGroupPool_offset=" + nameGroupPool_offset
154: + "\n" + "nameGroupPool_count="
155: + nameGroupPool_count + "\n" + "stringPool_offset="
156: + stringPool_offset + "\n" + "stringPool_count="
157: + stringPool_count);
158: }
159:
160: // Read it all (less than 32k). Seeking around (using
161: // mark/reset/skipBytes) doesn't work directly on the file,
162: // but it works fine if we read everything into a byte[] array
163: // first.
164: byte raw[] = new byte[total_size];
165: d.reset();
166: d.readFully(raw);
167: d.close();
168:
169: Builder builder = new Builder(raw);
170:
171: stringPool = builder.readStringPool(stringPool_offset,
172: stringPool_count);
173:
174: nameGroupPool = builder.readNameGroupPool(nameGroupPool_offset,
175: nameGroupPool_count);
176:
177: builder.setupValueMap_map(valueMap_offset, valueMap_count);
178:
179: // Some of the following data structures have to be set up
180: // here, _not_ in Builder. That's because they are instances
181: // of non-static inner classes, and they contain implicit
182: // references to this.
183:
184: builder.seek(enumToName_offset);
185: enumToName = new NonContiguousEnumToShort(builder);
186: builder.nameGroupOffsetToIndex(enumToName.offsetArray);
187:
188: builder.seek(nameToEnum_offset);
189: nameToEnum = new NameToEnum(builder);
190:
191: builder.seek(enumToValue_offset);
192: enumToValue = new NonContiguousEnumToShort(builder);
193: builder.valueMapOffsetToIndex(enumToValue.offsetArray);
194:
195: valueMapArray = new ValueMap[valueMap_count];
196: for (int i = 0; i < valueMap_count; ++i) {
197: // Must seek to the start of each entry.
198: builder.seek(builder.valueMap_map[i]);
199: valueMapArray[i] = new ValueMap(builder);
200: }
201:
202: builder.close();
203: }
204:
205: //----------------------------------------------------------------
206: // Public API
207:
208: /**
209: * Return a property name given a property enum. Multiple
210: * names may be available for each property; the nameChoice
211: * selects among them.
212: */
213: public String getPropertyName(int property, int nameChoice) {
214: short nameGroupIndex = enumToName.getShort(property);
215: return chooseNameInGroup(nameGroupIndex, nameChoice);
216: }
217:
218: /**
219: * Return a property enum given one of its property names.
220: */
221: public int getPropertyEnum(String propertyAlias) {
222: return nameToEnum.getEnum(propertyAlias);
223: }
224:
225: /**
226: * Return a value name given a property enum and a value enum.
227: * Multiple names may be available for each value; the nameChoice
228: * selects among them.
229: */
230: public String getPropertyValueName(int property, int value,
231: int nameChoice) {
232: ValueMap vm = getValueMap(property);
233: short nameGroupIndex = vm.enumToName.getShort(value);
234: return chooseNameInGroup(nameGroupIndex, nameChoice);
235: }
236:
237: /**
238: * Return a value enum given one of its value names and the
239: * corresponding property alias.
240: */
241: public int getPropertyValueEnum(int property, String valueAlias) {
242: ValueMap vm = getValueMap(property);
243: return vm.nameToEnum.getEnum(valueAlias);
244: }
245:
246: //----------------------------------------------------------------
247: // Data structures
248:
249: /**
250: * A map for the legal values of a binary or enumerated properties.
251: */
252: private class ValueMap {
253:
254: /**
255: * Maps value enum to index into the nameGroupPool[]
256: */
257: EnumToShort enumToName; // polymorphic
258:
259: /**
260: * Maps value name to value enum.
261: */
262: NameToEnum nameToEnum;
263:
264: ValueMap(Builder b) throws IOException {
265: short enumToName_offset = b.readShort();
266: short ncEnumToName_offset = b.readShort();
267: short nameToEnum_offset = b.readShort();
268: if (enumToName_offset != 0) {
269: b.seek(enumToName_offset);
270: ContiguousEnumToShort x = new ContiguousEnumToShort(b);
271: b.nameGroupOffsetToIndex(x.offsetArray);
272: enumToName = x;
273: } else {
274: b.seek(ncEnumToName_offset);
275: NonContiguousEnumToShort x = new NonContiguousEnumToShort(
276: b);
277: b.nameGroupOffsetToIndex(x.offsetArray);
278: enumToName = x;
279: }
280: b.seek(nameToEnum_offset);
281: nameToEnum = new NameToEnum(b);
282: }
283: }
284:
285: /**
286: * Abstract map from enum values to integers.
287: */
288: private interface EnumToShort {
289: short getShort(int enumProbe);
290: }
291:
292: /**
293: * Generic map from enum values to offsets. Enum values are
294: * contiguous.
295: */
296: private static class ContiguousEnumToShort implements EnumToShort {
297: int enumStart;
298: int enumLimit;
299: short offsetArray[];
300:
301: public short getShort(int enumProbe) {
302: if (enumProbe < enumStart || enumProbe >= enumLimit) {
303: throw new IllegalArgumentException(
304: "Invalid enum. enumStart = " + enumStart
305: + " enumLimit = " + enumLimit
306: + " enumProbe = " + enumProbe);
307: }
308: return offsetArray[enumProbe - enumStart];
309: }
310:
311: ContiguousEnumToShort(ICUBinaryStream s) throws IOException {
312: enumStart = s.readInt();
313: enumLimit = s.readInt();
314: int count = enumLimit - enumStart;
315: offsetArray = new short[count];
316: for (int i = 0; i < count; ++i) {
317: offsetArray[i] = s.readShort();
318: }
319: }
320: }
321:
322: /**
323: * Generic map from enum values to offsets. Enum values need not
324: * be contiguous.
325: */
326: private static class NonContiguousEnumToShort implements
327: EnumToShort {
328: int enumArray[];
329: short offsetArray[];
330:
331: public short getShort(int enumProbe) {
332: for (int i = 0; i < enumArray.length; ++i) {
333: if (enumArray[i] < enumProbe)
334: continue;
335: if (enumArray[i] > enumProbe)
336: break;
337: return offsetArray[i];
338: }
339: throw new IllegalArgumentException("Invalid enum");
340: }
341:
342: NonContiguousEnumToShort(ICUBinaryStream s) throws IOException {
343: int i;
344: int count = s.readInt();
345: enumArray = new int[count];
346: offsetArray = new short[count];
347: for (i = 0; i < count; ++i) {
348: enumArray[i] = s.readInt();
349: }
350: for (i = 0; i < count; ++i) {
351: offsetArray[i] = s.readShort();
352: }
353: }
354: }
355:
356: /**
357: * Map from names to enum values.
358: */
359: private class NameToEnum {
360: int enumArray[];
361: short nameArray[];
362:
363: int getEnum(String nameProbe) {
364: for (int i = 0; i < nameArray.length; ++i) {
365: int c = UPropertyAliases.compare(nameProbe,
366: stringPool[nameArray[i]]);
367: if (c > 0)
368: continue;
369: if (c < 0)
370: break;
371: return enumArray[i];
372: }
373: throw new IllegalArgumentException("Invalid name: "
374: + nameProbe);
375: }
376:
377: NameToEnum(Builder b) throws IOException {
378: int i;
379: int count = b.readInt();
380: enumArray = new int[count];
381: nameArray = new short[count];
382: for (i = 0; i < count; ++i) {
383: enumArray[i] = b.readInt();
384: }
385: for (i = 0; i < count; ++i) {
386: nameArray[i] = b.stringOffsetToIndex(b.readShort());
387: }
388: }
389: }
390:
391: //----------------------------------------------------------------
392: // Runtime implementation
393:
394: /**
395: * Compare two property names, returning <0, 0, or >0. The
396: * comparison is that described as "loose" matching in the
397: * Property*Aliases.txt files.
398: */
399: public static int compare(String stra, String strb) {
400: // Note: This implementation is a literal copy of
401: // uprv_comparePropertyNames. It can probably be improved.
402: int istra = 0, istrb = 0, rc;
403: int cstra = 0, cstrb = 0;
404: for (;;) {
405: /* Ignore delimiters '-', '_', and ASCII White_Space */
406: while (istra < stra.length()) {
407: cstra = stra.charAt(istra);
408: switch (cstra) {
409: case '-':
410: case '_':
411: case ' ':
412: case '\t':
413: case '\n':
414: case 0xb/*\v*/:
415: case '\f':
416: case '\r':
417: ++istra;
418: continue;
419: }
420: break;
421: }
422:
423: while (istrb < strb.length()) {
424: cstrb = strb.charAt(istrb);
425: switch (cstrb) {
426: case '-':
427: case '_':
428: case ' ':
429: case '\t':
430: case '\n':
431: case 0xb/*\v*/:
432: case '\f':
433: case '\r':
434: ++istrb;
435: continue;
436: }
437: break;
438: }
439:
440: /* If we reach the ends of both strings then they match */
441: boolean endstra = istra == stra.length();
442: boolean endstrb = istrb == strb.length();
443: if (endstra) {
444: if (endstrb)
445: return 0;
446: cstra = 0;
447: } else if (endstrb) {
448: cstrb = 0;
449: }
450:
451: rc = UCharacter.toLowerCase(cstra)
452: - UCharacter.toLowerCase(cstrb);
453: if (rc != 0) {
454: return rc;
455: }
456:
457: ++istra;
458: ++istrb;
459: }
460: }
461:
462: /**
463: * Given an index to a run within the nameGroupPool[], and a
464: * nameChoice (0,1,...), select the nameChoice-th entry of the run.
465: */
466: private String chooseNameInGroup(short nameGroupIndex,
467: int nameChoice) {
468: if (nameChoice < 0) {
469: throw new IllegalArgumentException("Invalid name choice");
470: }
471: while (nameChoice-- > 0) {
472: if (nameGroupPool[nameGroupIndex++] < 0) {
473: throw new IllegalArgumentException(
474: "Invalid name choice");
475: }
476: }
477: short a = nameGroupPool[nameGroupIndex];
478: return stringPool[(a < 0) ? -a : a];
479: }
480:
481: /**
482: * Return the valueMap[] entry for a given property.
483: */
484: private ValueMap getValueMap(int property) {
485: int valueMapIndex = enumToValue.getShort(property);
486: return valueMapArray[valueMapIndex];
487: }
488:
489: //----------------------------------------------------------------
490: // ICUBinary API
491:
492: /**
493: * Return true if the given data version can be used.
494: */
495: public boolean isDataVersionAcceptable(byte version[]) {
496: return version[0] == DATA_FORMAT_VERSION;
497: }
498:
499: //----------------------------------------------------------------
500: // Builder
501:
502: /**
503: * A specialized ICUBinaryStream that can map between offsets and
504: * index values into various arrays (stringPool, nameGroupPool,
505: * and valueMap). It also knows how to read various structures.
506: */
507: static class Builder extends ICUBinaryStream {
508:
509: // map[i] = offset of object i. We need maps for all of our
510: // arrays. The arrays are indexed by offset in the raw binary
511: // file; we need to translate that to index.
512:
513: private short stringPool_map[];
514:
515: private short valueMap_map[];
516:
517: private short nameGroup_map[];
518:
519: public Builder(byte raw[]) {
520: super (raw);
521: }
522:
523: /**
524: * The valueMap_map[] must be setup in advance. This method
525: * does that.
526: */
527: public void setupValueMap_map(short offset, short count) {
528: valueMap_map = new short[count];
529: for (int i = 0; i < count; ++i) {
530: // Start of each entry. Each entry is 6 bytes long.
531: valueMap_map[i] = (short) (offset + i * 6);
532: }
533: }
534:
535: /**
536: * Read stringPool[]. Build up translation table from offsets
537: * to string indices (stringPool_map[]).
538: */
539: public String[] readStringPool(short offset, short count)
540: throws IOException {
541: seek(offset);
542: // Allocate one more stringPool entry than needed. Use this
543: // to store a "no string" entry in the pool, at index 0. This
544: // maps to offset 0, so let stringPool_map[0] = 0.
545: String stringPool[] = new String[count + 1];
546: stringPool_map = new short[count + 1];
547: short pos = offset;
548: StringBuffer buf = new StringBuffer();
549: stringPool_map[0] = 0;
550: for (int i = 1; i <= count; ++i) {
551: buf.setLength(0);
552: for (;;) {
553: // This works because the name is invariant-ASCII
554: char c = (char) readUnsignedByte();
555: if (c == 0)
556: break;
557: buf.append(c);
558: }
559: stringPool_map[i] = pos;
560: stringPool[i] = buf.toString();
561: pos += stringPool[i].length() + 1;
562: }
563: if (DEBUG) {
564: System.out.println("read stringPool x " + count + ": "
565: + stringPool[1] + ", " + stringPool[2] + ", "
566: + stringPool[3] + ",...");
567: }
568: return stringPool;
569: }
570:
571: /**
572: * Read the nameGroupPool[], and build up the offset->index
573: * map (nameGroupPool_map[]).
574: */
575: public short[] readNameGroupPool(short offset, short count)
576: throws IOException {
577: // Read nameGroupPool[]. This contains offsets from start of
578: // header. We translate these into indices into stringPool[]
579: // on the fly. The offset 0, which indicates "no entry", we
580: // translate into index 0, which contains a null String
581: // pointer.
582: seek(offset);
583: short pos = offset;
584: short nameGroupPool[] = new short[count];
585: nameGroup_map = new short[count];
586: for (int i = 0; i < count; ++i) {
587: nameGroup_map[i] = pos;
588: nameGroupPool[i] = stringOffsetToIndex(readShort());
589: pos += 2;
590: }
591: if (DEBUG) {
592: System.out.println("read nameGroupPool x " + count
593: + ": " + nameGroupPool[0] + ", "
594: + nameGroupPool[1] + ", " + nameGroupPool[2]
595: + ",...");
596: }
597: return nameGroupPool;
598: }
599:
600: /**
601: * Convert an offset into the string pool into a stringPool[]
602: * index.
603: */
604: private short stringOffsetToIndex(short offset) {
605: int probe = offset;
606: if (probe < 0)
607: probe = -probe;
608: for (int i = 0; i < stringPool_map.length; ++i) {
609: if (stringPool_map[i] == probe) {
610: return (short) ((offset < 0) ? -i : i);
611: }
612: }
613: throw new IllegalStateException(
614: "Can't map string pool offset " + offset
615: + " to index");
616: }
617:
618: /**
619: * Convert an array of offsets into the string pool into an
620: * array of stringPool[] indices. MODIFIES THE ARRAY IN
621: * PLACE.
622: */
623: ///CLOVER:OFF
624: private void stringOffsetToIndex(short array[]) {
625: for (int i = 0; i < array.length; ++i) {
626: array[i] = stringOffsetToIndex(array[i]);
627: }
628: }
629:
630: ///CLOVER:ON
631:
632: /**
633: * Convert an offset into the value map into a valueMap[]
634: * index.
635: */
636: private short valueMapOffsetToIndex(short offset) {
637: for (short i = 0; i < valueMap_map.length; ++i) {
638: if (valueMap_map[i] == offset) {
639: return i;
640: }
641: }
642: throw new IllegalStateException(
643: "Can't map value map offset " + offset
644: + " to index");
645: }
646:
647: /**
648: * Convert an array of offsets into the value map array into
649: * an array of valueMap[] indices. MODIFIES THE ARRAY IN
650: * PLACE.
651: */
652: private void valueMapOffsetToIndex(short array[]) {
653: for (int i = 0; i < array.length; ++i) {
654: array[i] = valueMapOffsetToIndex(array[i]);
655: }
656: }
657:
658: /**
659: * Convert an offset into the name group pool into a
660: * nameGroupPool[] index.
661: */
662: private short nameGroupOffsetToIndex(short offset) {
663: for (short i = 0; i < nameGroup_map.length; ++i) {
664: if (nameGroup_map[i] == offset) {
665: return i;
666: }
667: }
668: throw new RuntimeException("Can't map name group offset "
669: + offset + " to index");
670: }
671:
672: /**
673: * Convert an array of offsets into the name group pool into an
674: * array of nameGroupPool[] indices. MODIFIES THE ARRAY IN
675: * PLACE.
676: */
677: private void nameGroupOffsetToIndex(short array[]) {
678: for (int i = 0; i < array.length; ++i) {
679: array[i] = nameGroupOffsetToIndex(array[i]);
680: }
681: }
682: }
683: }
|