001: //##header
002: /*
003: *******************************************************************************
004: * Copyright (C) 2004-2005, International Business Machines Corporation and *
005: * others. All Rights Reserved. *
006: *******************************************************************************
007: */
008: package com.ibm.icu.impl;
009:
010: import java.io.BufferedInputStream;
011: import java.io.DataInputStream;
012: import java.io.IOException;
013: import java.io.InputStream;
014:
015: import com.ibm.icu.util.ULocale;
016: import com.ibm.icu.util.VersionInfo;
017:
018: /**
019: * This class reads the *.res resource bundle format
020: *
021: * (For the latest version of the file format documentation see
022: * ICU4C's source/common/uresdata.h file.)
023: *
024: * File format for .res resource bundle files (formatVersion=1.2)
025: *
026: * An ICU4C resource bundle file (.res) is a binary, memory-mappable file
027: * with nested, hierarchical data structures.
028: * It physically contains the following:
029: *
030: * Resource root; -- 32-bit Resource item, root item for this bundle's tree;
031: * currently, the root item must be a table or table32 resource item
* int32_t indexes[indexes[0]]; -- array of indexes for friendly
* reading and swapping; see the URES_INDEX_* constants declared in this class
* new in formatVersion 1.1 (ICU 2.8)
035: * char keys[]; -- characters for key strings
036: * (formatVersion 1.0: up to 65k of characters; 1.1: <2G)
037: * (minus the space for root and indexes[]),
038: * which consist of invariant characters (ASCII/EBCDIC) and are NUL-terminated;
039: * padded to multiple of 4 bytes for 4-alignment of the following data
040: * data; -- data directly and indirectly indexed by the root item;
041: * the structure is determined by walking the tree
042: *
043: * Each resource bundle item has a 32-bit Resource handle (see typedef above)
044: * which contains the item type number in its upper 4 bits (31..28) and either
045: * an offset or a direct value in its lower 28 bits (27..0).
046: * The order of items is undefined and only determined by walking the tree.
047: * Leaves of the tree may be stored first or last or anywhere in between,
048: * and it is in theory possible to have unreferenced holes in the file.
049: *
050: * Direct values:
051: * - Empty Unicode strings have an offset value of 0 in the Resource handle itself.
052: * - Integer values are 28-bit values stored in the Resource handle itself;
053: * the interpretation of unsigned vs. signed integers is up to the application.
054: *
055: * All other types and values use 28-bit offsets to point to the item's data.
056: * The offset is an index to the first 32-bit word of the value, relative to the
057: * start of the resource data (i.e., the root item handle is at offset 0).
058: * To get byte offsets, the offset is multiplied by 4 (or shifted left by 2 bits).
059: * All resource item values are 4-aligned.
060: *
* The structures (memory layouts) for the values for each item type are listed
* in the "Resource types" table below.
063: *
064: * Nested, hierarchical structures: -------------
065: *
066: * Table items contain key-value pairs where the keys are 16-bit offsets to char * key strings.
067: * Key string offsets are also relative to the start of the resource data (of the root handle),
068: * i.e., the first string has an offset of 4 (after the 4-byte root handle).
069: *
070: * The values of these pairs are Resource handles.
071: *
072: * Array items are simple vectors of Resource handles.
073: *
074: * An alias item is special (and new in ICU 2.4): --------------
075: *
076: * Its memory layout is just like for a UnicodeString, but at runtime it resolves to
077: * another resource bundle's item according to the path in the string.
078: * This is used to share items across bundles that are in different lookup/fallback
079: * chains (e.g., large collation data among zh_TW and zh_HK).
080: * This saves space (for large items) and maintenance effort (less duplication of data).
081: *
082: * --------------------------------------------------------------------------
083: *
084: * Resource types:
085: *
086: * Most resources have their values stored at four-byte offsets from the start
087: * of the resource data. These values are at least 4-aligned.
088: * Some resource values are stored directly in the offset field of the Resource itself.
089: * See UResType in unicode/ures.h for enumeration constants for Resource types.
090: *
091: * Type Name Memory layout of values
092: * (in parentheses: scalar, non-offset values)
093: *
094: * 0 Unicode String: int32_t length, UChar[length], (UChar)0, (padding)
095: * or (empty string ("") if offset==0)
096: * 1 Binary: int32_t length, uint8_t[length], (padding)
097: * - this value should be 32-aligned -
098: * 2 Table: uint16_t count, uint16_t keyStringOffsets[count], (uint16_t padding), Resource[count]
099: * 3 Alias: (physically same value layout as string, new in ICU 2.4)
100: * 4 Table32: int32_t count, int32_t keyStringOffsets[count], Resource[count]
101: * (new in formatVersion 1.1/ICU 2.8)
102: *
103: * 7 Integer: (28-bit offset is integer value)
104: * 8 Array: int32_t count, Resource[count]
105: *
106: * 14 Integer Vector: int32_t length, int32_t[length]
107: * 15 Reserved: This value denotes special purpose resources and is for internal use.
108: *
109: * Note that there are 3 types with data vector values:
110: * - Vectors of 8-bit bytes stored as type Binary.
111: * - Vectors of 16-bit words stored as type Unicode String
112: * (no value restrictions, all values 0..ffff allowed!).
113: * - Vectors of 32-bit words stored as type Integer Vector.
114: *
115: *
116: */
117: public final class ICUResourceBundleReader implements
118: ICUBinary.Authenticate {
119:
120: /**
121: * File format version that this class understands.
122: * "ResB"
123: */
124: private static final byte DATA_FORMAT_ID[] = { (byte) 0x52,
125: (byte) 0x65, (byte) 0x73, (byte) 0x42 };
126:
127: private static final String ICU_RESOURCE_SUFFIX = ".res";
128:
129: /* indexes[] value names; indexes are generally 32-bit (Resource) indexes */
130: private static final int URES_INDEX_LENGTH = 0; /* [0] contains URES_INDEX_TOP==the length of indexes[] */
131: private static final int URES_INDEX_STRINGS_TOP = 1; /* [1] contains the top of the strings, */
132: /* same as the bottom of resources, rounded up */
133: private static final int URES_INDEX_RESOURCES_TOP = 2; /* [2] contains the top of all resources */
134: private static final int URES_INDEX_BUNDLE_TOP = 3; /* [3] contains the top of the bundle, */
135: /* in case it were ever different from [2] */
136: private static final int URES_INDEX_MAX_TABLE_LENGTH = 4; /* [4] max. length of any table */
137: private static final int URES_INDEX_ATTRIBUTES = 5; /* [5] attributes bit set, see URES_ATT_* (new in formatVersion 1.2) */
138: private static final int URES_INDEX_TOP = 6;
139:
140: //private static final int URES_STRINGS_BOTTOM=(1+URES_INDEX_TOP)*4;
141:
142: /*
143: * Nofallback attribute, attribute bit 0 in indexes[URES_INDEX_ATTRIBUTES].
144: * New in formatVersion 1.2 (ICU 3.6).
145: *
146: * If set, then this resource bundle is a standalone bundle.
147: * If not set, then the bundle participates in locale fallback, eventually
148: * all the way to the root bundle.
149: * If indexes[] is missing or too short, then the attribute cannot be determined
150: * reliably. Dependency checking should ignore such bundles, and loading should
151: * use fallbacks.
152: */
153: private static final int URES_ATT_NO_FALLBACK = 1;
154:
155: private static final boolean DEBUG = false;
156:
157: private byte[] /* formatVersion, */dataVersion;
158:
159: private int rootRes;
160: private int[] indexes;
161: private boolean noFallback; /* see URES_ATT_NO_FALLBACK */
162:
163: private byte[] data;
164:
165: private ICUResourceBundleReader(InputStream stream,
166: String resolvedName) {
167:
168: BufferedInputStream bs = new BufferedInputStream(stream);
169: try {
170: if (DEBUG)
171: System.out.println("The InputStream class is: "
172: + stream.getClass().getName());
173: if (DEBUG)
174: System.out.println("The BufferedInputStream class is: "
175: + bs.getClass().getName());
176: if (DEBUG)
177: System.out
178: .println("The bytes avialable in stream before reading the header: "
179: + bs.available());
180:
181: dataVersion = ICUBinary
182: .readHeader(bs, DATA_FORMAT_ID, this );
183:
184: if (DEBUG)
185: System.out
186: .println("The bytes available in stream after reading the header: "
187: + bs.available());
188:
189: readData(bs);
190: stream.close();
191: } catch (IOException ex) {
192: //#ifndef FOUNDATION
193: throw new RuntimeException("Data file " + resolvedName
194: + " is corrupt.", ex);
195: //#else
196: //## throw new RuntimeException("Data file "+ resolvedName+ " is corrupt.");
197: //#endif
198: }
199: }
200:
201: public static ICUResourceBundleReader getReader(String baseName,
202: String localeName, ClassLoader root) {
203: String resolvedName = getFullName(baseName, localeName);
204: InputStream stream = ICUData.getStream(root, resolvedName);
205:
206: if (stream == null) {
207: return null;
208: }
209: ICUResourceBundleReader reader = new ICUResourceBundleReader(
210: stream, resolvedName);
211: return reader;
212: }
213:
214: private static void writeInt(int i, byte[] bytes, int offset) {
215: bytes[offset++] = (byte) (i >> 24);
216: bytes[offset++] = (byte) (i >> 16);
217: bytes[offset++] = (byte) (i >> 8);
218: bytes[offset] = (byte) i;
219: }
220:
221: private void readData(InputStream stream) throws IOException {
222:
223: DataInputStream ds = new DataInputStream(stream);
224:
225: if (DEBUG)
226: System.out.println("The DataInputStream class is: "
227: + ds.getClass().getName());
228: if (DEBUG)
229: System.out
230: .println("The available bytes in the stream before reading the data: "
231: + ds.available());
232:
233: /*
234: * The following will read two integers before ds.mark().
235: * Later, the two integers need to be placed into data[],
236: * then ds.reset(), then ds.readFully(into rest of data[]).
237: *
238: * This is necessary because we don't know the readLimit for ds.mark()
239: * until we have read the second integer (indexLength).
240: */
241: rootRes = ds.readInt();
242:
243: // read the variable-length indexes[] array
244: int indexLength = ds.readInt();
245: ds.mark((indexLength - 1) * 4);
246:
247: indexes = new int[indexLength];
248: indexes[URES_INDEX_LENGTH] = indexLength;
249:
250: for (int i = 1; i < indexLength; i++) {
251: indexes[i] = ds.readInt();
252: }
253:
254: // determine if this resource bundle falls back to a parent bundle
255: // along normal locale ID fallback
256: noFallback = indexLength > URES_INDEX_ATTRIBUTES
257: && (indexes[URES_INDEX_ATTRIBUTES] & URES_ATT_NO_FALLBACK) != 0;
258:
259: // read the entire bundle (after the header) into data[]
260: // put rootRes and indexLength into data[0..7]
261: // and the rest of the data into data[8..length-1]
262: int length = indexes[URES_INDEX_BUNDLE_TOP] * 4;
263: if (DEBUG)
264: System.out.println("The number of bytes in the bundle: "
265: + length);
266:
267: data = new byte[length];
268: writeInt(rootRes, data, 0);
269: writeInt(indexLength, data, 4);
270:
271: // now reset to the mark, which was set after reading rootRes and indexLength
272: ds.reset();
273: ds.readFully(data, 8, length - 8);
274: }
275:
276: /**
277: * Gets the full name of the resource with suffix.
278: */
279: public static String getFullName(String baseName, String localeName) {
280: if (baseName == null || baseName.length() == 0) {
281: if (localeName.length() == 0) {
282: return ULocale.getDefault().toString()
283: + ICU_RESOURCE_SUFFIX;
284: } else {
285: return localeName + ICU_RESOURCE_SUFFIX;
286: }
287: } else {
288: if (baseName.indexOf('.') == -1) {
289: if (baseName.charAt(baseName.length() - 1) != '/') {
290: return baseName + "/" + localeName
291: + ICU_RESOURCE_SUFFIX;
292: } else {
293: return baseName + localeName + ICU_RESOURCE_SUFFIX;
294: }
295: } else {
296: baseName = baseName.replace('.', '/');
297: if (localeName.length() == 0) {
298: return baseName + ICU_RESOURCE_SUFFIX;
299: } else {
300: return baseName + "_" + localeName
301: + ICU_RESOURCE_SUFFIX;
302: }
303: }
304: }
305: }
306:
307: public VersionInfo getVersion() {
308: return VersionInfo.getInstance(dataVersion[0], dataVersion[1],
309: dataVersion[2], dataVersion[3]);
310: }
311:
312: public boolean isDataVersionAcceptable(byte version[]) {
313: // while ICU4C can read formatVersion 1.0 and up,
314: // ICU4J requires 1.1 as a minimum
315: // formatVersion = version;
316: return version[0] == 1 && version[1] >= 1;
317: }
318:
319: public byte[] getData() {
320: return data;
321: }
322:
323: public int getRootResource() {
324: return rootRes;
325: }
326:
327: public boolean getNoFallback() {
328: return noFallback;
329: }
330: }
|