001: /*
002: *******************************************************************************
003: * Copyright (C) 2005, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: *
007: */
008:
009: package com.ibm.icu.impl;
010:
011: import com.ibm.icu.text.UTF16;
012:
013: /**
014: * This class converts between an array of bytes in UTF-32 encoding (BE or LE) and
015: * Java Strings.
016: *
017: * @internal
018: */
019: public abstract class UTF32 {
020: /**
021: * This method packs a 32-bit Unicode code point into the byte array. It is
022: * implemented by subclasses that implement the BE and LE encodings.
023: *
024: * @param bytes the destination byte array
025: * @param codePoint the 32-bit Unicode code point
026: * @param out the destination index in <code>bytes</code>.
027: *
028: * @internal
029: */
030: abstract protected void pack(byte[] bytes, int codePoint, int out);
031:
032: /**
033: * This method unpacks bytes from the encoded byte array into a 32-bit
034: * Unicode code point. It is implmeented by subclasses that implmeent the BE and LE encodings.
035: *
036: * @param bytes the source byte array.
037: * @param index the index of the first source byte.
038: * @return the 32-bit Unicode code point.
039: *
040: * @internal
041: */
042: abstract protected int unpack(byte[] bytes, int index);
043:
044: /**
045: * Convert a Java String into an array of UTF-32 encoded bytes. Calls
046: * the <code>pack</code> method to do the encoding.
047: *
048: * @param utf16 the source Java String.
049: * @return an array of UTF-32 encoded bytes.
050: *
051: * @internal
052: */
053: public byte[] toBytes(String utf16) {
054: int codePoints = UTF16.countCodePoint(utf16);
055: byte[] bytes = new byte[codePoints * 4];
056: int out = 0;
057:
058: for (int cp = 0; cp < codePoints; out += 4) {
059: int codePoint = UTF16.charAt(utf16, cp);
060:
061: pack(bytes, codePoint, out);
062: cp += UTF16.getCharCount(codePoint);
063: }
064:
065: return bytes;
066: }
067:
068: /**
069: * This method converts a sequence of UTF-32 encoded bytes into
070: * a Java String. It calls the <code>unpack</code> method to implement
071: * the encoding.
072: *
073: * @param bytes the source byte array.
074: * @param offset the starting offset in the byte array.
075: * @param count the number of bytes to process.
076: * @return the Java String.
077: *
078: * @internal
079: */
080: public String fromBytes(byte[] bytes, int offset, int count) {
081: StringBuffer buffer = new StringBuffer();
082: int limit = offset + count;
083:
084: for (int cp = offset; cp < limit; cp += 4) {
085: int codePoint = unpack(bytes, cp);
086:
087: UTF16.append(buffer, codePoint);
088: }
089:
090: return buffer.toString();
091: }
092:
093: /**
094: * A convenience method that converts an entire byte array
095: * into a Java String.
096: *
097: * @param bytes the source byte array.
098: * @return the Java String.
099: *
100: * @internal
101: */
102: public String fromBytes(byte[] bytes) {
103: return fromBytes(bytes, 0, bytes.length);
104: }
105:
106: /**
107: * Get an instance that implements UTF-32BE encoding.
108: *
109: * @return the instance.
110: *
111: * @internal
112: */
113: static public UTF32 getBEInstance() {
114: if (beInstance == null) {
115: beInstance = new BE();
116: }
117:
118: return beInstance;
119: }
120:
121: /**
122: * Get an instance that implemnts the UTF-32LE encoding.
123: *
124: * @return the instance.
125: *
126: * @internal
127: */
128: static public UTF32 getLEInstance() {
129: if (leInstance == null) {
130: leInstance = new LE();
131: }
132:
133: return leInstance;
134: }
135:
136: /**
137: * Get an instance that implements either UTF-32BE or UTF32-LE,
138: * depending on the encoding name suppled.
139: *
140: * @param encoding the encoding name - must be <code>"UTF-32BE"</code> or <code>"UTF-32LE"</code>.
141: * @return the instance.
142: *
143: * @internal
144: */
145: static public UTF32 getInstance(String encoding) {
146: if (encoding.equals("UTF-32BE")) {
147: return getBEInstance();
148: }
149:
150: if (encoding.equals("UTF-32LE")) {
151: return getLEInstance();
152: }
153:
154: return null;
155: }
156:
157: /**
158: * This sublcass implements the UTF-32BE encoding via the
159: * <code>pack</code> and <code>unpack</code> methods.
160: *
161: * @internal
162: */
163: static class BE extends UTF32 {
164: /**
165: * This method packs a 32-bit Unicode code point into the byte array using
166: * the UTF-32BE encoding.
167: *
168: * @param bytes the destination byte array
169: * @param codePoint the 32-bit Unicode code point
170: * @param out the destination index in <code>bytes</code>.
171: *
172: * @internal
173: */
174: public void pack(byte[] bytes, int codePoint, int out) {
175: bytes[out + 0] = (byte) ((codePoint >> 24) & 0xFF);
176: bytes[out + 1] = (byte) ((codePoint >> 16) & 0xFF);
177: bytes[out + 2] = (byte) ((codePoint >> 8) & 0xFF);
178: bytes[out + 3] = (byte) ((codePoint >> 0) & 0xFF);
179: }
180:
181: /**
182: * This method unpacks bytes from the UTF-32BE encoded byte array into a 32-bit
183: * Unicode code point.
184: *
185: * @param bytes the source byte array.
186: * @param index the index of the first source byte.
187: * @return the 32-bit Unicode code point.
188: *
189: * @internal
190: */
191: public int unpack(byte[] bytes, int index) {
192: return (bytes[index + 0] & 0xFF) << 24
193: | (bytes[index + 1] & 0xFF) << 16
194: | (bytes[index + 2] & 0xFF) << 8
195: | (bytes[index + 3] & 0xFF);
196: }
197: }
198:
199: /**
200: * This sublcass implements the UTF-32LE encoding via the
201: * <code>pack</code> and <code>unpack</code> methods.
202: *
203: * @internal
204: */
205: static class LE extends UTF32 {
206: /**
207: * This method packs a 32-bit Unicode code point into the byte array using
208: * the UTF-32LE encoding.
209: *
210: * @param bytes the destination byte array
211: * @param codePoint the 32-bit Unicode code point
212: * @param out the destination index in <code>bytes</code>.
213: *
214: * @internal
215: */
216: public void pack(byte[] bytes, int codePoint, int out) {
217: bytes[out + 3] = (byte) ((codePoint >> 24) & 0xFF);
218: bytes[out + 2] = (byte) ((codePoint >> 16) & 0xFF);
219: bytes[out + 1] = (byte) ((codePoint >> 8) & 0xFF);
220: bytes[out + 0] = (byte) ((codePoint >> 0) & 0xFF);
221: }
222:
223: /**
224: * This method unpacks bytes from the UTF-32LE encoded byte array into a 32-bit
225: * Unicode code point.
226: *
227: * @param bytes the source byte array.
228: * @param index the index of the first source byte.
229: * @return the 32-bit Unicode code point.
230: *
231: * @internal
232: */
233: public int unpack(byte[] bytes, int index) {
234: return (bytes[index + 3] & 0xFF) << 24
235: | (bytes[index + 2] & 0xFF) << 16
236: | (bytes[index + 1] & 0xFF) << 8
237: | (bytes[index + 0] & 0xFF);
238: }
239: }
240:
241: private static UTF32 beInstance = null;
242: private static UTF32 leInstance = null;
243: }
|