001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package org.apache.harmony.niochar.charset;
019:
020: import java.nio.ByteBuffer;
021: import java.nio.CharBuffer;
022: import java.nio.charset.Charset;
023: import java.nio.charset.CharsetDecoder;
024: import java.nio.charset.CharsetEncoder;
025: import java.nio.charset.CoderResult;
026:
/**
 * UTF-16 charset whose coders handle an optional byte-order mark (BOM).
 *
 * <p>The decoder inspects the first two bytes of the input. If
 * {@link #getDetectedEndian(int, int)} recognizes them as a BOM, that byte
 * order is used; otherwise the order returned by {@link #getDefaultEndian()}
 * is used, with {@link #ANY} treated as big-endian. The encoder writes a
 * big-endian BOM ({@code FE FF}) first when the default order is {@link #ANY}.
 *
 * <p>The byte-order hooks are {@code protected} so that subclasses can fix
 * the byte order by overriding them.
 */
public class UTF_16 extends Charset {

    /** Coder state: the byte order has not been determined yet. */
    protected static final int UNKNOWN = -1;

    /** Big-endian byte order (most significant byte first). */
    protected static final int BIG = 0;

    /** Little-endian byte order (least significant byte first). */
    protected static final int LITTLE = 1;

    /** Default order meaning "no fixed order": rely on the BOM, else big-endian. */
    protected static final int ANY = 2;

    /** Result of {@link #getDetectedEndian(int, int)} when the bytes are not a BOM. */
    protected static final int NOT_DETECTED = 3;

    /** First code unit of the surrogate range (first high surrogate). */
    private static final int MIN_SURROGATE = 0xD800;

    /** First low (trailing) surrogate code unit. */
    private static final int MIN_LOW_SURROGATE = 0xDC00;

    /** Last code unit of the surrogate range (last low surrogate). */
    private static final int MAX_SURROGATE = 0xDFFF;

    /** Canonical names of the charsets reported as contained in this one. */
    private static final String[] CONTAINED_CHARSETS = {
        "UTF-16", "US-ASCII", "KOI8-R",
        "windows-1250", "windows-1251", "windows-1252", "windows-1253",
        "windows-1254", "windows-1257",
        "ISO-8859-1", "ISO-8859-2", "ISO-8859-4", "ISO-8859-5",
        "ISO-8859-7", "ISO-8859-9", "ISO-8859-10", "ISO-8859-13",
        "ISO-8859-14", "ISO-8859-15", "ISO-8859-16",
        "UTF-8", "UTF-16LE", "UTF-16BE"
    };

    /**
     * Creates the charset.
     *
     * @param canonicalName the canonical name passed to {@link Charset}
     * @param aliases the aliases passed to {@link Charset}
     */
    public UTF_16(String canonicalName, String[] aliases) {
        super(canonicalName, aliases);
    }

    /** Returns a new decoder bound to this charset. */
    @Override
    public CharsetDecoder newDecoder() {
        return new Decoder(this);
    }

    /** Returns a new encoder bound to this charset. */
    @Override
    public CharsetEncoder newEncoder() {
        return new Encoder(this);
    }

    /**
     * Tells whether the given charset is one of the charsets this
     * implementation lists as contained in UTF-16.
     *
     * @param cs the charset to test; its canonical name is compared
     *        case-insensitively against a fixed list
     * @return {@code true} if the name of {@code cs} is in the list
     */
    @Override
    public boolean contains(Charset cs) {
        String name = cs.name();
        for (int i = 0; i < CONTAINED_CHARSETS.length; i++) {
            if (name.equalsIgnoreCase(CONTAINED_CHARSETS[i])) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the byte order used when the input carries no BOM.
     *
     * @return {@link #ANY} in this class
     */
    protected int getDefaultEndian() {
        return ANY;
    }

    /**
     * Examines two bytes for a byte-order mark.
     *
     * @param b1 the first byte, as an unsigned value
     * @param b2 the second byte, as an unsigned value
     * @return {@link #LITTLE} for {@code FF FE}, {@link #BIG} for
     *         {@code FE FF}, otherwise {@link #NOT_DETECTED}
     */
    protected int getDetectedEndian(int b1, int b2) {
        if (b1 == 0xFF && b2 == 0xFE) {
            return LITTLE;
        }
        if (b1 == 0xFE && b2 == 0xFF) {
            return BIG;
        }
        return NOT_DETECTED;
    }

    /** Returns whether {@code unit} lies in the surrogate range D800..DFFF. */
    private static boolean isSurrogate(int unit) {
        return unit >= MIN_SURROGATE && unit <= MAX_SURROGATE;
    }

    /** Returns whether {@code unit} is a low (trailing) surrogate, DC00..DFFF. */
    private static boolean isLowSurrogate(int unit) {
        return unit >= MIN_LOW_SURROGATE && unit <= MAX_SURROGATE;
    }

    /** Decoder that turns UTF-16 bytes into chars, resolving byte order lazily. */
    private final class Decoder extends CharsetDecoder {

        /** Byte order in effect; {@link #UNKNOWN} until the first unit is read. */
        private int endian;

        private Decoder(Charset cs) {
            super(cs, 0.5f, 1.0f);
            implReset();
        }

        /** Forgets the byte order so the next input is inspected for a BOM again. */
        @Override
        protected void implReset() {
            endian = UNKNOWN;
        }

        /** Combines two bytes into one 16-bit code unit using the current byte order. */
        private int toCodeUnit(int first, int second) {
            return (endian == BIG) ? (first << 8) | second
                    : (second << 8) | first;
        }

        /**
         * Decodes as many complete code units as possible.
         *
         * @return OVERFLOW when {@code out} cannot take the next character
         *         (two chars are required for a surrogate pair), UNDERFLOW
         *         when fewer than one code unit (or the second half of a
         *         pair) remains, malformed-2 for a leading low surrogate and
         *         malformed-4 for a high surrogate not followed by a low one
         */
        @Override
        protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
            int outRemaining = out.remaining();
            // pos tracks the committed input position; the relative gets
            // below may run ahead of it and are rolled back in finally.
            int pos = in.position();
            int limit = in.limit();
            try {
                while (pos < limit - 1) {
                    if (outRemaining == 0) {
                        return CoderResult.OVERFLOW;
                    }

                    int b1 = in.get() & 0xFF;
                    int b2 = in.get() & 0xFF;

                    if (endian == UNKNOWN) {
                        endian = getDetectedEndian(b1, b2);
                        if (endian == NOT_DETECTED) {
                            endian = getDefaultEndian();
                            if (endian == ANY) {
                                endian = BIG;
                            }
                        } else if (getDefaultEndian() == ANY) {
                            // A BOM was found and no fixed order is imposed:
                            // consume the BOM without producing a char.
                            pos += 2;
                            continue;
                        }
                    }

                    int jchar = toCodeUnit(b1, b2);

                    if (!isSurrogate(jchar)) {
                        out.put((char) jchar);
                        outRemaining--;
                        pos += 2;
                        continue;
                    }

                    // A pair must not start with a low surrogate.
                    if (jchar >= MIN_LOW_SURROGATE) {
                        return CoderResult.malformedForLength(2);
                    }
                    if (outRemaining < 2) {
                        return CoderResult.OVERFLOW;
                    }
                    // The trailing unit needs two more bytes.
                    if (pos + 3 >= limit) {
                        return CoderResult.UNDERFLOW;
                    }

                    int b3 = in.get() & 0xFF;
                    int b4 = in.get() & 0xFF;
                    int jchar2 = toCodeUnit(b3, b4);

                    // The trailing unit must be a low surrogate; both bounds
                    // are checked so that units above DFFF are rejected too.
                    if (!isLowSurrogate(jchar2)) {
                        return CoderResult.malformedForLength(4);
                    }

                    out.put((char) jchar);
                    out.put((char) jchar2);
                    outRemaining -= 2;
                    pos += 4;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                in.position(pos);
            }
        }
    }

    /** Encoder that turns chars into UTF-16 bytes, emitting a BOM when needed. */
    private final class Encoder extends CharsetEncoder {

        /** Byte order in effect; {@link #UNKNOWN} until the first char is encoded. */
        private int endian;

        private Encoder(Charset cs) {
            // Replacement is U+FFFD in big-endian order ...
            super(cs, 2.0f, 2.0f, new byte[] { -1, -3 });
            // ... swapped when the default byte order is little-endian.
            if (getDefaultEndian() == LITTLE) {
                replaceWith(new byte[] { -3, -1 });
            }
            implReset();
        }

        /** Forgets the byte order so the next encoding starts with BOM handling again. */
        @Override
        protected void implReset() {
            endian = UNKNOWN;
        }

        /** Writes one 16-bit code unit to {@code out} in the current byte order. */
        private void putCodeUnit(ByteBuffer out, int unit) {
            byte high = (byte) ((unit >> 8) & 0xFF);
            byte low = (byte) (unit & 0xFF);
            if (endian == BIG) {
                out.put(high);
                out.put(low);
            } else {
                out.put(low);
                out.put(high);
            }
        }

        /**
         * Encodes as many chars as possible.
         *
         * @return OVERFLOW when {@code out} lacks room for the BOM or the
         *         next character, UNDERFLOW when the input is exhausted or
         *         ends with a high surrogate, and malformed-1 for a leading
         *         low surrogate or a high surrogate not followed by a low one
         */
        @Override
        protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
            int outRemaining = out.remaining();
            // pos tracks the committed input position; the relative gets
            // below may run ahead of it and are rolled back in finally.
            int pos = in.position();
            int limit = in.limit();
            try {
                while (pos < limit) {

                    if (endian == UNKNOWN) {
                        endian = getDefaultEndian();
                        if (endian == ANY) {
                            if (outRemaining < 2) {
                                // No room for the BOM: stay undecided so it
                                // is written on the next call.
                                endian = UNKNOWN;
                                return CoderResult.OVERFLOW;
                            }
                            endian = BIG;
                            out.put((byte) 0xFE);
                            out.put((byte) 0xFF);
                            outRemaining -= 2;
                        }
                    }

                    if (outRemaining == 0) {
                        return CoderResult.OVERFLOW;
                    }

                    int jchar = (in.get() & 0xFFFF);

                    if (!isSurrogate(jchar)) {
                        if (outRemaining < 2) {
                            return CoderResult.OVERFLOW;
                        }
                        putCodeUnit(out, jchar);
                        outRemaining -= 2;
                        pos++;
                        continue;
                    }

                    // A lone or leading low surrogate is malformed no matter
                    // how much input or output space remains, so test first.
                    if (jchar >= MIN_LOW_SURROGATE) {
                        return CoderResult.malformedForLength(1);
                    }

                    // The trailing surrogate needs one more char of input.
                    if (limit <= pos + 1) {
                        return CoderResult.UNDERFLOW;
                    }

                    if (outRemaining < 4) {
                        return CoderResult.OVERFLOW;
                    }

                    int jchar2 = (in.get() & 0xFFFF);
                    // The trailing unit must be a low surrogate; both bounds
                    // are checked so that chars above DFFF are rejected too.
                    if (!isLowSurrogate(jchar2)) {
                        return CoderResult.malformedForLength(1);
                    }

                    putCodeUnit(out, jchar);
                    putCodeUnit(out, jchar2);
                    outRemaining -= 4;
                    pos += 2;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                in.position(pos);
            }
        }
    }
}
|