001: /*
002: * @(#)StringCoding.java 1.9 02/04/09
003: *
004: * Copyright 1990-2006 Sun Microsystems, Inc. All Rights Reserved.
005: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
006: *
007: * This program is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU General Public License version
009: * 2 only, as published by the Free Software Foundation.
010: *
011: * This program is distributed in the hope that it will be useful, but
012: * WITHOUT ANY WARRANTY; without even the implied warranty of
013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014: * General Public License version 2 for more details (a copy is
015: * included at /legal/license.txt).
016: *
017: * You should have received a copy of the GNU General Public License
018: * version 2 along with this work; if not, write to the Free Software
019: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020: * 02110-1301 USA
021: *
022: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
023: * Clara, CA 95054 or visit www.sun.com if you need additional
024: * information or have any questions.
025: */
026:
027: package java.lang;
028:
029: import java.io.CharConversionException;
030: import java.io.UnsupportedEncodingException;
031: import java.lang.ref.SoftReference; /*
032: *import java.nio.ByteBuffer;
033: *import java.nio.CharBuffer;
034: *import java.nio.BufferOverflowException;
035: *import java.nio.BufferUnderflowException;
036: *import java.nio.charset.Charset;
037: *import java.nio.charset.CharsetDecoder;
038: *import java.nio.charset.CharsetEncoder;
039: *import java.nio.charset.CharacterCodingException;
040: *import java.nio.charset.CoderResult;
041: *import java.nio.charset.CodingErrorAction;
042: *import java.nio.charset.IllegalCharsetNameException;
043: *import java.nio.charset.MalformedInputException;
044: *import java.nio.charset.UnsupportedCharsetException;
045: *import sun.misc.MessageUtils;
046: *import sun.nio.cs.HistoricallyNamedCharset;
047: */
048: import sun.io.ByteToCharConverter;
049: import sun.io.CharToByteConverter;
050: import sun.io.Converters;
051:
052: import sun.misc.CVM;
053:
054: /**
055: * Utility class for string encoding and decoding.
056: */
057:
058: class StringCoding {
059:
/**
 * Private constructor: every member of this class is static, so no
 * instance is ever needed.
 */
private StringCoding() {
    // intentionally empty
}
062:
063: /* The cached coders for each thread
064: */
065: private static ThreadLocal decoder = new ThreadLocal();
066: private static ThreadLocal encoder = new ThreadLocal();
067:
068: private static boolean warnUnsupportedCharset = true;
069:
070: private static Object deref(ThreadLocal tl) {
071: SoftReference sr = (SoftReference) tl.get();
072: if (sr == null)
073: return null;
074: return sr.get();
075: }
076:
077: private static void set(ThreadLocal tl, Object ob) {
078: tl.set(new SoftReference(ob));
079: }
080:
081: // Trim the given byte array to the given length
082: //
083: private static byte[] trim(byte[] ba, int len) {
084: if (len == ba.length)
085: return ba;
086: byte[] tba = new byte[len];
087: /* IAI - 15 */
088: CVM.copyByteArray(ba, 0, tba, 0, len);
089: /* IAI - 15 */
090: return tba;
091: }
092:
093: // Trim the given char array to the given length
094: //
095: private static char[] trim(char[] ca, int len) {
096: if (len == ca.length)
097: return ca;
098: char[] tca = new char[len];
099: /* IAI - 15 */
100: CVM.copyCharArray(ca, 0, tca, 0, len);
101: /* IAI - 15 */
102: return tca;
103: }
104:
105: /*
106: * private static Charset lookupCharset(String csn) {
107: * if (csn.equalsIgnoreCase("PCK"))
108: * return null;
109: * if (Charset.isSupported(csn)) {
110: * try {
111: * return Charset.forName(csn);
112: * } catch (UnsupportedCharsetException x) {
113: * throw new Error(x);
114: * }
115: * }
116: * return null;
117: * }
118: */
119:
120: private static void warnUnsupportedCharset(String csn) {
121: if (warnUnsupportedCharset) {
122: // Use sun.misc.MessageUtils rather than the Logging API or
123: // System.err since this method may be called during VM
124: // initialization before either is available.
125: //MessageUtils.err("WARNING: Default charset " + csn +
126: // " not supported, using ISO-8859-1 instead");
127: warnUnsupportedCharset = false;
128: }
129: }
130:
131: // -- Decoding --
132:
133: // Encapsulates either a ByteToCharConverter or a CharsetDecoder
134: //
135: private static abstract class StringDecoder {
136: private final String requestedCharsetName;
137:
138: protected StringDecoder(String requestedCharsetName) {
139: this .requestedCharsetName = requestedCharsetName;
140: }
141:
142: final String requestedCharsetName() {
143: return requestedCharsetName;
144: }
145:
146: abstract String charsetName();
147:
148: abstract char[] decode(byte[] ba, int off, int len);
149: }
150:
151: // A string decoder based upon a ByteToCharConverter
152: //
153:
154: private static class ConverterSD extends StringDecoder {
155: private ByteToCharConverter btc;
156:
157: private ConverterSD(ByteToCharConverter btc, String rcn) {
158: super (rcn);
159: this .btc = btc;
160: }
161:
162: String charsetName() {
163: return btc.getCharacterEncoding();
164: }
165:
166: char[] decode(byte[] ba, int off, int len) {
167: int en = btc.getMaxCharsPerByte() * len;
168: char[] ca = new char[en];
169: if (len == 0)
170: return ca;
171: btc.reset();
172: int n = 0;
173: try {
174: n = btc.convert(ba, off, off + len, ca, 0, en);
175: n += btc.flush(ca, btc.nextCharIndex(), en);
176: } catch (CharConversionException x) {
177: // Yes, this is what we've always done
178: n = btc.nextCharIndex();
179: }
180: return trim(ca, n);
181: }
182:
183: }
184:
185: // A string decoder based upon a CharsetDecoder
186: //
187: /*
188: * private static class CharsetSD
189: * extends StringDecoder
190: * {
191: * private final Charset cs;
192: * private final CharsetDecoder cd;
193: *
194: * private CharsetSD(Charset cs, String rcn) {
195: * super(rcn);
196: * this.cs = cs;
197: * this.cd = cs.newDecoder()
198: * .onMalformedInput(CodingErrorAction.REPLACE)
199: * .onUnmappableCharacter(CodingErrorAction.REPLACE);
200: * }
201: *
202: * String charsetName() {
203: * if (cs instanceof HistoricallyNamedCharset)
204: * return ((HistoricallyNamedCharset)cs).historicalName();
205: * return cs.name();
206: * }
207: *
208: * char[] decode(byte[] ba, int off, int len) {
209: * int en = (int)(cd.maxCharsPerByte() * len);
210: * char[] ca = new char[en];
211: * if (len == 0)
212: * return ca;
213: * cd.reset();
214: * ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
215: * CharBuffer cb = CharBuffer.wrap(ca);
216: * try {
217: * CoderResult cr = cd.decode(bb, cb, true);
218: * if (!cr.isUnderflow())
219: * cr.throwException();
220: * cr = cd.flush(cb);
221: * if (!cr.isUnderflow())
222: * cr.throwException();
223: * } catch (CharacterCodingException x) {
224: * // Substitution is always enabled,
225: * // so this shouldn't happen
226: * throw new Error(x);
227: * }
228: * return trim(ca, cb.position());
229: * }
230: *
231: * }
232: */
233:
234: static char[] decode(String charsetName, byte[] ba, int off, int len)
235: throws UnsupportedEncodingException {
236: StringDecoder sd = (StringDecoder) deref(decoder);
237: String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
238: if ((sd == null)
239: || !(csn.equals(sd.requestedCharsetName()) || csn
240: .equals(sd.charsetName()))) {
241: /*
242: * sd = null;
243: * try {
244: * Charset cs = lookupCharset(csn);
245: * if (cs != null)
246: * sd = new CharsetSD(cs, csn);
247: * else
248: * sd = null;
249: * } catch (IllegalCharsetNameException x) {
250: * // FALL THROUGH to ByteToCharConverter, for compatibility
251: * }
252: * if (sd == null)
253: */
254: sd = new ConverterSD(ByteToCharConverter.getConverter(csn),
255: csn);
256: set(decoder, sd);
257: }
258: return sd.decode(ba, off, len);
259: }
260:
261: static char[] decode(byte[] ba, int off, int len) {
262: String csn = Converters.getDefaultEncodingName();
263: try {
264: return decode(csn, ba, off, len);
265: } catch (UnsupportedEncodingException x) {
266: Converters.resetDefaultEncodingName();
267: warnUnsupportedCharset(csn);
268: }
269: try {
270: return decode("ISO-8859-1", ba, off, len);
271: } catch (UnsupportedEncodingException x) {
272: // If this code is hit during VM initialization, MessageUtils is
273: // the only way we will be able to get any kind of error message.
274: //MessageUtils.err("ISO-8859-1 charset not available: "
275: // + x.toString());
276: // If we can not find ISO-8859-1 (a required encoding) then things
277: // are seriously wrong with the installation.
278: System.exit(1);
279: return null;
280: }
281: }
282:
283: // -- Encoding --
284:
285: // Encapsulates either a CharToByteConverter or a CharsetEncoder
286: //
287: private static abstract class StringEncoder {
288: private final String requestedCharsetName;
289:
290: protected StringEncoder(String requestedCharsetName) {
291: this .requestedCharsetName = requestedCharsetName;
292: }
293:
294: final String requestedCharsetName() {
295: return requestedCharsetName;
296: }
297:
298: abstract String charsetName();
299:
300: abstract byte[] encode(char[] cs, int off, int len);
301: }
302:
303: // A string encoder based upon a CharToByteConverter
304: //
305: private static class ConverterSE extends StringEncoder {
306: private CharToByteConverter ctb;
307:
308: private ConverterSE(CharToByteConverter ctb, String rcn) {
309: super (rcn);
310: this .ctb = ctb;
311: }
312:
313: String charsetName() {
314: return ctb.getCharacterEncoding();
315: }
316:
317: byte[] encode(char[] ca, int off, int len) {
318: int en = ctb.getMaxBytesPerChar() * len;
319: byte[] ba = new byte[en];
320: if (len == 0)
321: return ba;
322:
323: ctb.reset();
324: int n;
325: try {
326: n = ctb.convertAny(ca, off, (off + len), ba, 0, en);
327: n += ctb.flushAny(ba, ctb.nextByteIndex(), en);
328: } catch (CharConversionException x) {
329: throw new Error("Converter malfunction: "
330: + ctb.getClass().getName(), x);
331: }
332: return trim(ba, n);
333: }
334:
335: }
336:
337: // A string encoder based upon a CharsetEncoder
338: //
339: /*
340: * private static class CharsetSE
341: * extends StringEncoder
342: * {
343: * private Charset cs;
344: * private CharsetEncoder ce;
345: *
346: * private CharsetSE(Charset cs, String rcn) {
347: * super(rcn);
348: * this.cs = cs;
349: * this.ce = cs.newEncoder()
350: * .onMalformedInput(CodingErrorAction.REPLACE)
351: * .onUnmappableCharacter(CodingErrorAction.REPLACE);
352: * }
353: *
354: * String charsetName() {
355: * if (cs instanceof HistoricallyNamedCharset)
356: * return ((HistoricallyNamedCharset)cs).historicalName();
357: * return cs.name();
358: * }
359: *
360: * byte[] encode(char[] ca, int off, int len) {
361: * int en = (int)(ce.maxBytesPerChar() * len);
362: * byte[] ba = new byte[en];
363: * if (len == 0)
364: * return ba;
365: *
366: * ce.reset();
367: * ByteBuffer bb = ByteBuffer.wrap(ba);
368: * CharBuffer cb = CharBuffer.wrap(ca, off, len);
369: * try {
370: * CoderResult cr = ce.encode(cb, bb, true);
371: * if (!cr.isUnderflow())
372: * cr.throwException();
373: * cr = ce.flush(bb);
374: * if (!cr.isUnderflow())
375: * cr.throwException();
376: * } catch (CharacterCodingException x) {
377: * // Substitution is always enabled,
378: * // so this shouldn't happen
379: * throw new Error(x);
380: * }
381: * return trim(ba, bb.position());
382: * }
383: *
384: * }
385: */
386:
387: static byte[] encode(String charsetName, char[] ca, int off, int len)
388: throws UnsupportedEncodingException {
389: StringEncoder se = (StringEncoder) deref(encoder);
390: String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
391: if ((se == null)
392: || !(csn.equals(se.requestedCharsetName()) || csn
393: .equals(se.charsetName()))) {
394: /*
395: * se = null;
396: * try {
397: * Charset cs = lookupCharset(csn);
398: * if (cs != null)
399: * se = new CharsetSE(cs, csn);
400: * } catch (IllegalCharsetNameException x) {
401: * // FALL THROUGH to CharToByteConverter, for compatibility
402: * }
403: * if (se == null)
404: */
405: se = new ConverterSE(CharToByteConverter.getConverter(csn),
406: csn);
407: set(encoder, se);
408: }
409: return se.encode(ca, off, len);
410: }
411:
412: static byte[] encode(char[] ca, int off, int len) {
413: String csn = Converters.getDefaultEncodingName();
414: try {
415: return encode(csn, ca, off, len);
416: } catch (UnsupportedEncodingException x) {
417: Converters.resetDefaultEncodingName();
418: warnUnsupportedCharset(csn);
419: }
420: try {
421: return encode("ISO-8859-1", ca, off, len);
422: } catch (UnsupportedEncodingException x) {
423: // If this code is hit during VM initialization, MessageUtils is
424: // the only way we will be able to get any kind of error message.
425: // MessageUtils.err("ISO-8859-1 charset not available: "
426: // + x.toString());
427: // If we can not find ISO-8859-1 (a required encoding) then things
428: // are seriously wrong with the installation.
429: System.exit(1);
430: return null;
431: }
432: }
433:
434: }
|