001: /*
002: * Copyright 2001-2005 Sun Microsystems, Inc. All Rights Reserved.
003: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004: *
005: * This code is free software; you can redistribute it and/or modify it
006: * under the terms of the GNU General Public License version 2 only, as
007: * published by the Free Software Foundation. Sun designates this
008: * particular file as subject to the "Classpath" exception as provided
009: * by Sun in the LICENSE file that accompanied this code.
010: *
011: * This code is distributed in the hope that it will be useful, but WITHOUT
012: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
013: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
014: * version 2 for more details (a copy is included in the LICENSE file that
015: * accompanied this code).
016: *
017: * You should have received a copy of the GNU General Public License version
018: * 2 along with this work; if not, write to the Free Software Foundation,
019: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
020: *
021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
022: * CA 95054 USA or visit www.sun.com if you need additional information or
023: * have any questions.
024: */
025: package sun.nio.cs.ext;
026:
027: import java.io.ByteArrayOutputStream;
028: import java.nio.ByteBuffer;
029: import java.nio.CharBuffer;
030: import java.nio.charset.*;
031:
032: import java.util.Collections;
033: import java.util.HashMap;
034: import java.util.Iterator;
035: import java.util.List;
036: import java.util.Map;
037:
038: public class COMPOUND_TEXT_Encoder extends CharsetEncoder {
039:
/**
 * NOTE: The following four static variables should be used *only* for
 * testing whether an encoder can encode a specific character. They
 * cannot be used for actual encoding because they are shared across all
 * COMPOUND_TEXT encoders and may be stateful.
 */
private static final Map encodingToEncoderMap =
        Collections.synchronizedMap(new HashMap(21, 1.0f));
private static final CharsetEncoder latin1Encoder;
private static final CharsetEncoder defaultEncoder;
private static final boolean defaultEncodingSupported;

static {
    // The platform default charset always supplies the "default" probe.
    final CharsetEncoder platformEncoder = Charset.defaultCharset().newEncoder();
    final String platformName = platformEncoder.charset().name();

    final CharsetEncoder latin1Probe;
    final boolean platformSupported;

    // NOTE(review): Charset.name() returns the canonical charset name, which
    // for Latin-1 is documented as "ISO-8859-1"; confirm whether this alias
    // literal can ever match.
    if ("ISO8859_1".equals(platformName)) {
        // The default charset is Latin-1 itself: share a single probe.
        latin1Probe = platformEncoder;
        platformSupported = true;
    } else {
        try {
            latin1Probe = Charset.forName("ISO8859_1").newEncoder();
        } catch (IllegalArgumentException e) {
            throw new ExceptionInInitializerError("ISO8859_1 unsupported");
        }
        // The default charset is only usable when Compound Text lists it.
        platformSupported =
                CompoundTextSupport.getEncodings().contains(platformName);
    }

    latin1Encoder = latin1Probe;
    defaultEncoder = platformEncoder;
    defaultEncodingSupported = platformSupported;
}
073:
074: private CharsetEncoder encoder;
075: private char[] charBuf = new char[1];
076: private CharBuffer charbuf = CharBuffer.wrap(charBuf);
077: private ByteArrayOutputStream nonStandardCharsetBuffer;
078: private byte[] byteBuf;
079: private ByteBuffer bytebuf;
080: private int numNonStandardChars, nonStandardEncodingLen;
081:
/**
 * Creates a Compound Text encoder for the given charset. Both the average
 * and the maximum bytes-per-character figures passed to the superclass are
 * {@code CompoundTextSupport.MAX_CONTROL_SEQUENCE_LEN + 2}. Encoding starts
 * with an ISO8859_1 delegate.
 *
 * @param cs the charset that created this encoder
 */
public COMPOUND_TEXT_Encoder(Charset cs) {
    super(cs,
          (float) (CompoundTextSupport.MAX_CONTROL_SEQUENCE_LEN + 2),
          (float) (CompoundTextSupport.MAX_CONTROL_SEQUENCE_LEN + 2));

    try {
        this.encoder = Charset.forName("ISO8859_1").newEncoder();
    } catch (IllegalArgumentException cannotHappen) {
        // Intentionally ignored: the lookup is expected to always succeed.
    }
    initEncoder(this.encoder);
}
093:
094: protected CoderResult encodeLoop(CharBuffer src, ByteBuffer des) {
095: CoderResult cr = CoderResult.UNDERFLOW;
096: char[] input = src.array();
097: int inOff = src.arrayOffset() + src.position();
098: int inEnd = src.arrayOffset() + src.limit();
099:
100: try {
101: while (inOff < inEnd && cr.isUnderflow()) {
102: charBuf[0] = input[inOff];
103: if (charBuf[0] <= '\u0008'
104: || (charBuf[0] >= '\u000B' && charBuf[0] <= '\u001F')
105: || (charBuf[0] >= '\u0080' && charBuf[0] <= '\u009F')) {
106: // The compound text specification only permits the octets
107: // 0x09, 0x0A, 0x1B, and 0x9B in C0 and C1. Of these, 1B and
108: // 9B must also be removed because they initiate control
109: // sequences.
110: charBuf[0] = '?';
111: }
112:
113: CharsetEncoder enc = getEncoder(charBuf[0]);
114: //System.out.println("char=" + charBuf[0] + ", enc=" + enc);
115: if (enc == null) {
116: if (unmappableCharacterAction() == CodingErrorAction.REPORT) {
117: charBuf[0] = '?';
118: enc = latin1Encoder;
119: } else {
120: return CoderResult.unmappableForLength(1);
121: }
122: }
123: if (enc != encoder) {
124: if (nonStandardCharsetBuffer != null) {
125: cr = flushNonStandardCharsetBuffer(des);
126: } else {
127: //cr= encoder.flush(des);
128: flushEncoder(encoder, des);
129: }
130: if (!cr.isUnderflow())
131: return cr;
132: byte[] escSequence = CompoundTextSupport
133: .getEscapeSequence(enc.charset().name());
134: if (escSequence == null) {
135: throw new InternalError("Unknown encoding: "
136: + enc.charset().name());
137: } else if (escSequence[1] == (byte) 0x25
138: && escSequence[2] == (byte) 0x2F) {
139: initNonStandardCharsetBuffer(enc, escSequence);
140: } else if (des.remaining() >= escSequence.length) {
141: des.put(escSequence, 0, escSequence.length);
142: } else {
143: return CoderResult.OVERFLOW;
144: }
145: encoder = enc;
146: continue;
147: }
148: charbuf.rewind();
149: if (nonStandardCharsetBuffer == null) {
150: cr = encoder.encode(charbuf, des, false);
151: } else {
152: bytebuf.clear();
153: cr = encoder.encode(charbuf, bytebuf, false);
154: bytebuf.flip();
155: nonStandardCharsetBuffer.write(byteBuf, 0, bytebuf
156: .limit());
157: numNonStandardChars++;
158: }
159: inOff++;
160: }
161: return cr;
162: } finally {
163: src.position(inOff - src.arrayOffset());
164: }
165: }
166:
167: protected CoderResult implFlush(ByteBuffer out) {
168: CoderResult cr = (nonStandardCharsetBuffer != null) ? flushNonStandardCharsetBuffer(out)
169: //: encoder.flush(out);
170: : flushEncoder(encoder, out);
171: reset();
172: return cr;
173: }
174:
175: private void initNonStandardCharsetBuffer(CharsetEncoder c,
176: byte[] escSequence) {
177: nonStandardCharsetBuffer = new ByteArrayOutputStream();
178: byteBuf = new byte[(int) c.maxBytesPerChar()];
179: bytebuf = ByteBuffer.wrap(byteBuf);
180: nonStandardCharsetBuffer.write(escSequence, 0,
181: escSequence.length);
182: nonStandardCharsetBuffer.write(0); // M placeholder
183: nonStandardCharsetBuffer.write(0); // L placeholder
184: byte[] encoding = CompoundTextSupport.getEncoding(c.charset()
185: .name());
186: if (encoding == null) {
187: throw new InternalError("Unknown encoding: "
188: + encoder.charset().name());
189: }
190: nonStandardCharsetBuffer.write(encoding, 0, encoding.length);
191: nonStandardCharsetBuffer.write(0x02); // divider
192: nonStandardEncodingLen = encoding.length + 1;
193: }
194:
195: private CoderResult flushNonStandardCharsetBuffer(ByteBuffer out) {
196: if (numNonStandardChars > 0) {
197: byte[] flushBuf = new byte[(int) encoder.maxBytesPerChar()
198: * numNonStandardChars];
199: ByteBuffer bb = ByteBuffer.wrap(flushBuf);
200: flushEncoder(encoder, bb);
201: bb.flip();
202: nonStandardCharsetBuffer.write(flushBuf, 0, bb.limit());
203: numNonStandardChars = 0;
204: }
205:
206: int numBytes = nonStandardCharsetBuffer.size();
207: int nonStandardBytesOff = 6 + nonStandardEncodingLen;
208:
209: if (out.remaining() < (numBytes - nonStandardBytesOff)
210: + nonStandardBytesOff
211: * (((numBytes - nonStandardBytesOff) / ((1 << 14) - 1)) + 1)) {
212: return CoderResult.OVERFLOW;
213: }
214:
215: byte[] nonStandardBytes = nonStandardCharsetBuffer
216: .toByteArray();
217:
218: // The non-standard charset header only supports 2^14-1 bytes of data.
219: // If we have more than that, we have to repeat the header.
220: do {
221: out.put((byte) 0x1B);
222: out.put((byte) 0x25);
223: out.put((byte) 0x2F);
224: out.put((byte) nonStandardBytes[3]);
225:
226: int toWrite = Math.min(numBytes - nonStandardBytesOff,
227: (1 << 14) - 1 - nonStandardEncodingLen);
228:
229: out
230: .put((byte) (((toWrite + nonStandardEncodingLen) / 0x80) | 0x80)); // M
231: out
232: .put((byte) (((toWrite + nonStandardEncodingLen) % 0x80) | 0x80)); // L
233: out.put(nonStandardBytes, 6, nonStandardEncodingLen);
234: out.put(nonStandardBytes, nonStandardBytesOff, toWrite);
235: nonStandardBytesOff += toWrite;
236: } while (nonStandardBytesOff < numBytes);
237:
238: nonStandardCharsetBuffer = null;
239: byteBuf = null;
240: nonStandardEncodingLen = 0;
241: return CoderResult.UNDERFLOW;
242: }
243:
244: /**
245: * Resets the encoder.
246: * Call this method to reset the encoder to its initial state
247: */
248: protected void implReset() {
249: numNonStandardChars = nonStandardEncodingLen = 0;
250: nonStandardCharsetBuffer = null;
251: byteBuf = null;
252: try {
253: encoder = Charset.forName("ISO8859_1").newEncoder();
254: } catch (IllegalArgumentException cannotHappen) {
255: }
256: initEncoder(encoder);
257: }
258:
259: /**
260: * Return whether a character is mappable or not
261: * @return true if a character is mappable
262: */
263: public boolean canEncode(char ch) {
264: return getEncoder(ch) != null;
265: }
266:
267: protected void implOnMalformedInput(CodingErrorAction newAction) {
268: encoder.onUnmappableCharacter(newAction);
269: }
270:
271: protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
272: encoder.onUnmappableCharacter(newAction);
273: }
274:
275: protected void implReplaceWith(byte[] newReplacement) {
276: if (encoder != null)
277: encoder.replaceWith(newReplacement);
278: }
279:
280: /**
281: * Try to figure out which CharsetEncoder to use for conversion
282: * of the specified Unicode character. The target character encoding
283: * of the returned encoder is approved to be used with Compound Text.
284: *
285: * @param ch Unicode character
286: * @return CharsetEncoder to convert the given character
287: */
288: private CharsetEncoder getEncoder(char ch) {
289: // 1. Try the current encoder.
290: if (encoder.canEncode(ch)) {
291: return encoder;
292: }
293:
294: // 2. Try the default encoder.
295: if (defaultEncodingSupported && defaultEncoder.canEncode(ch)) {
296: CharsetEncoder retval = null;
297: try {
298: retval = defaultEncoder.charset().newEncoder();
299: } catch (UnsupportedOperationException cannotHappen) {
300: }
301: initEncoder(retval);
302: return retval;
303: }
304:
305: // 3. Try ISO8859-1.
306: if (latin1Encoder.canEncode(ch)) {
307: CharsetEncoder retval = null;
308: try {
309: retval = latin1Encoder.charset().newEncoder();
310: } catch (UnsupportedOperationException cannotHappen) {
311: }
312: initEncoder(retval);
313: return retval;
314: }
315:
316: // 4. Brute force search of all supported encodings.
317: for (Iterator iter = CompoundTextSupport.getEncodings()
318: .iterator(); iter.hasNext();) {
319: String encoding = (String) iter.next();
320: CharsetEncoder enc = (CharsetEncoder) encodingToEncoderMap
321: .get(encoding);
322: if (enc == null) {
323: enc = CompoundTextSupport.getEncoder(encoding);
324: if (enc == null) {
325: throw new InternalError("Unsupported encoding: "
326: + encoding);
327: }
328: encodingToEncoderMap.put(encoding, enc);
329: }
330: if (enc.canEncode(ch)) {
331: CharsetEncoder retval = CompoundTextSupport
332: .getEncoder(encoding);
333: initEncoder(retval);
334: return retval;
335: }
336: }
337:
338: return null;
339: }
340:
341: private void initEncoder(CharsetEncoder enc) {
342: try {
343: enc.onUnmappableCharacter(CodingErrorAction.REPLACE)
344: .replaceWith(replacement());
345: } catch (IllegalArgumentException x) {
346: }
347: }
348:
349: private CharBuffer fcb = CharBuffer.allocate(0);
350:
351: private CoderResult flushEncoder(CharsetEncoder enc, ByteBuffer bb) {
352: enc.encode(fcb, bb, true);
353: return enc.flush(bb);
354: }
355: }
|