001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package java.io;
019:
020: import java.nio.ByteBuffer;
021: import java.nio.CharBuffer;
022: import java.nio.charset.Charset;
023: import java.nio.charset.CharsetDecoder;
024: import java.nio.charset.CoderResult;
025: import java.nio.charset.CodingErrorAction;
026: import java.nio.charset.MalformedInputException;
027: import java.nio.charset.UnmappableCharacterException;
028: import java.security.AccessController;
029: import java.util.HashMap;
030:
031: import org.apache.harmony.luni.util.Msg;
032: import org.apache.harmony.luni.util.PriviAction;
033:
034: /**
035: * InputStreamReader is class for turning a byte Stream into a character Stream.
036: * Data read from the source input stream is converted into characters by either
037: * a default or provided character converter. By default, the encoding is
038: * assumed to ISO8859_1. The InputStreamReader contains a buffer of bytes read
039: * from the source input stream and converts these into characters as needed.
040: * The buffer size is 8K.
041: *
042: * @see OutputStreamWriter
043: */
044: public class InputStreamReader extends Reader {
045: private InputStream in;
046:
047: private static final int BUFFER_SIZE = 8192;
048:
049: private boolean endOfInput = false;
050:
051: CharsetDecoder decoder;
052:
053: ByteBuffer bytes = ByteBuffer.allocate(BUFFER_SIZE);
054:
055: /**
056: * Constructs a new InputStreamReader on the InputStream <code>in</code>.
057: * Now character reading can be filtered through this InputStreamReader.
058: * This constructor assumes the default conversion of ISO8859_1
059: * (ISO-Latin-1).
060: *
061: * @param in
062: * the InputStream to convert to characters.
063: */
064: public InputStreamReader(InputStream in) {
065: super (in);
066: this .in = in;
067: String encoding = AccessController
068: .doPrivileged(new PriviAction<String>(
069: "file.encoding", "ISO8859_1")); //$NON-NLS-1$//$NON-NLS-2$
070: decoder = Charset.forName(encoding).newDecoder()
071: .onMalformedInput(CodingErrorAction.REPLACE)
072: .onUnmappableCharacter(CodingErrorAction.REPLACE);
073: bytes.limit(0);
074: }
075:
076: /**
077: * Constructs a new InputStreamReader on the InputStream <code>in</code>.
078: * Now character reading can be filtered through this InputStreamReader.
079: * This constructor takes a String parameter <code>enc</code> which is the
080: * name of an encoding. If the encoding cannot be found, an
081: * UnsupportedEncodingException error is thrown.
082: *
083: * @param in
084: * the InputStream to convert to characters.
085: * @param enc
086: * a String describing the character converter to use.
087: *
088: * @throws UnsupportedEncodingException
089: * if the encoding cannot be found.
090: */
091: public InputStreamReader(InputStream in, final String enc)
092: throws UnsupportedEncodingException {
093: super (in);
094: if (enc == null) {
095: throw new NullPointerException();
096: }
097: this .in = in;
098: try {
099: decoder = Charset.forName(enc).newDecoder()
100: .onMalformedInput(CodingErrorAction.REPLACE)
101: .onUnmappableCharacter(CodingErrorAction.REPLACE);
102: } catch (IllegalArgumentException e) {
103: throw (UnsupportedEncodingException) new UnsupportedEncodingException()
104: .initCause(e);
105: }
106: bytes.limit(0);
107: }
108:
109: /**
110: * Constructs a new InputStreamReader on the InputStream <code>in</code>
111: * and CharsetDecoder <code>dec</code>. Now character reading can be
112: * filtered through this InputStreamReader.
113: *
114: * @param in
115: * the InputStream to convert to characters
116: * @param dec
117: * a CharsetDecoder used by the character conversion
118: */
119: public InputStreamReader(InputStream in, CharsetDecoder dec) {
120: super (in);
121: dec.averageCharsPerByte();
122: this .in = in;
123: decoder = dec;
124: bytes.limit(0);
125: }
126:
127: /**
128: * Constructs a new InputStreamReader on the InputStream <code>in</code>
129: * and Charset <code>charset</code>. Now character reading can be
130: * filtered through this InputStreamReader.
131: *
132: * @param in
133: * the InputStream to convert to characters
134: * @param charset
135: * the Charset that specify the character converter
136: */
137: public InputStreamReader(InputStream in, Charset charset) {
138: super (in);
139: this .in = in;
140: decoder = charset.newDecoder().onMalformedInput(
141: CodingErrorAction.REPLACE).onUnmappableCharacter(
142: CodingErrorAction.REPLACE);
143: bytes.limit(0);
144: }
145:
146: /**
147: * Close this InputStreamReader. This implementation closes the source
148: * InputStream and releases all local storage.
149: *
150: * @throws IOException
151: * If an error occurs attempting to close this
152: * InputStreamReader.
153: */
154: @Override
155: public void close() throws IOException {
156: synchronized (lock) {
157: decoder = null;
158: if (in != null) {
159: in.close();
160: in = null;
161: }
162: }
163: }
164:
165: /**
166: * Answer the String which identifies the encoding used to convert bytes to
167: * characters. The value <code>null</code> is returned if this Reader has
168: * been closed.
169: *
170: * @return the String describing the converter or null if this Reader is
171: * closed.
172: */
173: public String getEncoding() {
174: if (!isOpen()) {
175: return null;
176: }
177: return HistoricalNamesUtil.getHistoricalName(decoder.charset()
178: .name());
179: }
180:
181: /*
182: * helper for getEncoding()
183: */
184: @SuppressWarnings("nls")
185: static class HistoricalNamesUtil {
186: private static HashMap<String, String> historicalNames = new HashMap<String, String>();
187: static {
188: historicalNames.put("Big5-HKSCS", "Big5_HKSCS");
189: historicalNames.put("EUC-JP", "EUC_JP");
190: historicalNames.put("EUC-KR", "EUC_KR");
191: historicalNames.put("GB2312", "EUC_CN");
192: historicalNames.put("IBM-Thai", "Cp838");
193: historicalNames.put("IBM00858", "Cp858");
194: historicalNames.put("IBM01140", "Cp1140");
195: historicalNames.put("IBM01141", "Cp1141");
196: historicalNames.put("IBM01142", "Cp1142");
197: historicalNames.put("IBM01143", "Cp1143");
198: historicalNames.put("IBM01144", "Cp1144");
199: historicalNames.put("IBM01145", "Cp1145");
200: historicalNames.put("IBM01146", "Cp1146");
201: historicalNames.put("IBM01147", "Cp1147");
202: historicalNames.put("IBM01148", "Cp1148");
203: historicalNames.put("IBM01149", "Cp1149");
204: historicalNames.put("IBM037", "Cp037");
205: historicalNames.put("IBM1026", "Cp1026");
206: historicalNames.put("IBM1047", "Cp1047");
207: historicalNames.put("IBM273", "Cp273");
208: historicalNames.put("IBM277", "Cp277");
209: historicalNames.put("IBM278", "Cp278");
210: historicalNames.put("IBM280", "Cp280");
211: historicalNames.put("IBM284", "Cp284");
212: historicalNames.put("IBM285", "Cp285");
213: historicalNames.put("IBM297", "Cp297");
214: historicalNames.put("IBM420", "Cp420");
215: historicalNames.put("IBM424", "Cp424");
216: historicalNames.put("IBM437", "Cp437");
217: historicalNames.put("IBM500", "Cp500");
218: historicalNames.put("IBM775", "Cp775");
219: historicalNames.put("IBM850", "Cp850");
220: historicalNames.put("IBM852", "Cp852");
221: historicalNames.put("IBM855", "Cp855");
222: historicalNames.put("IBM857", "Cp857");
223: historicalNames.put("IBM860", "Cp860");
224: historicalNames.put("IBM861", "Cp861");
225: historicalNames.put("IBM862", "Cp862");
226: historicalNames.put("IBM863", "Cp863");
227: historicalNames.put("IBM864", "Cp864");
228: historicalNames.put("IBM865", "Cp865");
229: historicalNames.put("IBM866", "Cp866");
230: historicalNames.put("IBM868", "Cp868");
231: historicalNames.put("IBM869", "Cp869");
232: historicalNames.put("IBM870", "Cp870");
233: historicalNames.put("IBM871", "Cp871");
234: historicalNames.put("IBM918", "Cp918");
235: historicalNames.put("ISO-2022-CN", "ISO2022CN");
236: historicalNames.put("ISO-2022-JP", "ISO2022JP");
237: historicalNames.put("ISO-2022-KR", "ISO2022KR");
238: historicalNames.put("ISO-8859-1", "ISO8859_1");
239: historicalNames.put("ISO-8859-13", "ISO8859_13");
240: historicalNames.put("ISO-8859-15", "ISO8859_15");
241: historicalNames.put("ISO-8859-2", "ISO8859_2");
242: historicalNames.put("ISO-8859-3", "ISO8859_3");
243: historicalNames.put("ISO-8859-4", "ISO8859_4");
244: historicalNames.put("ISO-8859-5", "ISO8859_5");
245: historicalNames.put("ISO-8859-6", "ISO8859_6");
246: historicalNames.put("ISO-8859-7", "ISO8859_7");
247: historicalNames.put("ISO-8859-8", "ISO8859_8");
248: historicalNames.put("ISO-8859-9", "ISO8859_9");
249: historicalNames.put("KOI8-R", "KOI8_R");
250: historicalNames.put("Shift_JIS", "SJIS");
251: historicalNames.put("TIS-620", "TIS620");
252: historicalNames.put("US-ASCII", "ASCII");
253: historicalNames.put("UTF-16BE", "UnicodeBigUnmarked");
254: historicalNames.put("UTF-16LE", "UnicodeLittleUnmarked");
255: historicalNames.put("UTF-8", "UTF8");
256: historicalNames.put("windows-1250", "Cp1250");
257: historicalNames.put("windows-1251", "Cp1251");
258: historicalNames.put("windows-1252", "Cp1252");
259: historicalNames.put("windows-1253", "Cp1253");
260: historicalNames.put("windows-1254", "Cp1254");
261: historicalNames.put("windows-1255", "Cp1255");
262: historicalNames.put("windows-1256", "Cp1256");
263: historicalNames.put("windows-1257", "Cp1257");
264: historicalNames.put("windows-1258", "Cp1258");
265: historicalNames.put("windows-31j", "MS932");
266: historicalNames.put("x-Big5-Solaris", "Big5_Solaris");
267: historicalNames.put("x-euc-jp-linux", "EUC_JP_LINUX");
268: historicalNames.put("x-EUC-TW", "EUC_TW");
269: historicalNames.put("x-eucJP-Open", "EUC_JP_Solaris");
270: historicalNames.put("x-IBM1006", "Cp1006");
271: historicalNames.put("x-IBM1025", "Cp1025");
272: historicalNames.put("x-IBM1046", "Cp1046");
273: historicalNames.put("x-IBM1097", "Cp1097");
274: historicalNames.put("x-IBM1098", "Cp1098");
275: historicalNames.put("x-IBM1112", "Cp1112");
276: historicalNames.put("x-IBM1122", "Cp1122");
277: historicalNames.put("x-IBM1123", "Cp1123");
278: historicalNames.put("x-IBM1124", "Cp1124");
279: historicalNames.put("x-IBM1381", "Cp1381");
280: historicalNames.put("x-IBM1383", "Cp1383");
281: historicalNames.put("x-IBM33722", "Cp33722");
282: historicalNames.put("x-IBM737", "Cp737");
283: historicalNames.put("x-IBM856", "Cp856");
284: historicalNames.put("x-IBM874", "Cp874");
285: historicalNames.put("x-IBM875", "Cp875");
286: historicalNames.put("x-IBM921", "Cp921");
287: historicalNames.put("x-IBM922", "Cp922");
288: historicalNames.put("x-IBM930", "Cp930");
289: historicalNames.put("x-IBM933", "Cp933");
290: historicalNames.put("x-IBM935", "Cp935");
291: historicalNames.put("x-IBM937", "Cp937");
292: historicalNames.put("x-IBM939", "Cp939");
293: historicalNames.put("x-IBM942", "Cp942");
294: historicalNames.put("x-IBM942C", "Cp942C");
295: historicalNames.put("x-IBM943", "Cp943");
296: historicalNames.put("x-IBM943C", "Cp943C");
297: historicalNames.put("x-IBM948", "Cp948");
298: historicalNames.put("x-IBM949", "Cp949");
299: historicalNames.put("x-IBM949C", "Cp949C");
300: historicalNames.put("x-IBM950", "Cp950");
301: historicalNames.put("x-IBM964", "Cp964");
302: historicalNames.put("x-IBM970", "Cp970");
303: historicalNames.put("x-ISCII91", "ISCII91");
304: historicalNames.put("x-ISO-2022-CN-CNS", "ISO2022CN");
305: historicalNames.put("x-ISO-2022-CN-GB", "ISO2022CN");
306: historicalNames.put("x-JISAutoDetect", "JISAutoDetect");
307: historicalNames.put("x-MacArabic", "MacArabic");
308: historicalNames.put("x-MacCentralEurope",
309: "MacCentralEurope");
310: historicalNames.put("x-MacCroatian", "MacCroatian");
311: historicalNames.put("x-MacCyrillic", "MacCyrillic");
312: historicalNames.put("x-MacDingbat", "MacDingbat");
313: historicalNames.put("x-MacGreek", "MacGreek");
314: historicalNames.put("x-MacHebrew", "MacHebrew");
315: historicalNames.put("x-MacIceland", "MacIceland");
316: historicalNames.put("x-MacRoman", "MacRoman");
317: historicalNames.put("x-MacRomania", "MacRomania");
318: historicalNames.put("x-MacSymbol", "MacSymbol");
319: historicalNames.put("x-MacThai", "MacThai");
320: historicalNames.put("x-MacTurkish", "MacTurkish");
321: historicalNames.put("x-MacUkraine", "MacUkraine");
322: historicalNames.put("x-MS950-HKSCS", "MS950_HKSCS");
323: historicalNames.put("x-mswin-936", "MS936");
324: historicalNames.put("x-PCK", "PCK");
325: historicalNames.put("x-windows-874", "MS874");
326: historicalNames.put("x-windows-949", "MS949");
327: historicalNames.put("x-windows-950", "MS950");
328: }
329:
330: public static String getHistoricalName(String name) {
331: return (!historicalNames.containsKey(name) ? name
332: : historicalNames.get(name));
333: }
334: }
335:
336: /**
337: * Reads a single character from this InputStreamReader and returns the
338: * result as an int. The 2 higher-order characters are set to 0. If the end
339: * of reader was encountered then return -1. The byte value is either
340: * obtained from converting bytes in this readers buffer or by first filling
341: * the buffer from the source InputStream and then reading from the buffer.
342: *
343: * @return the character read or -1 if end of reader.
344: *
345: * @throws IOException
346: * If the InputStreamReader is already closed or some other IO
347: * error occurs.
348: */
349: @Override
350: public int read() throws IOException {
351: synchronized (lock) {
352: if (!isOpen()) {
353: // K0070=InputStreamReader is closed.
354: throw new IOException(Msg.getString("K0070")); //$NON-NLS-1$
355: }
356:
357: char buf[] = new char[1];
358: return read(buf, 0, 1) != -1 ? buf[0] : -1;
359: }
360: }
361:
362: /**
363: * Reads at most <code>count</code> characters from this Reader and stores
364: * them at <code>offset</code> in the character array <code>buf</code>.
365: * Returns the number of characters actually read or -1 if the end of reader
366: * was encountered. The bytes are either obtained from converting bytes in
367: * this readers buffer or by first filling the buffer from the source
368: * InputStream and then reading from the buffer.
369: *
370: * @param buf
371: * character array to store the read characters
372: * @param offset
373: * offset in buf to store the read characters
374: * @param length
375: * maximum number of characters to read
376: * @return the number of characters read or -1 if end of reader.
377: *
378: * @throws IOException
379: * If the InputStreamReader is already closed or some other IO
380: * error occurs.
381: */
382: @Override
383: public int read(char[] buf, int offset, int length)
384: throws IOException {
385: synchronized (lock) {
386: if (!isOpen()) {
387: // K0070=InputStreamReader is closed.
388: throw new IOException(Msg.getString("K0070")); //$NON-NLS-1$
389: }
390: if (offset < 0 || offset > buf.length - length
391: || length < 0) {
392: throw new IndexOutOfBoundsException();
393: }
394: if (length == 0) {
395: return 0;
396: }
397:
398: CharBuffer out = CharBuffer.wrap(buf, offset, length);
399: CoderResult result = CoderResult.UNDERFLOW;
400:
401: // bytes.remaining() indicates number of bytes in buffer
402: // when 1-st time entered, it'll be equal to zero
403: boolean needInput = !bytes.hasRemaining();
404:
405: while (out.hasRemaining()) {
406: // fill the buffer if needed
407: if (needInput) {
408: if ((in.available() == 0)
409: && (out.position() > offset)) {
410: // we could return the result without blocking read
411: break;
412: }
413:
414: int to_read = bytes.capacity() - bytes.limit();
415: int off = bytes.arrayOffset() + bytes.limit();
416: int was_red = in.read(bytes.array(), off, to_read);
417:
418: if (was_red == -1) {
419: endOfInput = true;
420: break;
421: } else if (was_red == 0) {
422: break;
423: }
424: bytes.limit(bytes.limit() + was_red);
425: needInput = false;
426: }
427:
428: // decode bytes
429: result = decoder.decode(bytes, out, false);
430:
431: if (result.isUnderflow()) {
432: // compact the buffer if no space left
433: if (bytes.limit() == bytes.capacity()) {
434: bytes.compact();
435: bytes.limit(bytes.position());
436: bytes.position(0);
437: }
438: needInput = true;
439: } else {
440: break;
441: }
442: }
443:
444: if (result == CoderResult.UNDERFLOW && endOfInput) {
445: result = decoder.decode(bytes, out, true);
446: decoder.flush(out);
447: decoder.reset();
448: }
449: if (result.isMalformed()) {
450: throw new MalformedInputException(result.length());
451: } else if (result.isUnmappable()) {
452: throw new UnmappableCharacterException(result.length());
453: }
454:
455: return out.position() - offset == 0 ? -1 : out.position()
456: - offset;
457: }
458: }
459:
460: /*
461: * Answer a boolean indicating whether or not this InputStreamReader is
462: * open.
463: */
464: private boolean isOpen() {
465: return in != null;
466: }
467:
468: /**
469: * Answers a <code>boolean</code> indicating whether or not this
470: * InputStreamReader is ready to be read without blocking. If the result is
471: * <code>true</code>, the next <code>read()</code> will not block. If
472: * the result is <code>false</code> this Reader may or may not block when
473: * <code>read()</code> is sent. This implementation answers
474: * <code>true</code> if there are bytes available in the buffer or the
475: * source InputStream has bytes available.
476: *
477: * @return <code>true</code> if the receiver will not block when
478: * <code>read()</code> is called, <code>false</code> if unknown
479: * or blocking will occur.
480: *
481: * @throws IOException
482: * If the InputStreamReader is already closed or some other IO
483: * error occurs.
484: */
485: @Override
486: public boolean ready() throws IOException {
487: synchronized (lock) {
488: if (in == null) {
489: // K0070=InputStreamReader is closed.
490: throw new IOException(Msg.getString("K0070")); //$NON-NLS-1$
491: }
492: try {
493: return bytes.hasRemaining() || in.available() > 0;
494: } catch (IOException e) {
495: return false;
496: }
497: }
498: }
499: }
|