001: /*
002: * $Id: StreamBufferManager.java,v 1.2 2006/04/01 06:01:48 jeffsuttor Exp $
003: */
004:
005: /*
006: * The contents of this file are subject to the terms
007: * of the Common Development and Distribution License
008: * (the License). You may not use this file except in
009: * compliance with the License.
010: *
011: * You can obtain a copy of the license at
012: * https://glassfish.dev.java.net/public/CDDLv1.0.html.
013: * See the License for the specific language governing
014: * permissions and limitations under the License.
015: *
016: * When distributing Covered Code, include this CDDL
017: * Header Notice in each file and include the License file
018: * at https://glassfish.dev.java.net/public/CDDLv1.0.html.
019: * If applicable, add the following below the CDDL Header,
020: * with the fields enclosed by brackets [] replaced by
021: * you own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * [Name of File] [ver.__] [Date]
025: *
026: * Copyright 2006 Sun Microsystems Inc. All Rights Reserved
027: */
028:
029: package com.sun.xml.stream;
030:
031: import java.io.BufferedReader;
032: import java.io.File;
033: import java.io.IOException;
034: import java.io.InputStream;
035: import java.io.InputStreamReader;
036: import java.io.Reader;
037: import java.net.URL;
038: import java.nio.CharBuffer;
039: import java.util.Locale;
040: import com.sun.xml.stream.xerces.impl.io.ASCIIReader;
041: import com.sun.xml.stream.xerces.impl.io.UCSReader;
042: import com.sun.xml.stream.xerces.impl.io.UTF8Reader;
043: import com.sun.xml.stream.xerces.util.EncodingMap;
044: import com.sun.xml.stream.xerces.util.XMLChar;
045:
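/**
 * Buffer manager that decodes an {@link InputStream} into characters and
 * exposes them through a {@link CharBuffer}. When no encoding is supplied,
 * the encoding is sniffed from the first bytes of the stream.
 *
 * @author Neeraj Bajaj, Sun Microsystems
 */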
050: public class StreamBufferManager extends BufferManager {
051:
052: static final int DEFAULT_LENGTH = 8192;
053: static final boolean DEBUG = false;
054: CharBuffer charBuffer = null;
055: Reader fReader = null;
056: boolean fAllowJavaEncodings = true;
057:
/**
 * Creates a buffer manager over the given byte stream.
 *
 * @param stream   the bytes to decode
 * @param encoding the encoding to decode with, or {@code null} to let
 *                 {@code init} auto-detect it from the start of the stream
 * @throws java.io.IOException if {@code init} fails
 */
public StreamBufferManager(InputStream stream, String encoding)
        throws java.io.IOException {
    if (DEBUG) {
        System.out.println("Encoding supplied = " + encoding);
    }
    init(stream, encoding);
}
065:
066: void init(InputStream istream, String encoding)
067: throws java.io.IOException {
068: Boolean isBigEndian = null;
069: // wrap this stream in RewindableInputStream
070: InputStream stream = new RewindableInputStream(istream);
071: if (DEBUG) {
072: System.out.println("stream = " + stream);
073: }
074: // perform auto-detect of encoding if necessary
075: if (encoding == null) {
076: if (DEBUG) {
077: System.out.println("Autodetecting the encoding");
078: }
079: // read first four bytes and determine encoding
080: final byte[] b4 = new byte[4];
081: int count = 0;
082: for (; count < 4; count++) {
083: b4[count] = (byte) stream.read();
084: }
085: if (count == 4) {
086: Object[] encodingDesc = getEncodingName(b4, count);
087: encoding = (String) (encodingDesc[0]);
088: isBigEndian = (Boolean) (encodingDesc[1]);
089: stream.reset();
090: int offset = 0;
091: // Special case UTF-8 files with BOM created by Microsoft
092: // tools. It's more efficient to consume the BOM than make
093: // the reader perform extra checks. -Ac
094: if (count > 2 && encoding.equals("UTF-8")) {
095: int b0 = b4[0] & 0xFF;
096: int b1 = b4[1] & 0xFF;
097: int b2 = b4[2] & 0xFF;
098: //consume the byte order mark
099: if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
100: // ignore first three bytes...
101: stream.skip(3);
102: }
103: }
104: if (DEBUG) {
105: System.out
106: .println("Encoding after autodetection = "
107: + encoding);
108: }
109: fReader = createReader(stream, encoding, isBigEndian);
110: } else {
111: fReader = createReader(stream, encoding, isBigEndian);
112: }
113: }
114: // use specified encoding
115: else {
116: fReader = createReader(stream, encoding, isBigEndian);
117: }
118:
119: charBuffer = CharBuffer.allocate(DEFAULT_LENGTH);
120: }
121:
122: public CharBuffer getCharBuffer() {
123: return charBuffer;
124: }
125:
126: /**
127: * This function returns true if some character data was loaded. Data is available via getCharBuffer().
128: * If before calling this function CharBuffer had some data (i.e. remaining() > 0) then this function
129: * first calls CharBuffer.compact() and then it is filled with more data. After calling this function
130: * CharBuffer.position() is always 'zero'.
131: *
132: * @see CharBuffer.compact()
133: * @return true if some character data was loaded. False value can be assume to be end of current
134: * entity.
135: */
136: public boolean getMore() throws java.io.IOException {
137: //compact() changes the position of the buffer
138: if (charBuffer.position() != 0) {
139: charBuffer.compact();
140: }
141: char[] ch = charBuffer.array();
142: int offset = charBuffer.position();
143: //xxx: JDK 1.5 gives option to directly read into CharBuffer
144: int count = fReader.read(ch, offset, charBuffer.capacity());
145:
146: if (DEBUG) {
147: System.out.println("No. of characters read = " + count);
148: }
149: if (count == -1) {
150: endOfStream = true;
151: return false;
152: }
153: charBuffer = charBuffer.wrap(ch);
154: //set the limit to the count of characters read
155: charBuffer.limit(count);
156: //xxx: what should be done if the characters read are 'zero' but still the end of the
157: //stream is not reached.
158: if (count > 0) {
159: return true;
160: } else
161: return false;
162: }
163:
164: /**
165: * Creates a reader capable of reading the given input stream in
166: * the specified encoding.
167: *
168: * @param inputStream The input stream.
169: * @param encoding The encoding name that the input stream is
170: * encoded using. If the user has specified that
171: * Java encoding names are allowed, then the
172: * encoding name may be a Java encoding name;
173: * otherwise, it is an ianaEncoding name.
174: * @param isBigEndian For encodings (like uCS-4), whose names cannot
175: * specify a byte order, this tells whether the order is bigEndian. null menas
176: * unknown or not relevant.
177: *
178: * @return Returns a reader.
179: */
180:
181: protected Reader createReader(InputStream inputStream,
182: String encoding, Boolean isBigEndian) throws IOException {
183:
184: // normalize encoding name
185: if (encoding == null) {
186: encoding = "UTF-8";
187: }
188:
189: // try to use an optimized reader
190: String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
191: if (ENCODING.equals("UTF-8")) {
192: if (DEBUG) {
193: System.out.println("Creating UTF-8 Reader");
194: }
195: //xxx: we are not passing ErrorReporter
196: return new UTF8Reader(inputStream, DEFAULT_LENGTH, null,
197: Locale.getDefault());
198: }
199: if (ENCODING.equals("US-ASCII")) {
200: if (DEBUG) {
201: System.out.println("$$$ creating ASCIIReader");
202: }//xxx: we are not passing ErrorReporter
203: return new ASCIIReader(inputStream, DEFAULT_LENGTH, null,
204: Locale.getDefault());
205: }
206: if (ENCODING.equals("ISO-10646-UCS-4")) {
207: if (isBigEndian != null) {
208: boolean isBE = isBigEndian.booleanValue();
209: if (isBE) {
210: return new UCSReader(inputStream, UCSReader.UCS4BE);
211: } else {
212: return new UCSReader(inputStream, UCSReader.UCS4LE);
213: }
214: } else {
215: throw new java.io.IOException(
216: "Encoding byte order not supported");
217: }
218: }
219: if (ENCODING.equals("ISO-10646-UCS-2")) {
220: if (isBigEndian != null) { // sould never happen with this encoding...
221: boolean isBE = isBigEndian.booleanValue();
222: if (isBE) {
223: return new UCSReader(inputStream, UCSReader.UCS2BE);
224: } else {
225: return new UCSReader(inputStream, UCSReader.UCS2LE);
226: }
227: } else {
228: throw new java.io.IOException(
229: "Encoding byte order not supported");
230: }
231: }
232:
233: // check for valid name
234: boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
235: boolean validJava = XMLChar.isValidJavaEncoding(encoding);
236: if (!validIANA || (fAllowJavaEncodings && !validJava)) {
237: throw new java.io.IOException("Encoding declaration "
238: + encoding + "not valid");
239: }
240: // try to use a Java reader
241: String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
242: if (javaEncoding == null) {
243: if (fAllowJavaEncodings) {
244: javaEncoding = encoding;
245: } else {
246: throw new java.io.IOException("Encoding " + encoding
247: + " not supported");
248: }
249: }
250: if (DEBUG) {
251: System.out
252: .print("$$$ creating Java InputStreamReader: encoding="
253: + javaEncoding);
254: if (javaEncoding == encoding) {
255: System.out.print(" (IANA encoding)");
256: }
257: System.out.println();
258: }
259: return new BufferedReader(new InputStreamReader(inputStream,
260: javaEncoding));
261:
262: } // createReader(InputStream,String, Boolean): Reader
263:
264: int getLength() {
265: //decide the number of bytes that need to read
266: return DEFAULT_LENGTH;
267: }
268:
269: public static void main(String[] args) {
270: try {
271: File file = new File(args[0]);
272: System.out.println("url parameter = "
273: + file.toURI().toString());
274: URL url = new URL(file.toURI().toString());
275: StreamBufferManager sb = new StreamBufferManager(url
276: .openStream(), "UTF-8");
277: CharBuffer cb = sb.getCharBuffer();
278: int i = 0;
279: while (sb.getMore()) {
280: System.out.println("Loop " + i++ + " = "
281: + sb.getCharBuffer());
282: }
283: System.out.println("End of stream reached = "
284: + sb.endOfStream());
285: System.out.println("Total no. of loops required = " + i);
286: } catch (Exception ex) {
287: ex.printStackTrace();
288: }
289: }
290:
291: public void close() throws java.io.IOException {
292: if (fReader != null) {
293: fReader.close();
294: }
295: }
296:
297: public void setEncoding(String encoding) throws java.io.IOException {
298: //xxx: this need to be implemented. if the encoding is different than the current encoding we need
299: //to change the reader with the newly created reader
300: }
301:
302: public boolean arrangeCapacity(int length)
303: throws java.io.IOException {
304: return false;
305: }
306:
307: // This class wraps the byte inputstreams we're presented with.
308: // We need it because java.io.InputStreams don't provide
309: // functionality to reread processed bytes, and they have a habit
310: // of reading more than one character when you call their read()
311: // methods. This means that, once we discover the true (declared)
312: // encoding of a document, we can neither backtrack to read the
313: // whole doc again nor start reading where we are with a new
314: // reader.
315: //
316: // This class allows rewinding an inputStream by allowing a mark
317: // to be set, and the stream reset to that position. <strong>The
318: // class assumes that it needs to read one character per
319: // invocation when it's read() method is inovked, but uses the
320: // underlying InputStream's read(char[], offset length) method--it
321: // won't buffer data read this way!</strong>
322: //
323: // @author Neil Graham, IBM
324: // @author Glenn Marcy, IBM
325:
326: protected final class RewindableInputStream extends InputStream {
327:
328: private InputStream fInputStream;
329: private byte[] fData;
330: private int fStartOffset;
331: private int fEndOffset;
332: private int fOffset;
333: private int fLength;
334: private int fMark;
335: static final int DEFAULT_XMLDECL_BUFFER_SIZE = 64;
336:
337: public RewindableInputStream(InputStream is) {
338: fData = new byte[DEFAULT_XMLDECL_BUFFER_SIZE];
339: fInputStream = is;
340: fStartOffset = 0;
341: fEndOffset = -1;
342: fOffset = 0;
343: fLength = 0;
344: fMark = 0;
345: }
346:
347: public void setStartOffset(int offset) {
348: fStartOffset = offset;
349: }
350:
351: public void rewind() {
352: fOffset = fStartOffset;
353: }
354:
355: public int read() throws IOException {
356: int b = 0;
357: if (fOffset < fLength) {
358: return fData[fOffset++] & 0xff;
359: }
360: if (fOffset == fEndOffset) {
361: return -1;
362: }
363: if (fOffset == fData.length) {
364: byte[] newData = new byte[fOffset << 1];
365: System.arraycopy(fData, 0, newData, 0, fOffset);
366: fData = newData;
367: }
368: b = fInputStream.read();
369: if (b == -1) {
370: fEndOffset = fOffset;
371: return -1;
372: }
373: fData[fLength++] = (byte) b;
374: fOffset++;
375: return b & 0xff;
376: }
377:
378: public int read(byte[] b, int off, int len) throws IOException {
379: int bytesLeft = fLength - fOffset;
380: if (bytesLeft == 0) {
381: if (fOffset == fEndOffset) {
382: return -1;
383: }
384: return fInputStream.read(b, off, len);
385: /**
386: * //System.out.println("fCurrentEntitty = " + fCurrentEntity );
387: * //System.out.println("fInputStream = " + fInputStream );
388: * // better get some more for the voracious reader...
389: * if(fCurrentEntity.mayReadChunks) {
390: * return fInputStream.read(b, off, len);
391: * }
392: *
393: * int returnedVal = read();
394: * if(returnedVal == -1) {
395: * fEndOffset = fOffset;
396: * return -1;
397: * }
398: * b[off] = (byte)returnedVal;
399: * return 1;
400: */
401: }
402: if (len < bytesLeft) {
403: if (len <= 0) {
404: return 0;
405: }
406: } else {
407: len = bytesLeft;
408: }
409: if (b != null) {
410: System.arraycopy(fData, fOffset, b, off, len);
411: }
412: fOffset += len;
413: return len;
414: }
415:
416: public long skip(long n) throws IOException {
417: int bytesLeft;
418: if (n <= 0) {
419: return 0;
420: }
421: bytesLeft = fLength - fOffset;
422: if (bytesLeft == 0) {
423: if (fOffset == fEndOffset) {
424: return 0;
425: }
426: return fInputStream.skip(n);
427: }
428: if (n <= bytesLeft) {
429: fOffset += n;
430: return n;
431: }
432: fOffset += bytesLeft;
433: if (fOffset == fEndOffset) {
434: return bytesLeft;
435: }
436: n -= bytesLeft;
437: /*
438: * In a manner of speaking, when this class isn't permitting more
439: * than one byte at a time to be read, it is "blocking". The
440: * available() method should indicate how much can be read without
441: * blocking, so while we're in this mode, it should only indicate
442: * that bytes in its buffer are available; otherwise, the result of
443: * available() on the underlying InputStream is appropriate.
444: */
445: return fInputStream.skip(n) + bytesLeft;
446: }
447:
448: public int available() throws IOException {
449: int bytesLeft = fLength - fOffset;
450: if (bytesLeft == 0) {
451: if (fOffset == fEndOffset) {
452: return -1;
453: }
454: return fInputStream.available();
455: }
456: return bytesLeft;
457: }
458:
459: public void mark(int howMuch) {
460: fMark = fOffset;
461: }
462:
463: public void reset() {
464: fOffset = fMark;
465: //test();
466: }
467:
468: public boolean markSupported() {
469: return true;
470: }
471:
472: public void close() throws IOException {
473: if (fInputStream != null) {
474: fInputStream.close();
475: fInputStream = null;
476: }
477: }
478: } // end of RewindableInputStream class
479:
480: }
|