001: /*
002: * $Id: BufferManager.java,v 1.2 2006/04/01 06:01:49 jeffsuttor Exp $
003: */
004:
005: /*
006: * The contents of this file are subject to the terms
007: * of the Common Development and Distribution License
008: * (the License). You may not use this file except in
009: * compliance with the License.
010: *
011: * You can obtain a copy of the license at
012: * https://glassfish.dev.java.net/public/CDDLv1.0.html.
013: * See the License for the specific language governing
014: * permissions and limitations under the License.
015: *
016: * When distributing Covered Code, include this CDDL
017: * Header Notice in each file and include the License file
018: * at https://glassfish.dev.java.net/public/CDDLv1.0.html.
019: * If applicable, add the following below the CDDL Header,
020: * with the fields enclosed by brackets [] replaced by
021: * you own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * [Name of File] [ver.__] [Date]
025: *
026: * Copyright 2006 Sun Microsystems Inc. All Rights Reserved
027: */
028:
029: package com.sun.xml.stream;
030:
031: import java.io.File;
032: import java.io.FileInputStream;
033: import java.io.InputStream;
034: import java.net.URL;
035: import java.nio.CharBuffer;
036: import com.sun.xml.stream.xerces.xni.parser.XMLInputSource;
037:
038: /**
039: * @author Neeraj Bajaj, Sun Microsystems
040: */
041: public abstract class BufferManager {
042:
043: protected boolean endOfStream = false;
044: static boolean DEBUG = false;
045:
046: public static BufferManager getBufferManager(
047: XMLInputSource inputSource) throws java.io.IOException {
048:
049: InputStream stream = inputSource.getByteStream();
050: if (stream instanceof FileInputStream) {
051: if (DEBUG) {
052: System.out.println("Using FileBufferManager");
053: }
054: return new FileBufferManager((FileInputStream) stream,
055: inputSource.getEncoding());
056: } else {
057: if (DEBUG) {
058: System.out.println("Using StreamBufferManager");
059: }
060: return new StreamBufferManager(stream, inputSource
061: .getEncoding());
062: }
063: }
064:
065: /**
066: * This function returns true if some character data was loaded. Data is available via getCharBuffer().
067: * If before calling this function CharBuffer had some data (i.e. remaining() > 0) then this function
068: * first calls CharBuffer.compact() and then it is filled with more data. After calling this function
069: * CharBuffer.position() is always 'zero'.
070: *
071: * @see CharBuffer.compact()
072: * @return true if some character data was loaded. False value can be assume to be end of current
073: * entity.
074: */
075:
076: public abstract boolean getMore() throws java.io.IOException;
077:
078: public abstract CharBuffer getCharBuffer();
079:
080: /**
081: *xxx: This should be an abstract method because in StreamBufferManager
082: * CharBuffer capacity doesn't grow
083: */
084: public abstract boolean arrangeCapacity(int length)
085: throws java.io.IOException;
086:
087: /**{
088: if(getCharBuffer().limit() - getCharBuffer().position() >= length){
089: return true;
090: }
091: while( (getCharBuffer().limit() - getCharBuffer().position()) < length){
092: if(endOfStream())break;
093: getMore();
094: }
095: if(getCharBuffer().limit() - getCharBuffer().position() >= length){
096: return true;
097: }else{
098: return false;
099: }
100: }*/
101:
102: /** This file signals the end of file
103: * @return true/false signals the end of file.
104: */
105: public boolean endOfStream() {
106: return endOfStream;
107: }
108:
109: public abstract void close() throws java.io.IOException;
110:
111: public abstract void setEncoding(String encoding)
112: throws java.io.IOException;
113:
114: /**
115: * Returns the IANA encoding name that is auto-detected from
116: * the bytes specified, with the endian-ness of that encoding where appropriate.
117: *
118: * @param b4 The first four bytes of the input.
119: * @param count The number of bytes actually read.
120: * @return a 2-element array: the first element, an IANA-encoding string,
121: * the second element a Boolean which is true iff the document is big endian, false
122: * if it's little-endian, and null if the distinction isn't relevant.
123: */
124: protected Object[] getEncodingName(byte[] b4, int count) {
125:
126: if (count < 2) {
127: return new Object[] { "UTF-8", null };
128: }
129:
130: // UTF-16, with BOM
131: int b0 = b4[0] & 0xFF;
132: int b1 = b4[1] & 0xFF;
133: if (b0 == 0xFE && b1 == 0xFF) {
134: // UTF-16, big-endian
135: return new Object[] { "UTF-16BE", new Boolean(true) };
136: }
137: if (b0 == 0xFF && b1 == 0xFE) {
138: // UTF-16, little-endian
139: return new Object[] { "UTF-16LE", new Boolean(false) };
140: }
141:
142: // default to UTF-8 if we don't have enough bytes to make a
143: // good determination of the encoding
144: if (count < 3) {
145: return new Object[] { "UTF-8", null };
146: }
147:
148: // UTF-8 with a BOM
149: int b2 = b4[2] & 0xFF;
150: if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
151: return new Object[] { "UTF-8", null };
152: }
153:
154: // default to UTF-8 if we don't have enough bytes to make a
155: // good determination of the encoding
156: if (count < 4) {
157: return new Object[] { "UTF-8", null };
158: }
159:
160: // other encodings
161: int b3 = b4[3] & 0xFF;
162: if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
163: // UCS-4, big endian (1234)
164: return new Object[] { "ISO-10646-UCS-4", new Boolean(true) };
165: }
166: if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
167: // UCS-4, little endian (4321)
168: return new Object[] { "ISO-10646-UCS-4", new Boolean(false) };
169: }
170: if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
171: // UCS-4, unusual octet order (2143)
172: // REVISIT: What should this be?
173: return new Object[] { "ISO-10646-UCS-4", null };
174: }
175: if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
176: // UCS-4, unusual octect order (3412)
177: // REVISIT: What should this be?
178: return new Object[] { "ISO-10646-UCS-4", null };
179: }
180: if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
181: // UTF-16, big-endian, no BOM
182: // (or could turn out to be UCS-2...
183: // REVISIT: What should this be?
184: return new Object[] { "UTF-16BE", new Boolean(true) };
185: }
186: if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
187: // UTF-16, little-endian, no BOM
188: // (or could turn out to be UCS-2...
189: return new Object[] { "UTF-16LE", new Boolean(false) };
190: }
191: if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
192: // EBCDIC
193: // a la xerces1, return CP037 instead of EBCDIC here
194: return new Object[] { "CP037", null };
195: }
196:
197: // default encoding
198: return new Object[] { "UTF-8", null };
199:
200: } // getEncodingName(byte[],int):Object[]
201:
202: public static void main(String[] args) {
203: try {
204: File file = new File(args[0]);
205: System.out.println("url parameter = "
206: + file.toURI().toString());
207: URL url = new URL(file.toURI().toString());
208: XMLInputSource inputSource = new XMLInputSource(null, null,
209: null, new FileInputStream(file), "UTF-8");
210: BufferManager sb = BufferManager
211: .getBufferManager(inputSource);
212: CharBuffer cb = sb.getCharBuffer();
213: int i = 0;
214: while (sb.getMore()) {
215: System.out.println("Loop " + i++ + " = "
216: + sb.getCharBuffer());
217: }
218: System.out.println("End of stream reached = "
219: + sb.endOfStream());
220: System.out.println("Total no. of loops required = " + i);
221: } catch (Exception ex) {
222: ex.printStackTrace();
223: }
224: }
225: }
|