001: /*
002: * $Id: FileBufferManager.java,v 1.2 2006/04/01 06:01:49 jeffsuttor Exp $
003: */
004:
005: /*
006: * The contents of this file are subject to the terms
007: * of the Common Development and Distribution License
008: * (the License). You may not use this file except in
009: * compliance with the License.
010: *
011: * You can obtain a copy of the license at
012: * https://glassfish.dev.java.net/public/CDDLv1.0.html.
013: * See the License for the specific language governing
014: * permissions and limitations under the License.
015: *
016: * When distributing Covered Code, include this CDDL
017: * Header Notice in each file and include the License file
018: * at https://glassfish.dev.java.net/public/CDDLv1.0.html.
019: * If applicable, add the following below the CDDL Header,
020: * with the fields enclosed by brackets [] replaced by
021: * you own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * [Name of File] [ver.__] [Date]
025: *
026: * Copyright 2006 Sun Microsystems Inc. All Rights Reserved
027: */
028:
029: package com.sun.xml.stream;
030:
031: import java.io.FileInputStream;
032: import java.nio.ByteBuffer;
033: import java.nio.CharBuffer;
034: import java.nio.channels.FileChannel;
035: import java.nio.charset.Charset;
036: import java.nio.charset.CharsetDecoder;
037: import java.nio.charset.CoderResult;
038:
039: /**
040: *
041: * @author Neeraj Bajaj, Sun Microsystems
042: */
043: public class FileBufferManager extends BufferManager {
044:
045: static final int DEFAULT_LENGTH = 8192;
046: static final int THRESH_HOLD = 10 * 8192;
047: static final boolean DEBUG = false;
048:
049: CharsetDecoder decoder = null;
050: FileChannel fChannel = null;
051: CharBuffer charBuffer = null;
052: boolean calledGetMore;
053: long remaining = -1;
054: long filepos = 0;
055: long filesize = -1;
056:
057: public FileBufferManager(FileInputStream stream, String encodingName)
058: throws java.io.IOException {
059: if (DEBUG)
060: System.out.println("Encoding supplied = " + encodingName);
061: init(stream);
062: //setDecoder(encodingName);
063: setDecoder("UTF-8");
064: }
065:
066: void init(FileInputStream stream) throws java.io.IOException {
067: //allocate the CharBuffer to the capacity of DEFAULT_LENGTH
068: charBuffer = CharBuffer.allocate(2 * DEFAULT_LENGTH);
069: fChannel = stream.getChannel();
070: filesize = fChannel.size();
071: remaining = filesize;
072: if (DEBUG)
073: System.out.println("File size = " + remaining);
074:
075: }
076:
077: public boolean arrangeCapacity(int length)
078: throws java.io.IOException {
079: //this means some data has already been read from the file.
080: if (!calledGetMore) {
081: getMore();
082: }
083:
084: if (getCharBuffer().limit() - getCharBuffer().position() >= length) {
085: return true;
086: }
087: while ((getCharBuffer().limit() - getCharBuffer().position()) < length) {
088: if (endOfStream())
089: break;
090: getMore();
091: }
092: if (getCharBuffer().limit() - getCharBuffer().position() >= length) {
093: return true;
094: } else {
095: return false;
096: }
097:
098: }//arrangeCapacity
099:
100: /**
101: * This function gets more data from the file. If there is no more data a ByteBuffer of capacity 'zero'
102: * is returned. This function always returns a new ByteBuffer.
103: */
104: public ByteBuffer getMoreBytes() throws java.io.IOException {
105: int len = getLength();
106: //if there are no more bytes to be read -- which means end of file.
107: //allocate a buffer of capacity 'zero' probably we should be using observer pattern to let the scanner know
108: //that end of file is reached.
109: if (endOfStream) {
110: return ByteBuffer.allocate(0);
111: }
112: ByteBuffer bb = null;
113: //if the filesize is more than 15 KB
114: if (filesize > THRESH_HOLD) {
115: if (DEBUG)
116: System.out.println("Using MappedByteBuffer");
117: //use the map byte buffer for efficiency
118: bb = fChannel.map(FileChannel.MapMode.READ_ONLY, filepos,
119: len);
120: filepos = filepos + bb.limit();
121: } else {
122: if (DEBUG)
123: System.out.println("Using ByteBuffer.allocate("
124: + getLength() + ")");
125: bb = ByteBuffer.allocate(getLength());
126: fChannel.read(bb);
127: filepos = fChannel.position();
128: //flip this buffer
129: bb.flip();
130: }
131:
132: //This should also be equivalent to "remaining = remaining - fChannel.position()"
133: remaining = filesize - filepos;
134: //remaining = remaining - mapBuffer.limit() ;
135: if (remaining < 1) {
136: endOfStream = true;
137: }
138: return bb;
139: }
140:
141: /**
142: * This function returns true if some character data was loaded. Data is available via getCharBuffer().
143: * If before calling this function CharBuffer had some data (i.e. remaining() > 0) then this function
144: * first calls CharBuffer.compact() and then it is filled with more data.
145: *
146: * @see CharBuffer.compact()
147: * @return true if some character data was loaded. False value can be assume to be end of current
148: * entity.
149: */
150: public boolean getMore() throws java.io.IOException {
151:
152: calledGetMore = true;
153: if (DEBUG) {
154: System.out.println("Remaining no. of bytes to be read = "
155: + remaining);
156: }
157: if (endOfStream)
158: return false;
159:
160: //1. getMoreBytes()
161: ByteBuffer bb = getMoreBytes();
162:
163: //this function makes sure that after this call buffer position would be reset to '0' for reading
164: //so if position is != '0' compact this buffer
165: if (charBuffer.position() != 0) {
166: charBuffer.compact();
167: } else {
168: charBuffer.clear();
169: }
170:
171: int before = charBuffer.position();
172:
173: if (DEBUG) {
174: printByteBuffer(bb);
175: printCharBuffer(charBuffer);
176: }
177:
178: //3. decode the bytes into given CharBuffer
179: CoderResult cr = decoder.decode(bb, charBuffer, false);
180:
181: if (DEBUG) {
182: System.out.println("---------After first Decode---------");
183: System.out.println("Coder = " + cr);
184: printCharBuffer(charBuffer);
185: printByteBuffer(bb);
186: }
187:
188: //if there are still more number of bytes
189: while (bb.remaining() > 0) {
190: if (cr.isOverflow()) {
191: //this might be a costly operation if the buffer need to be resized.
192: //resizeCharBuffer(charBuffer.limit() + DEFAULT_LENGTH);
193: resizeCharBuffer(charBuffer.limit() + bb.remaining());
194: }
195: //however if the output buffer got overflowed before the bytes were over resize the buffer -- this is costly but it's fine
196: cr = decoder.decode(bb, charBuffer, true);
197: if (DEBUG) {
198: System.out
199: .println("-----In while Loop bb.remaining()----");
200: printByteBuffer(bb);
201: printCharBuffer(charBuffer);
202: }
203: }
204: //is this step necessary ?
205: if (cr.isUnderflow()) {
206: cr = decoder.decode(bb, charBuffer, true);
207: decoder.flush(charBuffer);
208: }
209:
210: //allow bytebuffer to be GCed.
211: //bb = null ;
212: //reset the decoder
213: decoder.reset();
214:
215: if (DEBUG) {
216: System.out.print("CharBuffer Position, Before = " + before);
217: System.out.println(" After= " + charBuffer.position());
218: }
219:
220: if (charBuffer.position() > before) {
221: //IMPORTANT: flip the buffer so that it is ready for get operations
222: //-- set the position back to '0' before we return.
223: charBuffer.flip();
224: return true;
225: } else {
226: return false;
227: }
228: }
229:
230: public CharBuffer getCharBuffer() {
231: return charBuffer;
232: }
233:
234: //get the remaining data from existing char buffer
235: CharSequence getCharSequence() {
236: return charBuffer.subSequence(0, charBuffer.remaining());
237: }
238:
239: //allocate a new CharBuffer for given capacity with the content filled with the CharSequence
240: CharBuffer resizeCharBuffer(int capacity) {
241: if (DEBUG) {
242: System.out.println("RESIZING THE CHAR BUFFER FOR CAPACITY "
243: + capacity);
244: System.out.println("BEFORE RESIZING CHAR BUFFER DETAILS");
245: printCharBuffer(charBuffer);
246: }
247: //allocate a new buffer of given capacity
248: CharBuffer cb = CharBuffer.allocate(capacity);
249: //we need to put the current charBuffer content to the new array
250: //so flip the current charBuffer so that it is ready for the new buffer
251: charBuffer = cb.put((CharBuffer) charBuffer.flip());
252: if (DEBUG) {
253: System.out.println("AFTER RESIZING CHAR BUFFER DETAIL");
254: printCharBuffer(charBuffer);
255: }
256: return charBuffer;
257: }
258:
259: int getLength() {
260: //decide the number of bytes that need to read
261: return remaining < 2 * DEFAULT_LENGTH ? (int) remaining
262: : 2 * DEFAULT_LENGTH;
263: }
264:
265: void setDecoder(String encoding) throws java.io.IOException {
266: //do we need to anything special for UTF-8 Reader
267: if (encoding != null) {
268: decoder = Charset.forName(encoding).newDecoder();
269: } else {
270: ByteBuffer byteBuffer = ByteBuffer.allocate(4);
271: fChannel.read(byteBuffer);
272: if (DEBUG) {
273: System.out.println("Bytes remaining in the buffer = "
274: + byteBuffer.remaining());
275: }
276: byte[] b = new byte[4];
277: byteBuffer.get(b);
278: Object[] array = getEncodingName(b, 4);
279: if (DEBUG) {
280: System.out.println("Encoding autodetected = "
281: + array[0]);
282: }
283: decoder = Charset.forName((String) array[0]).newDecoder();
284: }
285: }
286:
287: static void printByteBuffer(ByteBuffer bb) {
288: System.out.println("------------ByteBuffer Details---------");
289: System.out.println("bb.position = " + bb.position());
290: System.out.println("bb.remaining() = " + bb.remaining());
291: System.out.println("bb.limit = " + bb.limit());
292: System.out.println("bb.capacity = " + bb.capacity());
293: }
294:
295: static void printCharBuffer(CharBuffer bb) {
296: System.out.println("----------- CharBuffer Details---------");
297: System.out.println("bb.position = " + bb.position());
298: System.out.println("bb.remaining() = " + bb.remaining());
299: System.out.println("bb.limit = " + bb.limit());
300: System.out.println("bb.capacity = " + bb.capacity());
301: }
302:
303: public static void main(String[] args) {
304: try {
305: FileBufferManager fb = new FileBufferManager(
306: new FileInputStream(args[0]), "UTF-8");
307: CharBuffer cb = fb.getCharBuffer();
308: int i = 0;
309: while (fb.getMore()) {
310: System.out.println("Loop " + i++ + " = "
311: + fb.getCharBuffer().toString());
312: System.out
313: .println("------------Loop CharBuffer details--------");
314: printCharBuffer(cb);
315: }
316: System.out.println("End of file reached = "
317: + fb.endOfStream());
318: System.out.println("Total no. of loops required = " + i);
319: } catch (Exception ex) {
320: ex.printStackTrace();
321: }
322: }
323:
324: public void close() throws java.io.IOException {
325: if (fChannel != null) {
326: fChannel.close();
327: }
328: }
329:
330: public void setEncoding(String encoding) throws java.io.IOException {
331: //xxx: this need to be implemented. if the encoding is different than the current encoding we need
332: //to change the reader with the newly created reader
333: }
334:
335: }
|