001: /*
002: * The Apache Software License, Version 1.1
003: *
004: *
005: * Copyright (c) 1999 The Apache Software Foundation. All rights
006: * reserved.
007: *
008: * Redistribution and use in source and binary forms, with or without
009: * modification, are permitted provided that the following conditions
010: * are met:
011: *
012: * 1. Redistributions of source code must retain the above copyright
013: * notice, this list of conditions and the following disclaimer.
014: *
015: * 2. Redistributions in binary form must reproduce the above copyright
016: * notice, this list of conditions and the following disclaimer in
017: * the documentation and/or other materials provided with the
018: * distribution.
019: *
020: * 3. The end-user documentation included with the redistribution,
021: * if any, must include the following acknowledgment:
022: * "This product includes software developed by the
023: * Apache Software Foundation (http://www.apache.org/)."
024: * Alternately, this acknowledgment may appear in the software itself,
025: * if and wherever such third-party acknowledgments normally appear.
026: *
027: * 4. The names "Xerces" and "Apache Software Foundation" must
028: * not be used to endorse or promote products derived from this
029: * software without prior written permission. For written
030: * permission, please contact apache@apache.org.
031: *
032: * 5. Products derived from this software may not be called "Apache",
033: * nor may "Apache" appear in their name, without prior written
034: * permission of the Apache Software Foundation.
035: *
036: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
037: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
038: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
039: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
040: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
041: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
042: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
043: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
044: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
045: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
046: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
047: * SUCH DAMAGE.
048: * ====================================================================
049: *
050: * This software consists of voluntary contributions made by many
051: * individuals on behalf of the Apache Software Foundation and was
052: * originally based on software copyright (c) 1999, International
053: * Business Machines, Inc., http://www.apache.org. For more
054: * information on the Apache Software Foundation, please see
055: * <http://www.apache.org/>.
056: */
057:
058: package org.apache.xerces.utils;
059:
060: import org.apache.xerces.readers.XMLEntityHandler;
061:
062: /**
063: * This class provides the character buffers used by some of the
064: * reader classes. The instances of this class are reference
065: * counted and placed upon a free list for reallocation when no
066: * longer in use so that they are reclaimed faster and with less
067: * overhead than using the garbage collector.
068: *
069: * @version
070: */
071: public final class CharDataChunk implements StringPool.StringProducer {
072: /**
073: * Chunk size constants
074: *
075: * The reader classes use the chunk size directly for better performance.
076: */
077: public static final int CHUNK_SHIFT = 14; // 2^14 = 16k
078: public static final int CHUNK_SIZE = (1 << CHUNK_SHIFT);
079: public static final int CHUNK_MASK = CHUNK_SIZE - 1;
080:
081: /**
082: * Public constructor (factory)
083: *
084: * If there are any free instances available, remove them from the
085: * free list and reinitialize them. If not, allocate a new one.
086: *
087: * @param stringPool The string pool.
088: * @param prev The chunk that precedes this one, or null if this is
089: * the first chunk.
090: * @return The instance reused or created.
091: */
092: public static CharDataChunk createChunk(StringPool stringPool,
093: CharDataChunk prev) {
094:
095: CharDataChunk newChunk = null;
096: synchronized (CharDataChunk.class) {
097: newChunk = fgFreeChunks;
098: if (newChunk != null) {
099: fgFreeChunks = newChunk.fNextChunk;
100: } else {
101: newChunk = new CharDataChunk();
102: }
103: }
104: newChunk.fStringPool = stringPool;
105: newChunk.fRefCount = 1; // account for the reference we return to the caller
106: newChunk.fChunk = prev == null ? 0 : prev.fChunk + 1;
107: newChunk.fNextChunk = null;
108: newChunk.fPreviousChunk = prev;
109: if (prev != null) {
110: //
111: // You might think that we should call prev.addRef() here,
112: // and you would normally be correct. However, the reader
113: // that calls us is doing something like this:
114: //
115: // fCurrentChunk = CharDataChunk.createChunk(fStringPool, fCurrentChunk);
116: //
117: // During this call, the fCurrentChunk changes from the
118: // previous chunk to this chunk, losing the reference to
119: // the previous chunk. To avoid needing code like this:
120: //
121: // CharDataChunk prevChunk = fCurrentChunk;
122: // fCurrentChunk = CharDataChunk.createChunk(fStringPool, prevChunk);
123: // prevChunk.releaseChunk();
124: //
125: // We "adopt the reference" to the previous chunk into our
126: // fPreviousChunk field, since the addRef() followed by a
127: // removeRef() from the caller after we return just cancel
128: // each other out. The previous chunk reference will go
129: // away later when clearPreviousChunk is called.
130: //
131: prev.setNextChunk(newChunk);
132: }
133: return newChunk;
134: }
135:
136: /**
137: * Return the instance that contains the specified offset.
138: *
139: * This method must always be invoked on an instance that
140: * contains the specified offset, or an instance the contains
141: * an offset greater than, i.e. after, the instance we are
142: * to return.
143: *
144: * @param offset The offset to find.
145: * @return The instance containing the offset.
146: */
147: public CharDataChunk chunkFor(int offset) {
148: int firstChunk = offset >> CHUNK_SHIFT;
149: if (firstChunk == fChunk)
150: return this ;
151: CharDataChunk dataChunk = fPreviousChunk;
152: while (firstChunk != dataChunk.fChunk)
153: dataChunk = dataChunk.fPreviousChunk;
154: return dataChunk;
155: }
156:
157: /**
158: * Get the character array of this instance.
159: *
160: * The reader classes access the data of each instance directly.
161: * This class only exists to manage the lifetime of the references
162: * to each instance. It is not intended to hide from the readers
163: * the fact that each instance contains a buffer of character data.
164: *
165: * @return The character data.
166: */
167: public char[] toCharArray() {
168: return fData;
169: }
170:
171: /**
172: * Set the character array for this instance.
173: *
174: * @param data The character data.
175: */
176: public void setCharArray(char[] data) {
177: fData = data;
178: }
179:
180: /**
181: * Get the next chunk.
182: *
183: * @return The instance that follows this one in the list of chunks,
184: * or null if there is no such instance.
185: */
186: public CharDataChunk nextChunk() {
187: return fNextChunk;
188: }
189:
190: /**
191: * Clean the previous chunk reference.
192: *
193: * When a reader has reached a point where it knows that it will no
194: * longer call the addString, addSymbol, or append methods with an
195: * offset that is contained within a chunk that precedes this one,
196: * it will call this method to clear the reference from this chunk to
197: * the one preceding it. This allows the references between chunks
198: * to be dropped as we go and allow the unused instances to be placed
199: * upon the free list for reuse.
200: *
201: * @return <code>true</code> if we cleared the previous chunk pointer;
202: * otherwise <code>false</code> if the pointer is already null.
203: */
204: public boolean clearPreviousChunk() {
205: if (fPreviousChunk != null) {
206: fPreviousChunk.clearNextChunk();
207: fPreviousChunk.removeRef();
208: fPreviousChunk = null;
209: return true;
210: }
211: return false;
212: }
213:
214: /**
215: * Release the reference to this chunk held by the reader that allocated
216: * this instance. Called at end of input to release the last chunk in the
217: * list used by the reader.
218: */
219: public void releaseChunk() {
220: removeRef();
221: }
222:
223: /**
224: * Add a range from this chunk to the <code>StringPool</code>
225: *
226: * @param offset the offset of the first character to be added
227: * @param length the number of characters to add
228: * @return the <code>StringPool</code> handle that was added.
229: */
230: public int addString(int offset, int length) {
231: int chunk = offset >> CHUNK_SHIFT;
232: if (chunk != fChunk) {
233: if (fPreviousChunk == null)
234: throw new RuntimeException(
235: new ImplementationMessages().createMessage(
236: null, ImplementationMessages.INT_PCN,
237: 0, null));
238: return fPreviousChunk.addString(offset, length);
239: }
240: int lastChunk = (offset + length - 1) >> CHUNK_SHIFT;
241: if (chunk == lastChunk) {
242: addRef();
243: return fStringPool.addString(this , offset & CHUNK_MASK,
244: length);
245: }
246: String str = toString(offset & CHUNK_MASK, length);
247: return fStringPool.addString(str);
248: }
249:
250: /**
251: * Add a range from this chunk to the <code>StringPool</code> as a symbol
252: *
253: * @param offset the offset of the first character to be added
254: * @param length the number of characters to add
255: * @param hashcode hashcode to match to ensure uniqueness
256: * @return the <code>StringPool</code> handle that was added.
257: */
258: public int addSymbol(int offset, int length, int hashcode) {
259: int chunk = offset >> CHUNK_SHIFT;
260: if (chunk != fChunk) {
261: if (fPreviousChunk == null)
262: throw new RuntimeException(
263: new ImplementationMessages().createMessage(
264: null, ImplementationMessages.INT_PCN,
265: 0, null));
266: return fPreviousChunk.addSymbol(offset, length, hashcode);
267: }
268: int lastChunk = (offset + length - 1) >> CHUNK_SHIFT;
269: int index = offset & CHUNK_MASK;
270: if (chunk == lastChunk) {
271: if (hashcode == 0)
272: hashcode = StringHasher.hashChars(fData, index, length);
273: int symbol = fStringPool.lookupSymbol(this , offset
274: & CHUNK_MASK, length, hashcode);
275: if (symbol == -1) {
276: String str = toString(offset & CHUNK_MASK, length);
277: symbol = fStringPool.addNewSymbol(str, hashcode);
278: }
279: return symbol;
280: }
281: String str = toString(offset & CHUNK_MASK, length);
282: return fStringPool.addSymbol(str);
283: }
284:
285: /**
286: * Append data from a <code>CharBuffer</code> to this chunk.
287: *
288: * @param charBuffer the buffer to be appended.
289: * @param offset the offset of the first character to be appended.
290: * @param length the number of characters to append.
291: */
292: public void append(XMLEntityHandler.CharBuffer charBuffer,
293: int offset, int length) {
294: //
295: // Setup for the operation.
296: //
297: CharDataChunk dataChunk = chunkFor(offset);
298: int index = offset & CHUNK_MASK;
299: int nbytes = (index + length <= CHUNK_SIZE) ? length
300: : CHUNK_SIZE - index;
301: //
302: // Visit each Chunk in turn until we are done.
303: //
304: while (true) {
305: charBuffer.append(dataChunk.fData, index, nbytes);
306: length -= nbytes;
307: if (length == 0)
308: break;
309: dataChunk = dataChunk.fNextChunk;
310: index = 0;
311: nbytes = length <= CHUNK_SIZE ? length : CHUNK_SIZE;
312: }
313: }
314:
315: //
316: // StringProducer interfaces
317: //
318: /**
319: * Return a range of characters as a <code>String</code>.
320: *
321: * @param offset the offset of the first character to convert.
322: * @param length the number of characters to convert.
323: * @return the <code>String</code>
324: */
325: public String toString(int offset, int length) {
326: if (offset + length <= CHUNK_SIZE) {
327: //
328: // All the chars are in the same chunk
329: //
330: return new String(fData, offset, length);
331: }
332: //
333: // The data is spread across chunks, so we need to build it in pieces.
334: //
335: StringBuffer sb = new StringBuffer(length);
336: //
337: // Copy the partial data from the first chunk.
338: //
339: int nbytes = CHUNK_SIZE - offset;
340: sb.append(fData, offset, nbytes);
341: length -= nbytes;
342: //
343: // Use each chunk in turn until we are done.
344: //
345: CharDataChunk aChunk = fNextChunk;
346: do {
347: nbytes = length <= CHUNK_SIZE ? length : CHUNK_SIZE;
348: sb.append(aChunk.fData, 0, nbytes);
349: length -= nbytes;
350: aChunk = aChunk.fNextChunk;
351: } while (length > 0);
352: String retval = sb.toString();
353: sb = null; // REVISIT - does this help gc ?
354: return retval;
355: }
356:
357: /**
358: * Release a string from this chunk
359: *
360: * @param offset the offset of the first character to be released
361: * @param length the number of characters to release.
362: */
363: public void releaseString(int offset, int length) {
364: removeRef();
365: }
366:
367: /**
368: * Compare a range in this chunk and a range in a character array for equality
369: *
370: * @param offset the offset of the first character in the range in this chunk
371: * @param length the number of characters in the range to compare
372: * @param strChars the character array to compare
373: * @param strOffset the offset of the first character in the range in strChars
374: * @param strLength the number of characters to release.
375: * @return true if the ranges are character-wise equal, otherwise false.
376: */
377: public boolean equalsString(int offset, int length,
378: char[] strChars, int strOffset, int strLength) {
379: if (length != strLength)
380: return false;
381: if (offset + length <= CHUNK_SIZE) {
382: //
383: // All the chars are in the same chunk
384: //
385: for (int i = 0; i < length; i++) {
386: if (fData[offset++] != strChars[strOffset++])
387: return false;
388: }
389: return true;
390: }
391: //
392: // Compare the partial data from the first chunk.
393: //
394: int nbytes = CHUNK_SIZE - offset;
395: length -= nbytes;
396: while (nbytes-- > 0) {
397: if (fData[offset++] != strChars[strOffset++])
398: return false;
399: }
400: //
401: // Check each chunk in turn until we are done.
402: //
403: CharDataChunk aChunk = fNextChunk;
404: do {
405: offset = 0;
406: nbytes = length <= CHUNK_SIZE ? length : CHUNK_SIZE;
407: length -= nbytes;
408: while (nbytes-- > 0) {
409: if (aChunk.fData[offset++] != strChars[strOffset++])
410: return false;
411: }
412: aChunk = aChunk.fNextChunk;
413: } while (length > 0);
414: return true;
415: }
416:
417: //
418: // Private methods
419: //
420:
421: //
422: // Constructor for factory method.
423: //
424: private CharDataChunk() {
425: }
426:
427: //
428: //
429: //
430: private void addRef() {
431: fRefCount++;
432: }
433:
434: //
435: //
436: //
437: private void removeRef() {
438: fRefCount--;
439: if (fRefCount == 0) {
440: fStringPool = null;
441: fChunk = -1;
442: fPreviousChunk = null;
443: synchronized (CharDataChunk.class) {
444: /*** Only keep one free chunk at a time! ***
445: fNextChunk = fgFreeChunks;
446: /***/
447: fNextChunk = null;
448: fgFreeChunks = this ;
449: }
450: }
451: }
452:
453: //
454: //
455: //
456: private void clearNextChunk() {
457: if (fNextChunk != null)
458: fNextChunk.removeRef();
459: fNextChunk = null;
460: }
461:
462: //
463: //
464: //
465: private void setNextChunk(CharDataChunk nextChunk) {
466: if (fNextChunk != null) {
467: throw new RuntimeException("CharDataChunk::setNextChunk");
468: }
469: nextChunk.addRef();
470: fNextChunk = nextChunk;
471: }
472:
473: //
474: // Private instance variables.
475: //
476: private StringPool fStringPool;
477: private int fRefCount;
478: private int fChunk;
479: private char[] fData = null;
480: private CharDataChunk fNextChunk;
481: private CharDataChunk fPreviousChunk;
482: private static CharDataChunk fgFreeChunks = null;
483: }
|