001: /*
002: * The Apache Software License, Version 1.1
003: *
004: *
005: * Copyright (c) 1999 The Apache Software Foundation. All rights
006: * reserved.
007: *
008: * Redistribution and use in source and binary forms, with or without
009: * modification, are permitted provided that the following conditions
010: * are met:
011: *
012: * 1. Redistributions of source code must retain the above copyright
013: * notice, this list of conditions and the following disclaimer.
014: *
015: * 2. Redistributions in binary form must reproduce the above copyright
016: * notice, this list of conditions and the following disclaimer in
017: * the documentation and/or other materials provided with the
018: * distribution.
019: *
020: * 3. The end-user documentation included with the redistribution,
021: * if any, must include the following acknowledgment:
022: * "This product includes software developed by the
023: * Apache Software Foundation (http://www.apache.org/)."
024: * Alternately, this acknowledgment may appear in the software itself,
025: * if and wherever such third-party acknowledgments normally appear.
026: *
027: * 4. The names "Xerces" and "Apache Software Foundation" must
028: * not be used to endorse or promote products derived from this
029: * software without prior written permission. For written
030: * permission, please contact apache@apache.org.
031: *
032: * 5. Products derived from this software may not be called "Apache",
033: * nor may "Apache" appear in their name, without prior written
034: * permission of the Apache Software Foundation.
035: *
036: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
037: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
038: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
039: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
040: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
041: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
042: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
043: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
044: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
045: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
046: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
047: * SUCH DAMAGE.
048: * ====================================================================
049: *
050: * This software consists of voluntary contributions made by many
051: * individuals on behalf of the Apache Software Foundation and was
052: * originally based on software copyright (c) 1999, International
053: * Business Machines, Inc., http://www.apache.org. For more
054: * information on the Apache Software Foundation, please see
055: * <http://www.apache.org/>.
056: */
057:
058: package org.apache.xerces.utils;
059:
060: import org.apache.xerces.readers.XMLEntityHandler;
061: import java.util.Vector;
062:
063: //
064: //
065: //
066: public class UTF8DataChunk implements StringPool.StringProducer {
067: //
068: // Chunk size constants
069: //
070: public static final int CHUNK_SHIFT = 14; // 2^14 = 16k
071: public static final int CHUNK_SIZE = (1 << CHUNK_SHIFT);
072: public static final int CHUNK_MASK = CHUNK_SIZE - 1;
073:
074: //
075: // Public constructor (factory)
076: //
077: public static UTF8DataChunk createChunk(StringPool stringPool,
078: UTF8DataChunk prev) {
079:
080: synchronized (UTF8DataChunk.class) {
081: if (fgFreeChunks != null) {
082: UTF8DataChunk newChunk = fgFreeChunks;
083: fgFreeChunks = newChunk.fNextChunk;
084: newChunk.fNextChunk = null;
085: newChunk.init(stringPool, prev);
086: return newChunk;
087: }
088: }
089: UTF8DataChunk chunk = new UTF8DataChunk(stringPool, prev);
090: return chunk;
091: }
092:
093: //
094: //
095: //
096: public final byte[] toByteArray() {
097: return fData;
098: }
099:
100: //
101: //
102: //
103: public void setByteArray(byte[] data) {
104: fData = data;
105: }
106:
107: //
108: //
109: //
110: public UTF8DataChunk nextChunk() {
111: return fNextChunk;
112: }
113:
114: //
115: //
116: //
117: public boolean clearPreviousChunk() {
118: if (fPreviousChunk != null) {
119: fPreviousChunk.setNextChunk(null);
120: fPreviousChunk.removeRef();
121: //System.err.println("[" + fPreviousChunk.fChunk + "] " + fPreviousChunk.fRefCount + " refs after clearPreviousChunk");
122: //System.err.println("[" + fChunk + "] " + fRefCount + " refs after clearPreviousChunk");
123: fPreviousChunk = null;
124: return true;
125: }
126: return fChunk == 0;
127: }
128:
129: //
130: //
131: //
132: public void releaseChunk() {
133: removeRef();
134: //System.err.println("[" + fChunk + "] " + fRefCount + " refs after releaseChunk");
135: }
136:
137: //
138: //
139: //
140: public void releaseString(int offset, int length) {
141: removeRef();
142: }
143:
144: //
145: //
146: //
147: public String toString(int offset, int length) {
148:
149: synchronized (fgTempBufferLock) {
150: int outOffset = 0;
151: UTF8DataChunk dataChunk = this ;
152: int endOffset = offset + length;
153: int index = offset & CHUNK_MASK;
154: byte[] data = fData;
155: boolean skiplf = false;
156: while (offset < endOffset) {
157: int b0 = data[index++] & 0xff;
158: offset++;
159: if (index == CHUNK_SIZE && offset < endOffset) {
160: dataChunk = dataChunk.fNextChunk;
161: data = dataChunk.fData;
162: index = 0;
163: }
164: if (b0 < 0x80) {
165: if (skiplf) {
166: skiplf = false;
167: if (b0 == 0x0A)
168: continue;
169: }
170: if (b0 == 0x0D) {
171: b0 = 0x0A;
172: skiplf = true;
173: }
174: try {
175: fgTempBuffer[outOffset] = (char) b0;
176: outOffset++;
177: } catch (NullPointerException ex) {
178: fgTempBuffer = new char[CHUNK_SIZE];
179: fgTempBuffer[outOffset++] = (char) b0;
180: } catch (ArrayIndexOutOfBoundsException ex) {
181: char[] newBuffer = new char[outOffset * 2];
182: System.arraycopy(fgTempBuffer, 0, newBuffer, 0,
183: outOffset);
184: fgTempBuffer = newBuffer;
185: fgTempBuffer[outOffset++] = (char) b0;
186: }
187: continue;
188: }
189: int b1 = data[index++] & 0xff;
190: offset++;
191: if (index == CHUNK_SIZE && offset < endOffset) {
192: dataChunk = dataChunk.fNextChunk;
193: data = dataChunk.fData;
194: index = 0;
195: }
196: if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx
197: int ch = ((0x1f & b0) << 6) + (0x3f & b1); // yyy yyxx xxxx (0x80 to 0x7ff)
198: try {
199: fgTempBuffer[outOffset] = (char) ch;
200: outOffset++;
201: } catch (NullPointerException ex) {
202: fgTempBuffer = new char[CHUNK_SIZE];
203: fgTempBuffer[outOffset++] = (char) ch;
204: } catch (ArrayIndexOutOfBoundsException ex) {
205: char[] newBuffer = new char[outOffset * 2];
206: System.arraycopy(fgTempBuffer, 0, newBuffer, 0,
207: outOffset);
208: fgTempBuffer = newBuffer;
209: fgTempBuffer[outOffset++] = (char) ch;
210: }
211: continue;
212: }
213: int b2 = data[index++] & 0xff;
214: offset++;
215: if (index == CHUNK_SIZE && offset < endOffset) {
216: dataChunk = dataChunk.fNextChunk;
217: data = dataChunk.fData;
218: index = 0;
219: }
220: if ((0xf0 & b0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
221: int ch = ((0x0f & b0) << 12) + ((0x3f & b1) << 6)
222: + (0x3f & b2); // zzzz yyyy yyxx xxxx (0x800 to 0xffff)
223: try {
224: fgTempBuffer[outOffset] = (char) ch;
225: outOffset++;
226: } catch (NullPointerException ex) {
227: fgTempBuffer = new char[CHUNK_SIZE];
228: fgTempBuffer[outOffset++] = (char) ch;
229: } catch (ArrayIndexOutOfBoundsException ex) {
230: char[] newBuffer = new char[outOffset * 2];
231: System.arraycopy(fgTempBuffer, 0, newBuffer, 0,
232: outOffset);
233: fgTempBuffer = newBuffer;
234: fgTempBuffer[outOffset++] = (char) ch;
235: }
236: continue;
237: }
238: int b3 = data[index++] & 0xff; // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
239: offset++;
240: if (index == CHUNK_SIZE && offset < endOffset) {
241: dataChunk = dataChunk.fNextChunk;
242: data = dataChunk.fData;
243: index = 0;
244: }
245: int ch = ((0x0f & b0) << 18) + ((0x3f & b1) << 12)
246: + ((0x3f & b2) << 6) + (0x3f & b3);
247: if (ch < 0x10000) {
248: try {
249: fgTempBuffer[outOffset] = (char) ch;
250: outOffset++;
251: } catch (NullPointerException ex) {
252: fgTempBuffer = new char[CHUNK_SIZE];
253: fgTempBuffer[outOffset++] = (char) ch;
254: } catch (ArrayIndexOutOfBoundsException ex) {
255: char[] newBuffer = new char[outOffset * 2];
256: System.arraycopy(fgTempBuffer, 0, newBuffer, 0,
257: outOffset);
258: fgTempBuffer = newBuffer;
259: fgTempBuffer[outOffset++] = (char) ch;
260: }
261: } else {
262: char ch1 = (char) (((ch - 0x00010000) >> 10) + 0xd800);
263: char ch2 = (char) (((ch - 0x00010000) & 0x3ff) + 0xdc00);
264: try {
265: fgTempBuffer[outOffset] = (char) ch1;
266: outOffset++;
267: } catch (NullPointerException ex) {
268: fgTempBuffer = new char[CHUNK_SIZE];
269: fgTempBuffer[outOffset++] = (char) ch1;
270: } catch (ArrayIndexOutOfBoundsException ex) {
271: char[] newBuffer = new char[outOffset * 2];
272: System.arraycopy(fgTempBuffer, 0, newBuffer, 0,
273: outOffset);
274: fgTempBuffer = newBuffer;
275: fgTempBuffer[outOffset++] = (char) ch1;
276: }
277: try {
278: fgTempBuffer[outOffset] = (char) ch2;
279: outOffset++;
280: } catch (NullPointerException ex) {
281: fgTempBuffer = new char[CHUNK_SIZE];
282: fgTempBuffer[outOffset++] = (char) ch2;
283: } catch (ArrayIndexOutOfBoundsException ex) {
284: char[] newBuffer = new char[outOffset * 2];
285: System.arraycopy(fgTempBuffer, 0, newBuffer, 0,
286: outOffset);
287: fgTempBuffer = newBuffer;
288: fgTempBuffer[outOffset++] = (char) ch2;
289: }
290: }
291: }
292: return new String(fgTempBuffer, 0, outOffset);
293: }
294: }
295:
296: //
297: //
298: //
299: public boolean equalsString(int offset, int length,
300: char[] strChars, int strOffset, int strLength) {
301: UTF8DataChunk dataChunk = this ;
302: int endOffset = offset + length;
303: int index = offset & CHUNK_MASK;
304: byte[] data = fData;
305: boolean skiplf = false;
306: while (offset < endOffset) {
307: if (strLength-- == 0)
308: return false;
309: int b0 = data[index++] & 0xff;
310: offset++;
311: if (index == CHUNK_SIZE && offset < endOffset) {
312: dataChunk = dataChunk.fNextChunk;
313: data = dataChunk.fData;
314: index = 0;
315: }
316: if (b0 < 0x80) {
317: if (skiplf) {
318: skiplf = false;
319: if (b0 == 0x0A)
320: continue;
321: }
322: if (b0 == 0x0D) {
323: b0 = 0x0A;
324: skiplf = true;
325: }
326: if (b0 != strChars[strOffset++])
327: return false;
328: continue;
329: }
330: int b1 = data[index++] & 0xff;
331: offset++;
332: if (index == CHUNK_SIZE && offset < endOffset) {
333: dataChunk = dataChunk.fNextChunk;
334: data = dataChunk.fData;
335: index = 0;
336: }
337: if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx
338: int ch = ((0x1f & b0) << 6) + (0x3f & b1);
339: if (ch != strChars[strOffset++])
340: return false;
341: continue;
342: }
343: int b2 = data[index++] & 0xff;
344: offset++;
345: if (index == CHUNK_SIZE && offset < endOffset) {
346: dataChunk = dataChunk.fNextChunk;
347: data = dataChunk.fData;
348: index = 0;
349: }
350: if ((0xf0 & b0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
351: int ch = ((0x0f & b0) << 12) + ((0x3f & b1) << 6)
352: + (0x3f & b2);
353: if (ch != strChars[strOffset++])
354: return false;
355: continue;
356: }
357: int b3 = data[index++] & 0xff; // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
358: offset++;
359: if (index == CHUNK_SIZE && offset < endOffset) {
360: dataChunk = dataChunk.fNextChunk;
361: data = dataChunk.fData;
362: index = 0;
363: }
364: int ch = ((0x0f & b0) << 18) + ((0x3f & b1) << 12)
365: + ((0x3f & b2) << 6) + (0x3f & b3);
366: if (ch < 0x10000) {
367: if (ch != strChars[strOffset++])
368: return false;
369: } else {
370: if ((((ch - 0x00010000) >> 10) + 0xd800) != strChars[strOffset++])
371: return false;
372: if (strLength-- == 0)
373: return false;
374: if ((((ch - 0x00010000) & 0x3ff) + 0xdc00) != strChars[strOffset++])
375: return false;
376: }
377: }
378: return (strLength == 0);
379: }
380:
381: //
382: //
383: //
384: public int addString(int offset, int length) {
385: if (length == 0)
386: return StringPool.EMPTY_STRING;
387: int chunk = offset >> CHUNK_SHIFT;
388: if (chunk != fChunk) {
389: if (fPreviousChunk == null)
390: throw new RuntimeException(
391: new ImplementationMessages().createMessage(
392: null, ImplementationMessages.INT_PCN,
393: 0, null));
394: return fPreviousChunk.addString(offset, length);
395: }
396: int lastChunk = (offset + length - 1) >> CHUNK_SHIFT;
397: if (chunk == lastChunk) {
398: addRef();
399: return fStringPool.addString(this , offset & CHUNK_MASK,
400: length);
401: }
402: String str = toString(offset & CHUNK_MASK, length);
403: return fStringPool.addString(str);
404: }
405:
406: //
407: //
408: //
409: public int addSymbol(int offset, int length, int hashcode) {
410: if (length == 0)
411: return StringPool.EMPTY_STRING;
412: int chunk = offset >> CHUNK_SHIFT;
413: if (chunk != fChunk) {
414: if (fPreviousChunk == null)
415: throw new RuntimeException(
416: new ImplementationMessages().createMessage(
417: null, ImplementationMessages.INT_PCN,
418: 0, null));
419: return fPreviousChunk.addSymbol(offset, length, hashcode);
420: }
421: int lastChunk = (offset + length - 1) >> CHUNK_SHIFT;
422: int index = offset & CHUNK_MASK;
423: if (chunk == lastChunk) {
424: if (hashcode == 0) {
425: hashcode = getHashcode(index, length);
426: }
427: int symbol = fStringPool.lookupSymbol(this , index, length,
428: hashcode);
429: if (symbol == -1) {
430: String str = toString(index, length);
431: symbol = fStringPool.addNewSymbol(str, hashcode);
432: }
433: return symbol;
434: }
435: String str = toString(index, length);
436: return fStringPool.addSymbol(str);
437: }
438:
439: //
440: //
441: //
442: public void append(XMLEntityHandler.CharBuffer charBuffer,
443: int offset, int length) {
444: //
445: // Setup for the operation.
446: //
447: UTF8DataChunk dataChunk = chunkFor(offset);
448: int endOffset = offset + length;
449: int index = offset & CHUNK_MASK;
450: byte[] data = dataChunk.fData;
451: boolean skiplf = false;
452: while (offset < endOffset) {
453: int b0 = data[index++] & 0xff;
454: offset++;
455: if (index == CHUNK_SIZE && offset < endOffset) {
456: dataChunk = dataChunk.fNextChunk;
457: data = dataChunk.fData;
458: index = 0;
459: }
460: if (b0 < 0x80) {
461: if (skiplf) {
462: skiplf = false;
463: if (b0 == 0x0A)
464: continue;
465: }
466: if (b0 == 0x0D) {
467: b0 = 0x0A;
468: skiplf = true;
469: }
470: charBuffer.append((char) b0);
471: continue;
472: }
473: int b1 = data[index++] & 0xff;
474: offset++;
475: if (index == CHUNK_SIZE && offset < endOffset) {
476: dataChunk = dataChunk.fNextChunk;
477: data = dataChunk.fData;
478: index = 0;
479: }
480: if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx
481: int ch = ((0x1f & b0) << 6) + (0x3f & b1);
482: charBuffer.append((char) ch); // yyy yyxx xxxx (0x80 to 0x7ff)
483: continue;
484: }
485: int b2 = data[index++] & 0xff;
486: offset++;
487: if (index == CHUNK_SIZE && offset < endOffset) {
488: dataChunk = dataChunk.fNextChunk;
489: data = dataChunk.fData;
490: index = 0;
491: }
492: if ((0xf0 & b0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
493: int ch = ((0x0f & b0) << 12) + ((0x3f & b1) << 6)
494: + (0x3f & b2);
495: charBuffer.append((char) ch); // zzzz yyyy yyxx xxxx (0x800 to 0xffff)
496: continue;
497: }
498: int b3 = data[index++] & 0xff; // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
499: offset++;
500: if (index == CHUNK_SIZE && offset < endOffset) {
501: dataChunk = dataChunk.fNextChunk;
502: data = dataChunk.fData;
503: index = 0;
504: }
505: int ch = ((0x0f & b0) << 18) + ((0x3f & b1) << 12)
506: + ((0x3f & b2) << 6) + (0x3f & b3);
507: if (ch < 0x10000)
508: charBuffer.append((char) ch);
509: else {
510: charBuffer
511: .append((char) (((ch - 0x00010000) >> 10) + 0xd800));
512: charBuffer
513: .append((char) (((ch - 0x00010000) & 0x3ff) + 0xdc00));
514: }
515: }
516: }
517:
518: //
519: //
520: //
521: private int getHashcode(int index, int length) {
522: int endIndex = index + length;
523: int hashcode = 0;
524: byte[] data = fData;
525: while (index < endIndex) {
526: int b0 = data[index++] & 0xff;
527: if ((b0 & 0x80) == 0) {
528: hashcode = StringHasher.hashChar(hashcode, b0);
529: continue;
530: }
531: int b1 = data[index++] & 0xff;
532: if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx
533: int ch = ((0x1f & b0) << 6) + (0x3f & b1); // yyy yyxx xxxx (0x80 to 0x7ff)
534: hashcode = StringHasher.hashChar(hashcode, ch);
535: continue;
536: }
537: int b2 = data[index++] & 0xff;
538: if ((0xf0 & b0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
539: int ch = ((0x0f & b0) << 12) + ((0x3f & b1) << 6)
540: + (0x3f & b2); // zzzz yyyy yyxx xxxx (0x800 to 0xffff)
541: hashcode = StringHasher.hashChar(hashcode, ch);
542: continue;
543: }
544: int b3 = data[index++] & 0xff; // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
545: int ch = ((0x0f & b0) << 18) + ((0x3f & b1) << 12)
546: + ((0x3f & b2) << 6) + (0x3f & b3);
547: if (ch < 0x10000)
548: hashcode = StringHasher.hashChar(hashcode, ch);
549: else {
550: hashcode = StringHasher.hashChar(hashcode,
551: (int) (((ch - 0x00010000) >> 10) + 0xd800));
552: hashcode = StringHasher.hashChar(hashcode,
553: (int) (((ch - 0x00010000) & 0x3ff) + 0xdc00));
554: }
555: }
556: return StringHasher.finishHash(hashcode);
557: }
558:
559: //
560: //
561: //
562: private void init(StringPool stringPool, UTF8DataChunk prev) {
563: fStringPool = stringPool;
564: fRefCount = 1;
565: fChunk = prev == null ? 0 : prev.fChunk + 1;
566: fNextChunk = null;
567: fPreviousChunk = prev;
568: if (prev != null) {
569: prev.addRef();
570: prev.setNextChunk(this );
571: prev.removeRef();
572: }
573: }
574:
575: //
576: // Constructor for factory method.
577: //
578: private UTF8DataChunk(StringPool stringPool, UTF8DataChunk prev) {
579: init(stringPool, prev);
580: }
581:
582: //
583: //
584: //
585: private final UTF8DataChunk chunkFor(int offset) {
586: if ((offset >> CHUNK_SHIFT) == fChunk)
587: return this ;
588: return slowChunkFor(offset);
589: }
590:
591: private UTF8DataChunk slowChunkFor(int offset) {
592: int firstChunk = offset >> CHUNK_SHIFT;
593: UTF8DataChunk dataChunk = this ;
594: while (firstChunk != dataChunk.fChunk)
595: dataChunk = dataChunk.fPreviousChunk;
596: return dataChunk;
597: }
598:
599: //
600: //
601: //
602: private final void addRef() {
603: fRefCount++;
604: //System.err.println(">>[" + fChunk + "] " + (fRefCount - 1) + " -> " + fRefCount);
605: }
606:
607: //
608: //
609: //
610: private final void removeRef() {
611: fRefCount--;
612: //System.err.println("<<[" + fChunk + "] " + (fRefCount + 1) + " -> " + fRefCount);
613: if (fRefCount == 0) {
614: //System.err.println("[" + fChunk + "] recycled a " + fData.length + " character array");
615: fStringPool = null;
616: fChunk = -1;
617: // fData = null;
618: fPreviousChunk = null;
619: synchronized (UTF8DataChunk.class) {
620: /*** Only keep one free chunk at a time! ***
621: fNextChunk = fgFreeChunks;
622: /***/
623: fNextChunk = null;
624: fgFreeChunks = this ;
625: }
626: }
627: }
628:
629: //
630: //
631: //
632: private void setNextChunk(UTF8DataChunk nextChunk) {
633: if (nextChunk == null) {
634: if (fNextChunk != null)
635: fNextChunk.removeRef();
636: } else if (fNextChunk == null) {
637: nextChunk.addRef();
638: } else
639: throw new RuntimeException("UTF8DataChunk::setNextChunk");
640: fNextChunk = nextChunk;
641: }
642:
643: //
644: //
645: //
646: private StringPool fStringPool;
647: private int fRefCount;
648: private int fChunk;
649: private byte[] fData = null;
650: private UTF8DataChunk fNextChunk;
651: private UTF8DataChunk fPreviousChunk;
652: private static UTF8DataChunk fgFreeChunks = null;
653: private static char[] fgTempBuffer = null;
654: private static Object fgTempBufferLock = new Object();
655: }
|