0001: /*
0002: * Copyright 1999-2004 The Apache Software Foundation.
0003: *
0004: * Licensed under the Apache License, Version 2.0 (the "License");
0005: * you may not use this file except in compliance with the License.
0006: * You may obtain a copy of the License at
0007: *
0008: * http://www.apache.org/licenses/LICENSE-2.0
0009: *
0010: * Unless required by applicable law or agreed to in writing, software
0011: * distributed under the License is distributed on an "AS IS" BASIS,
0012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0013: * See the License for the specific language governing permissions and
0014: * limitations under the License.
0015: */
0016: /*
0017: * $Id: FastStringBuffer.java,v 1.27 2005/01/23 00:52:41 mcnamara Exp $
0018: */
0019: package org.apache.xml.utils;
0020:
0021: /**
0022: * Bare-bones, unsafe, fast string buffer. No thread-safety, no
0023: * parameter range checking, exposed fields. Note that in typical
0024: * applications, thread-safety of a StringBuffer is a somewhat
0025: * dubious concept in any case.
0026: * <p>
0027: * Note that Stree and DTM used a single FastStringBuffer as a string pool,
0028: * by recording start and length indices within this single buffer. This
0029: * minimizes heap overhead, but of course requires more work when retrieving
0030: * the data.
0031: * <p>
0032: * FastStringBuffer operates as a "chunked buffer". Doing so
0033: * reduces the need to recopy existing information when an append
0034: * exceeds the space available; we just allocate another chunk and
0035: * flow across to it. (The array of chunks may need to grow,
0036: * admittedly, but that's a much smaller object.) Some excess
0037: * recopying may arise when we extract Strings which cross chunk
0038: * boundaries; larger chunks make that less frequent.
0039: * <p>
0040: * The size values are parameterized, to allow tuning this code. In
0041: * theory, Result Tree Fragments might want to be tuned differently
0042: * from the main document's text.
0043: * <p>
0044: * %REVIEW% An experiment in self-tuning is
0045: * included in the code (using nested FastStringBuffers to achieve
0046: * variation in chunk sizes), but this implementation has proven to
0047: * be problematic when data may be being copied from the FSB into itself.
0048: * We should either re-architect that to make this safe (if possible)
0049: * or remove that code and clean up for performance/maintainability reasons.
0050: * <p>
0051: */
0052: public class FastStringBuffer {
0053: // If nonzero, forces the inial chunk size.
0054: /**/static final int DEBUG_FORCE_INIT_BITS = 0;
0055:
0056: // %BUG% %REVIEW% *****PROBLEM SUSPECTED: If data from an FSB is being copied
0057: // back into the same FSB (variable set from previous variable, for example)
0058: // and blocksize changes in mid-copy... there's risk of severe malfunction in
0059: // the read process, due to how the resizing code re-jiggers storage. Arggh.
0060: // If we want to retain the variable-size-block feature, we need to reconsider
0061: // that issue. For now, I have forced us into fixed-size mode.
0062: static final boolean DEBUG_FORCE_FIXED_CHUNKSIZE = true;
0063:
0064: /** Manifest constant: Suppress leading whitespace.
0065: * This should be used when normalize-to-SAX is called for the first chunk of a
0066: * multi-chunk output, or one following unsuppressed whitespace in a previous
0067: * chunk.
0068: * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int)
0069: */
0070: public static final int SUPPRESS_LEADING_WS = 0x01;
0071:
0072: /** Manifest constant: Suppress trailing whitespace.
0073: * This should be used when normalize-to-SAX is called for the last chunk of a
0074: * multi-chunk output; it may have to be or'ed with SUPPRESS_LEADING_WS.
0075: */
0076: public static final int SUPPRESS_TRAILING_WS = 0x02;
0077:
0078: /** Manifest constant: Suppress both leading and trailing whitespace.
0079: * This should be used when normalize-to-SAX is called for a complete string.
0080: * (I'm not wild about the name of this one. Ideas welcome.)
0081: * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int)
0082: */
0083: public static final int SUPPRESS_BOTH = SUPPRESS_LEADING_WS
0084: | SUPPRESS_TRAILING_WS;
0085:
0086: /** Manifest constant: Carry trailing whitespace of one chunk as leading
0087: * whitespace of the next chunk. Used internally; I don't see any reason
0088: * to make it public right now.
0089: */
0090: private static final int CARRY_WS = 0x04;
0091:
0092: /**
0093: * Field m_chunkBits sets our chunking strategy, by saying how many
0094: * bits of index can be used within a single chunk before flowing over
0095: * to the next chunk. For example, if m_chunkbits is set to 15, each
0096: * chunk can contain up to 2^15 (32K) characters
0097: */
0098: int m_chunkBits = 15;
0099:
0100: /**
0101: * Field m_maxChunkBits affects our chunk-growth strategy, by saying what
0102: * the largest permissible chunk size is in this particular FastStringBuffer
0103: * hierarchy.
0104: */
0105: int m_maxChunkBits = 15;
0106:
0107: /**
0108: * Field m_rechunkBits affects our chunk-growth strategy, by saying how
0109: * many chunks should be allocated at one size before we encapsulate them
0110: * into the first chunk of the next size up. For example, if m_rechunkBits
0111: * is set to 3, then after 8 chunks at a given size we will rebundle
0112: * them as the first element of a FastStringBuffer using a chunk size
0113: * 8 times larger (chunkBits shifted left three bits).
0114: */
0115: int m_rebundleBits = 2;
0116:
0117: /**
0118: * Field m_chunkSize establishes the maximum size of one chunk of the array
0119: * as 2**chunkbits characters.
0120: * (Which may also be the minimum size if we aren't tuning for storage)
0121: */
0122: int m_chunkSize; // =1<<(m_chunkBits-1);
0123:
0124: /**
0125: * Field m_chunkMask is m_chunkSize-1 -- in other words, m_chunkBits
0126: * worth of low-order '1' bits, useful for shift-and-mask addressing
0127: * within the chunks.
0128: */
0129: int m_chunkMask; // =m_chunkSize-1;
0130:
0131: /**
0132: * Field m_array holds the string buffer's text contents, using an
0133: * array-of-arrays. Note that this array, and the arrays it contains, may be
0134: * reallocated when necessary in order to allow the buffer to grow;
0135: * references to them should be considered to be invalidated after any
0136: * append. However, the only time these arrays are directly exposed
0137: * is in the sendSAXcharacters call.
0138: */
0139: char[][] m_array;
0140:
0141: /**
0142: * Field m_lastChunk is an index into m_array[], pointing to the last
0143: * chunk of the Chunked Array currently in use. Note that additional
0144: * chunks may actually be allocated, eg if the FastStringBuffer had
0145: * previously been truncated or if someone issued an ensureSpace request.
0146: * <p>
0147: * The insertion point for append operations is addressed by the combination
0148: * of m_lastChunk and m_firstFree.
0149: */
0150: int m_lastChunk = 0;
0151:
0152: /**
0153: * Field m_firstFree is an index into m_array[m_lastChunk][], pointing to
0154: * the first character in the Chunked Array which is not part of the
0155: * FastStringBuffer's current content. Since m_array[][] is zero-based,
0156: * the length of that content can be calculated as
0157: * (m_lastChunk<<m_chunkBits) + m_firstFree
0158: */
0159: int m_firstFree = 0;
0160:
0161: /**
0162: * Field m_innerFSB, when non-null, is a FastStringBuffer whose total
0163: * length equals m_chunkSize, and which replaces m_array[0]. This allows
0164: * building a hierarchy of FastStringBuffers, where early appends use
0165: * a smaller chunkSize (for less wasted memory overhead) but later
0166: * ones use a larger chunkSize (for less heap activity overhead).
0167: */
0168: FastStringBuffer m_innerFSB = null;
0169:
0170: /**
0171: * Construct a FastStringBuffer, with allocation policy as per parameters.
0172: * <p>
0173: * For coding convenience, I've expressed both allocation sizes in terms of
0174: * a number of bits. That's needed for the final size of a chunk,
0175: * to permit fast and efficient shift-and-mask addressing. It's less critical
0176: * for the inital size, and may be reconsidered.
0177: * <p>
0178: * An alternative would be to accept integer sizes and round to powers of two;
0179: * that really doesn't seem to buy us much, if anything.
0180: *
0181: * @param initChunkBits Length in characters of the initial allocation
0182: * of a chunk, expressed in log-base-2. (That is, 10 means allocate 1024
0183: * characters.) Later chunks will use larger allocation units, to trade off
0184: * allocation speed of large document against storage efficiency of small
0185: * ones.
0186: * @param maxChunkBits Number of character-offset bits that should be used for
0187: * addressing within a chunk. Maximum length of a chunk is 2^chunkBits
0188: * characters.
0189: * @param rebundleBits Number of character-offset bits that addressing should
0190: * advance before we attempt to take a step from initChunkBits to maxChunkBits
0191: */
0192: public FastStringBuffer(int initChunkBits, int maxChunkBits,
0193: int rebundleBits) {
0194: if (DEBUG_FORCE_INIT_BITS != 0)
0195: initChunkBits = DEBUG_FORCE_INIT_BITS;
0196:
0197: // %REVIEW%
0198: // Should this force to larger value, or smaller? Smaller less efficient, but if
0199: // someone requested variable mode it's because they care about storage space.
0200: // On the other hand, given the other changes I'm making, odds are that we should
0201: // adopt the larger size. Dither, dither, dither... This is just stopgap workaround
0202: // anyway; we need a permanant solution.
0203: //
0204: if (DEBUG_FORCE_FIXED_CHUNKSIZE)
0205: maxChunkBits = initChunkBits;
0206: //if(DEBUG_FORCE_FIXED_CHUNKSIZE) initChunkBits=maxChunkBits;
0207:
0208: m_array = new char[16][];
0209:
0210: // Don't bite off more than we're prepared to swallow!
0211: if (initChunkBits > maxChunkBits)
0212: initChunkBits = maxChunkBits;
0213:
0214: m_chunkBits = initChunkBits;
0215: m_maxChunkBits = maxChunkBits;
0216: m_rebundleBits = rebundleBits;
0217: m_chunkSize = 1 << (initChunkBits);
0218: m_chunkMask = m_chunkSize - 1;
0219: m_array[0] = new char[m_chunkSize];
0220: }
0221:
0222: /**
0223: * Construct a FastStringBuffer, using a default rebundleBits value.
0224: *
0225: * NEEDSDOC @param initChunkBits
0226: * NEEDSDOC @param maxChunkBits
0227: */
0228: public FastStringBuffer(int initChunkBits, int maxChunkBits) {
0229: this (initChunkBits, maxChunkBits, 2);
0230: }
0231:
0232: /**
0233: * Construct a FastStringBuffer, using default maxChunkBits and
0234: * rebundleBits values.
0235: * <p>
0236: * ISSUE: Should this call assert initial size, or fixed size?
0237: * Now configured as initial, with a default for fixed.
0238: *
0239: * NEEDSDOC @param initChunkBits
0240: */
0241: public FastStringBuffer(int initChunkBits) {
0242: this (initChunkBits, 15, 2);
0243: }
0244:
0245: /**
0246: * Construct a FastStringBuffer, using a default allocation policy.
0247: */
0248: public FastStringBuffer() {
0249:
0250: // 10 bits is 1K. 15 bits is 32K. Remember that these are character
0251: // counts, so actual memory allocation unit is doubled for UTF-16 chars.
0252: //
0253: // For reference: In the original FastStringBuffer, we simply
0254: // overallocated by blocksize (default 1KB) on each buffer-growth.
0255: this (10, 15, 2);
0256: }
0257:
0258: /**
0259: * Get the length of the list. Synonym for length().
0260: *
0261: * @return the number of characters in the FastStringBuffer's content.
0262: */
0263: public final int size() {
0264: return (m_lastChunk << m_chunkBits) + m_firstFree;
0265: }
0266:
0267: /**
0268: * Get the length of the list. Synonym for size().
0269: *
0270: * @return the number of characters in the FastStringBuffer's content.
0271: */
0272: public final int length() {
0273: return (m_lastChunk << m_chunkBits) + m_firstFree;
0274: }
0275:
0276: /**
0277: * Discard the content of the FastStringBuffer, and most of the memory
0278: * that was allocated by it, restoring the initial state. Note that this
0279: * may eventually be different from setLength(0), which see.
0280: */
0281: public final void reset() {
0282:
0283: m_lastChunk = 0;
0284: m_firstFree = 0;
0285:
0286: // Recover the original chunk size
0287: FastStringBuffer innermost = this ;
0288:
0289: while (innermost.m_innerFSB != null) {
0290: innermost = innermost.m_innerFSB;
0291: }
0292:
0293: m_chunkBits = innermost.m_chunkBits;
0294: m_chunkSize = innermost.m_chunkSize;
0295: m_chunkMask = innermost.m_chunkMask;
0296:
0297: // Discard the hierarchy
0298: m_innerFSB = null;
0299: m_array = new char[16][0];
0300: m_array[0] = new char[m_chunkSize];
0301: }
0302:
0303: /**
0304: * Directly set how much of the FastStringBuffer's storage is to be
0305: * considered part of its content. This is a fast but hazardous
0306: * operation. It is not protected against negative values, or values
0307: * greater than the amount of storage currently available... and even
0308: * if additional storage does exist, its contents are unpredictable.
0309: * The only safe use for our setLength() is to truncate the FastStringBuffer
0310: * to a shorter string.
0311: *
0312: * @param l New length. If l<0 or l>=getLength(), this operation will
0313: * not report an error but future operations will almost certainly fail.
0314: */
0315: public final void setLength(int l) {
0316: m_lastChunk = l >>> m_chunkBits;
0317:
0318: if (m_lastChunk == 0 && m_innerFSB != null) {
0319: // Replace this FSB with the appropriate inner FSB, truncated
0320: m_innerFSB.setLength(l, this );
0321: } else {
0322: m_firstFree = l & m_chunkMask;
0323:
0324: // There's an edge case if l is an exact multiple of m_chunkBits, which risks leaving
0325: // us pointing at the start of a chunk which has not yet been allocated. Rather than
0326: // pay the cost of dealing with that in the append loops (more scattered and more
0327: // inner-loop), we correct it here by moving to the safe side of that
0328: // line -- as we would have left the indexes had we appended up to that point.
0329: if (m_firstFree == 0 && m_lastChunk > 0) {
0330: --m_lastChunk;
0331: m_firstFree = m_chunkSize;
0332: }
0333: }
0334: }
0335:
0336: /**
0337: * Subroutine for the public setLength() method. Deals with the fact
0338: * that truncation may require restoring one of the innerFSBs
0339: *
0340: * NEEDSDOC @param l
0341: * NEEDSDOC @param rootFSB
0342: */
0343: private final void setLength(int l, FastStringBuffer rootFSB) {
0344:
0345: m_lastChunk = l >>> m_chunkBits;
0346:
0347: if (m_lastChunk == 0 && m_innerFSB != null) {
0348: m_innerFSB.setLength(l, rootFSB);
0349: } else {
0350:
0351: // Undo encapsulation -- pop the innerFSB data back up to root.
0352: // Inefficient, but attempts to keep the code simple.
0353: rootFSB.m_chunkBits = m_chunkBits;
0354: rootFSB.m_maxChunkBits = m_maxChunkBits;
0355: rootFSB.m_rebundleBits = m_rebundleBits;
0356: rootFSB.m_chunkSize = m_chunkSize;
0357: rootFSB.m_chunkMask = m_chunkMask;
0358: rootFSB.m_array = m_array;
0359: rootFSB.m_innerFSB = m_innerFSB;
0360: rootFSB.m_lastChunk = m_lastChunk;
0361:
0362: // Finally, truncate this sucker.
0363: rootFSB.m_firstFree = l & m_chunkMask;
0364: }
0365: }
0366:
0367: /**
0368: * Note that this operation has been somewhat deoptimized by the shift to a
0369: * chunked array, as there is no factory method to produce a String object
0370: * directly from an array of arrays and hence a double copy is needed.
0371: * By using ensureCapacity we hope to minimize the heap overhead of building
0372: * the intermediate StringBuffer.
0373: * <p>
0374: * (It really is a pity that Java didn't design String as a final subclass
0375: * of MutableString, rather than having StringBuffer be a separate hierarchy.
0376: * We'd avoid a <strong>lot</strong> of double-buffering.)
0377: *
0378: * @return the contents of the FastStringBuffer as a standard Java string.
0379: */
0380: public final String toString() {
0381:
0382: int length = (m_lastChunk << m_chunkBits) + m_firstFree;
0383:
0384: return getString(new StringBuffer(length), 0, 0, length)
0385: .toString();
0386: }
0387:
0388: /**
0389: * Append a single character onto the FastStringBuffer, growing the
0390: * storage if necessary.
0391: * <p>
0392: * NOTE THAT after calling append(), previously obtained
0393: * references to m_array[][] may no longer be valid....
0394: * though in fact they should be in this instance.
0395: *
0396: * @param value character to be appended.
0397: */
0398: public final void append(char value) {
0399:
0400: char[] chunk;
0401:
0402: // We may have preallocated chunks. If so, all but last should
0403: // be at full size.
0404: boolean lastchunk = (m_lastChunk + 1 == m_array.length);
0405:
0406: if (m_firstFree < m_chunkSize) // Simplified test single-character-fits
0407: chunk = m_array[m_lastChunk];
0408: else {
0409:
0410: // Extend array?
0411: int i = m_array.length;
0412:
0413: if (m_lastChunk + 1 == i) {
0414: char[][] newarray = new char[i + 16][];
0415:
0416: System.arraycopy(m_array, 0, newarray, 0, i);
0417:
0418: m_array = newarray;
0419: }
0420:
0421: // Advance one chunk
0422: chunk = m_array[++m_lastChunk];
0423:
0424: if (chunk == null) {
0425:
0426: // Hierarchical encapsulation
0427: if (m_lastChunk == 1 << m_rebundleBits
0428: && m_chunkBits < m_maxChunkBits) {
0429:
0430: // Should do all the work of both encapsulating
0431: // existing data and establishing new sizes/offsets
0432: m_innerFSB = new FastStringBuffer(this );
0433: }
0434:
0435: // Add a chunk.
0436: chunk = m_array[m_lastChunk] = new char[m_chunkSize];
0437: }
0438:
0439: m_firstFree = 0;
0440: }
0441:
0442: // Space exists in the chunk. Append the character.
0443: chunk[m_firstFree++] = value;
0444: }
0445:
0446: /**
0447: * Append the contents of a String onto the FastStringBuffer,
0448: * growing the storage if necessary.
0449: * <p>
0450: * NOTE THAT after calling append(), previously obtained
0451: * references to m_array[] may no longer be valid.
0452: *
0453: * @param value String whose contents are to be appended.
0454: */
0455: public final void append(String value) {
0456:
0457: if (value == null)
0458: return;
0459: int strlen = value.length();
0460:
0461: if (0 == strlen)
0462: return;
0463:
0464: int copyfrom = 0;
0465: char[] chunk = m_array[m_lastChunk];
0466: int available = m_chunkSize - m_firstFree;
0467:
0468: // Repeat while data remains to be copied
0469: while (strlen > 0) {
0470:
0471: // Copy what fits
0472: if (available > strlen)
0473: available = strlen;
0474:
0475: value.getChars(copyfrom, copyfrom + available,
0476: m_array[m_lastChunk], m_firstFree);
0477:
0478: strlen -= available;
0479: copyfrom += available;
0480:
0481: // If there's more left, allocate another chunk and continue
0482: if (strlen > 0) {
0483:
0484: // Extend array?
0485: int i = m_array.length;
0486:
0487: if (m_lastChunk + 1 == i) {
0488: char[][] newarray = new char[i + 16][];
0489:
0490: System.arraycopy(m_array, 0, newarray, 0, i);
0491:
0492: m_array = newarray;
0493: }
0494:
0495: // Advance one chunk
0496: chunk = m_array[++m_lastChunk];
0497:
0498: if (chunk == null) {
0499:
0500: // Hierarchical encapsulation
0501: if (m_lastChunk == 1 << m_rebundleBits
0502: && m_chunkBits < m_maxChunkBits) {
0503:
0504: // Should do all the work of both encapsulating
0505: // existing data and establishing new sizes/offsets
0506: m_innerFSB = new FastStringBuffer(this );
0507: }
0508:
0509: // Add a chunk.
0510: chunk = m_array[m_lastChunk] = new char[m_chunkSize];
0511: }
0512:
0513: available = m_chunkSize;
0514: m_firstFree = 0;
0515: }
0516: }
0517:
0518: // Adjust the insert point in the last chunk, when we've reached it.
0519: m_firstFree += available;
0520: }
0521:
0522: /**
0523: * Append the contents of a StringBuffer onto the FastStringBuffer,
0524: * growing the storage if necessary.
0525: * <p>
0526: * NOTE THAT after calling append(), previously obtained
0527: * references to m_array[] may no longer be valid.
0528: *
0529: * @param value StringBuffer whose contents are to be appended.
0530: */
0531: public final void append(StringBuffer value) {
0532:
0533: if (value == null)
0534: return;
0535: int strlen = value.length();
0536:
0537: if (0 == strlen)
0538: return;
0539:
0540: int copyfrom = 0;
0541: char[] chunk = m_array[m_lastChunk];
0542: int available = m_chunkSize - m_firstFree;
0543:
0544: // Repeat while data remains to be copied
0545: while (strlen > 0) {
0546:
0547: // Copy what fits
0548: if (available > strlen)
0549: available = strlen;
0550:
0551: value.getChars(copyfrom, copyfrom + available,
0552: m_array[m_lastChunk], m_firstFree);
0553:
0554: strlen -= available;
0555: copyfrom += available;
0556:
0557: // If there's more left, allocate another chunk and continue
0558: if (strlen > 0) {
0559:
0560: // Extend array?
0561: int i = m_array.length;
0562:
0563: if (m_lastChunk + 1 == i) {
0564: char[][] newarray = new char[i + 16][];
0565:
0566: System.arraycopy(m_array, 0, newarray, 0, i);
0567:
0568: m_array = newarray;
0569: }
0570:
0571: // Advance one chunk
0572: chunk = m_array[++m_lastChunk];
0573:
0574: if (chunk == null) {
0575:
0576: // Hierarchical encapsulation
0577: if (m_lastChunk == 1 << m_rebundleBits
0578: && m_chunkBits < m_maxChunkBits) {
0579:
0580: // Should do all the work of both encapsulating
0581: // existing data and establishing new sizes/offsets
0582: m_innerFSB = new FastStringBuffer(this );
0583: }
0584:
0585: // Add a chunk.
0586: chunk = m_array[m_lastChunk] = new char[m_chunkSize];
0587: }
0588:
0589: available = m_chunkSize;
0590: m_firstFree = 0;
0591: }
0592: }
0593:
0594: // Adjust the insert point in the last chunk, when we've reached it.
0595: m_firstFree += available;
0596: }
0597:
0598: /**
0599: * Append part of the contents of a Character Array onto the
0600: * FastStringBuffer, growing the storage if necessary.
0601: * <p>
0602: * NOTE THAT after calling append(), previously obtained
0603: * references to m_array[] may no longer be valid.
0604: *
0605: * @param chars character array from which data is to be copied
0606: * @param start offset in chars of first character to be copied,
0607: * zero-based.
0608: * @param length number of characters to be copied
0609: */
0610: public final void append(char[] chars, int start, int length) {
0611:
0612: int strlen = length;
0613:
0614: if (0 == strlen)
0615: return;
0616:
0617: int copyfrom = start;
0618: char[] chunk = m_array[m_lastChunk];
0619: int available = m_chunkSize - m_firstFree;
0620:
0621: // Repeat while data remains to be copied
0622: while (strlen > 0) {
0623:
0624: // Copy what fits
0625: if (available > strlen)
0626: available = strlen;
0627:
0628: System.arraycopy(chars, copyfrom, m_array[m_lastChunk],
0629: m_firstFree, available);
0630:
0631: strlen -= available;
0632: copyfrom += available;
0633:
0634: // If there's more left, allocate another chunk and continue
0635: if (strlen > 0) {
0636:
0637: // Extend array?
0638: int i = m_array.length;
0639:
0640: if (m_lastChunk + 1 == i) {
0641: char[][] newarray = new char[i + 16][];
0642:
0643: System.arraycopy(m_array, 0, newarray, 0, i);
0644:
0645: m_array = newarray;
0646: }
0647:
0648: // Advance one chunk
0649: chunk = m_array[++m_lastChunk];
0650:
0651: if (chunk == null) {
0652:
0653: // Hierarchical encapsulation
0654: if (m_lastChunk == 1 << m_rebundleBits
0655: && m_chunkBits < m_maxChunkBits) {
0656:
0657: // Should do all the work of both encapsulating
0658: // existing data and establishing new sizes/offsets
0659: m_innerFSB = new FastStringBuffer(this );
0660: }
0661:
0662: // Add a chunk.
0663: chunk = m_array[m_lastChunk] = new char[m_chunkSize];
0664: }
0665:
0666: available = m_chunkSize;
0667: m_firstFree = 0;
0668: }
0669: }
0670:
0671: // Adjust the insert point in the last chunk, when we've reached it.
0672: m_firstFree += available;
0673: }
0674:
0675: /**
0676: * Append the contents of another FastStringBuffer onto
0677: * this FastStringBuffer, growing the storage if necessary.
0678: * <p>
0679: * NOTE THAT after calling append(), previously obtained
0680: * references to m_array[] may no longer be valid.
0681: *
0682: * @param value FastStringBuffer whose contents are
0683: * to be appended.
0684: */
0685: public final void append(FastStringBuffer value) {
0686:
0687: // Complicating factor here is that the two buffers may use
0688: // different chunk sizes, and even if they're the same we're
0689: // probably on a different alignment due to previously appended
0690: // data. We have to work through the source in bite-sized chunks.
0691: if (value == null)
0692: return;
0693: int strlen = value.length();
0694:
0695: if (0 == strlen)
0696: return;
0697:
0698: int copyfrom = 0;
0699: char[] chunk = m_array[m_lastChunk];
0700: int available = m_chunkSize - m_firstFree;
0701:
0702: // Repeat while data remains to be copied
0703: while (strlen > 0) {
0704:
0705: // Copy what fits
0706: if (available > strlen)
0707: available = strlen;
0708:
0709: int sourcechunk = (copyfrom + value.m_chunkSize - 1) >>> value.m_chunkBits;
0710: int sourcecolumn = copyfrom & value.m_chunkMask;
0711: int runlength = value.m_chunkSize - sourcecolumn;
0712:
0713: if (runlength > available)
0714: runlength = available;
0715:
0716: System.arraycopy(value.m_array[sourcechunk], sourcecolumn,
0717: m_array[m_lastChunk], m_firstFree, runlength);
0718:
0719: if (runlength != available)
0720: System.arraycopy(value.m_array[sourcechunk + 1], 0,
0721: m_array[m_lastChunk], m_firstFree + runlength,
0722: available - runlength);
0723:
0724: strlen -= available;
0725: copyfrom += available;
0726:
0727: // If there's more left, allocate another chunk and continue
0728: if (strlen > 0) {
0729:
0730: // Extend array?
0731: int i = m_array.length;
0732:
0733: if (m_lastChunk + 1 == i) {
0734: char[][] newarray = new char[i + 16][];
0735:
0736: System.arraycopy(m_array, 0, newarray, 0, i);
0737:
0738: m_array = newarray;
0739: }
0740:
0741: // Advance one chunk
0742: chunk = m_array[++m_lastChunk];
0743:
0744: if (chunk == null) {
0745:
0746: // Hierarchical encapsulation
0747: if (m_lastChunk == 1 << m_rebundleBits
0748: && m_chunkBits < m_maxChunkBits) {
0749:
0750: // Should do all the work of both encapsulating
0751: // existing data and establishing new sizes/offsets
0752: m_innerFSB = new FastStringBuffer(this );
0753: }
0754:
0755: // Add a chunk.
0756: chunk = m_array[m_lastChunk] = new char[m_chunkSize];
0757: }
0758:
0759: available = m_chunkSize;
0760: m_firstFree = 0;
0761: }
0762: }
0763:
0764: // Adjust the insert point in the last chunk, when we've reached it.
0765: m_firstFree += available;
0766: }
0767:
0768: /**
0769: * @return true if the specified range of characters are all whitespace,
0770: * as defined by XMLCharacterRecognizer.
0771: * <p>
0772: * CURRENTLY DOES NOT CHECK FOR OUT-OF-RANGE.
0773: *
0774: * @param start Offset of first character in the range.
0775: * @param length Number of characters to send.
0776: */
0777: public boolean isWhitespace(int start, int length) {
0778:
0779: int sourcechunk = start >>> m_chunkBits;
0780: int sourcecolumn = start & m_chunkMask;
0781: int available = m_chunkSize - sourcecolumn;
0782: boolean chunkOK;
0783:
0784: while (length > 0) {
0785: int runlength = (length <= available) ? length : available;
0786:
0787: if (sourcechunk == 0 && m_innerFSB != null)
0788: chunkOK = m_innerFSB.isWhitespace(sourcecolumn,
0789: runlength);
0790: else
0791: chunkOK = org.apache.xml.utils.XMLCharacterRecognizer
0792: .isWhiteSpace(m_array[sourcechunk],
0793: sourcecolumn, runlength);
0794:
0795: if (!chunkOK)
0796: return false;
0797:
0798: length -= runlength;
0799:
0800: ++sourcechunk;
0801:
0802: sourcecolumn = 0;
0803: available = m_chunkSize;
0804: }
0805:
0806: return true;
0807: }
0808:
0809: /**
0810: * @param start Offset of first character in the range.
0811: * @param length Number of characters to send.
0812: * @return a new String object initialized from the specified range of
0813: * characters.
0814: */
0815: public String getString(int start, int length) {
0816: int startColumn = start & m_chunkMask;
0817: int startChunk = start >>> m_chunkBits;
0818: if (startColumn + length < m_chunkMask && m_innerFSB == null) {
0819: return getOneChunkString(startChunk, startColumn, length);
0820: }
0821: return getString(new StringBuffer(length), startChunk,
0822: startColumn, length).toString();
0823: }
0824:
0825: protected String getOneChunkString(int startChunk, int startColumn,
0826: int length) {
0827: return new String(m_array[startChunk], startColumn, length);
0828: }
0829:
0830: /**
0831: * @param sb StringBuffer to be appended to
0832: * @param start Offset of first character in the range.
0833: * @param length Number of characters to send.
0834: * @return sb with the requested text appended to it
0835: */
0836: StringBuffer getString(StringBuffer sb, int start, int length) {
0837: return getString(sb, start >>> m_chunkBits,
0838: start & m_chunkMask, length);
0839: }
0840:
0841: /**
0842: * Internal support for toString() and getString().
0843: * PLEASE NOTE SIGNATURE CHANGE from earlier versions; it now appends into
0844: * and returns a StringBuffer supplied by the caller. This simplifies
0845: * m_innerFSB support.
0846: * <p>
0847: * Note that this operation has been somewhat deoptimized by the shift to a
0848: * chunked array, as there is no factory method to produce a String object
0849: * directly from an array of arrays and hence a double copy is needed.
0850: * By presetting length we hope to minimize the heap overhead of building
0851: * the intermediate StringBuffer.
0852: * <p>
0853: * (It really is a pity that Java didn't design String as a final subclass
0854: * of MutableString, rather than having StringBuffer be a separate hierarchy.
0855: * We'd avoid a <strong>lot</strong> of double-buffering.)
0856: *
0857: *
0858: * @param sb
0859: * @param startChunk
0860: * @param startColumn
0861: * @param length
0862: *
0863: * @return the contents of the FastStringBuffer as a standard Java string.
0864: */
0865: StringBuffer getString(StringBuffer sb, int startChunk,
0866: int startColumn, int length) {
0867:
0868: int stop = (startChunk << m_chunkBits) + startColumn + length;
0869: int stopChunk = stop >>> m_chunkBits;
0870: int stopColumn = stop & m_chunkMask;
0871:
0872: // Factored out
0873: //StringBuffer sb=new StringBuffer(length);
0874: for (int i = startChunk; i < stopChunk; ++i) {
0875: if (i == 0 && m_innerFSB != null)
0876: m_innerFSB.getString(sb, startColumn, m_chunkSize
0877: - startColumn);
0878: else
0879: sb.append(m_array[i], startColumn, m_chunkSize
0880: - startColumn);
0881:
0882: startColumn = 0; // after first chunk
0883: }
0884:
0885: if (stopChunk == 0 && m_innerFSB != null)
0886: m_innerFSB.getString(sb, startColumn, stopColumn
0887: - startColumn);
0888: else if (stopColumn > startColumn)
0889: sb.append(m_array[stopChunk], startColumn, stopColumn
0890: - startColumn);
0891:
0892: return sb;
0893: }
0894:
0895: /**
0896: * Get a single character from the string buffer.
0897: *
0898: *
0899: * @param pos character position requested.
0900: * @return A character from the requested position.
0901: */
0902: public char charAt(int pos) {
0903: int startChunk = pos >>> m_chunkBits;
0904:
0905: if (startChunk == 0 && m_innerFSB != null)
0906: return m_innerFSB.charAt(pos & m_chunkMask);
0907: else
0908: return m_array[startChunk][pos & m_chunkMask];
0909: }
0910:
0911: /**
0912: * Sends the specified range of characters as one or more SAX characters()
0913: * events.
0914: * Note that the buffer reference passed to the ContentHandler may be
0915: * invalidated if the FastStringBuffer is edited; it's the user's
0916: * responsibility to manage access to the FastStringBuffer to prevent this
0917: * problem from arising.
0918: * <p>
0919: * Note too that there is no promise that the output will be sent as a
0920: * single call. As is always true in SAX, one logical string may be split
0921: * across multiple blocks of memory and hence delivered as several
0922: * successive events.
0923: *
0924: * @param ch SAX ContentHandler object to receive the event.
0925: * @param start Offset of first character in the range.
0926: * @param length Number of characters to send.
0927: * @exception org.xml.sax.SAXException may be thrown by handler's
0928: * characters() method.
0929: */
0930: public void sendSAXcharacters(org.xml.sax.ContentHandler ch,
0931: int start, int length) throws org.xml.sax.SAXException {
0932:
0933: int startChunk = start >>> m_chunkBits;
0934: int startColumn = start & m_chunkMask;
0935: if (startColumn + length < m_chunkMask && m_innerFSB == null) {
0936: ch.characters(m_array[startChunk], startColumn, length);
0937: return;
0938: }
0939:
0940: int stop = start + length;
0941: int stopChunk = stop >>> m_chunkBits;
0942: int stopColumn = stop & m_chunkMask;
0943:
0944: for (int i = startChunk; i < stopChunk; ++i) {
0945: if (i == 0 && m_innerFSB != null)
0946: m_innerFSB.sendSAXcharacters(ch, startColumn,
0947: m_chunkSize - startColumn);
0948: else
0949: ch.characters(m_array[i], startColumn, m_chunkSize
0950: - startColumn);
0951:
0952: startColumn = 0; // after first chunk
0953: }
0954:
0955: // Last, or only, chunk
0956: if (stopChunk == 0 && m_innerFSB != null)
0957: m_innerFSB.sendSAXcharacters(ch, startColumn, stopColumn
0958: - startColumn);
0959: else if (stopColumn > startColumn) {
0960: ch.characters(m_array[stopChunk], startColumn, stopColumn
0961: - startColumn);
0962: }
0963: }
0964:
0965: /**
0966: * Sends the specified range of characters as one or more SAX characters()
0967: * events, normalizing the characters according to XSLT rules.
0968: *
0969: * @param ch SAX ContentHandler object to receive the event.
0970: * @param start Offset of first character in the range.
0971: * @param length Number of characters to send.
0972: * @return normalization status to apply to next chunk (because we may
0973: * have been called recursively to process an inner FSB):
0974: * <dl>
0975: * <dt>0</dt>
0976: * <dd>if this output did not end in retained whitespace, and thus whitespace
0977: * at the start of the following chunk (if any) should be converted to a
0978: * single space.
0979: * <dt>SUPPRESS_LEADING_WS</dt>
0980: * <dd>if this output ended in retained whitespace, and thus whitespace
0981: * at the start of the following chunk (if any) should be completely
0982: * suppressed.</dd>
0983: * </dd>
0984: * </dl>
0985: * @exception org.xml.sax.SAXException may be thrown by handler's
0986: * characters() method.
0987: */
0988: public int sendNormalizedSAXcharacters(
0989: org.xml.sax.ContentHandler ch, int start, int length)
0990: throws org.xml.sax.SAXException {
0991: // This call always starts at the beginning of the
0992: // string being written out, either because it was called directly or
0993: // because it was an m_innerFSB recursion. This is important since
0994: // it gives us a well-known initial state for this flag:
0995: int stateForNextChunk = SUPPRESS_LEADING_WS;
0996:
0997: int stop = start + length;
0998: int startChunk = start >>> m_chunkBits;
0999: int startColumn = start & m_chunkMask;
1000: int stopChunk = stop >>> m_chunkBits;
1001: int stopColumn = stop & m_chunkMask;
1002:
1003: for (int i = startChunk; i < stopChunk; ++i) {
1004: if (i == 0 && m_innerFSB != null)
1005: stateForNextChunk = m_innerFSB
1006: .sendNormalizedSAXcharacters(ch, startColumn,
1007: m_chunkSize - startColumn);
1008: else
1009: stateForNextChunk = sendNormalizedSAXcharacters(
1010: m_array[i], startColumn, m_chunkSize
1011: - startColumn, ch, stateForNextChunk);
1012:
1013: startColumn = 0; // after first chunk
1014: }
1015:
1016: // Last, or only, chunk
1017: if (stopChunk == 0 && m_innerFSB != null)
1018: stateForNextChunk = // %REVIEW% Is this update really needed?
1019: m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn,
1020: stopColumn - startColumn);
1021: else if (stopColumn > startColumn) {
1022: stateForNextChunk = // %REVIEW% Is this update really needed?
1023: sendNormalizedSAXcharacters(m_array[stopChunk],
1024: startColumn, stopColumn - startColumn, ch,
1025: stateForNextChunk | SUPPRESS_TRAILING_WS);
1026: }
1027: return stateForNextChunk;
1028: }
1029:
1030: static final char[] SINGLE_SPACE = { ' ' };
1031:
1032: /**
1033: * Internal method to directly normalize and dispatch the character array.
1034: * This version is aware of the fact that it may be called several times
1035: * in succession if the data is made up of multiple "chunks", and thus
1036: * must actively manage the handling of leading and trailing whitespace.
1037: *
1038: * Note: The recursion is due to the possible recursion of inner FSBs.
1039: *
1040: * @param ch The characters from the XML document.
1041: * @param start The start position in the array.
1042: * @param length The number of characters to read from the array.
1043: * @param handler SAX ContentHandler object to receive the event.
1044: * @param edgeTreatmentFlags How leading/trailing spaces should be handled.
1045: * This is a bitfield contining two flags, bitwise-ORed together:
1046: * <dl>
1047: * <dt>SUPPRESS_LEADING_WS</dt>
1048: * <dd>When false, causes leading whitespace to be converted to a single
1049: * space; when true, causes it to be discarded entirely.
1050: * Should be set TRUE for the first chunk, and (in multi-chunk output)
1051: * whenever the previous chunk ended in retained whitespace.</dd>
1052: * <dt>SUPPRESS_TRAILING_WS</dt>
1053: * <dd>When false, causes trailing whitespace to be converted to a single
1054: * space; when true, causes it to be discarded entirely.
1055: * Should be set TRUE for the last or only chunk.
1056: * </dd>
1057: * </dl>
1058: * @return normalization status, as in the edgeTreatmentFlags parameter:
1059: * <dl>
1060: * <dt>0</dt>
1061: * <dd>if this output did not end in retained whitespace, and thus whitespace
1062: * at the start of the following chunk (if any) should be converted to a
1063: * single space.
1064: * <dt>SUPPRESS_LEADING_WS</dt>
1065: * <dd>if this output ended in retained whitespace, and thus whitespace
1066: * at the start of the following chunk (if any) should be completely
1067: * suppressed.</dd>
1068: * </dd>
1069: * </dl>
1070: *
1071: *
1072: * @exception org.xml.sax.SAXException Any SAX exception, possibly
1073: * wrapping another exception.
1074: */
1075: static int sendNormalizedSAXcharacters(char ch[], int start,
1076: int length, org.xml.sax.ContentHandler handler,
1077: int edgeTreatmentFlags) throws org.xml.sax.SAXException {
1078: boolean processingLeadingWhitespace = ((edgeTreatmentFlags & SUPPRESS_LEADING_WS) != 0);
1079: boolean seenWhitespace = ((edgeTreatmentFlags & CARRY_WS) != 0);
1080: boolean suppressTrailingWhitespace = ((edgeTreatmentFlags & SUPPRESS_TRAILING_WS) != 0);
1081: int currPos = start;
1082: int limit = start + length;
1083:
1084: // Strip any leading spaces first, if required
1085: if (processingLeadingWhitespace) {
1086: for (; currPos < limit
1087: && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]); currPos++) {
1088: }
1089:
1090: // If we've only encountered leading spaces, the
1091: // current state remains unchanged
1092: if (currPos == limit) {
1093: return edgeTreatmentFlags;
1094: }
1095: }
1096:
1097: // If we get here, there are no more leading spaces to strip
1098: while (currPos < limit) {
1099: int startNonWhitespace = currPos;
1100:
1101: // Grab a chunk of non-whitespace characters
1102: for (; currPos < limit
1103: && !XMLCharacterRecognizer
1104: .isWhiteSpace(ch[currPos]); currPos++) {
1105: }
1106:
1107: // Non-whitespace seen - emit them, along with a single
1108: // space for any preceding whitespace characters
1109: if (startNonWhitespace != currPos) {
1110: if (seenWhitespace) {
1111: handler.characters(SINGLE_SPACE, 0, 1);
1112: seenWhitespace = false;
1113: }
1114: handler.characters(ch, startNonWhitespace, currPos
1115: - startNonWhitespace);
1116: }
1117:
1118: int startWhitespace = currPos;
1119:
1120: // Consume any whitespace characters
1121: for (; currPos < limit
1122: && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]); currPos++) {
1123: }
1124:
1125: if (startWhitespace != currPos) {
1126: seenWhitespace = true;
1127: }
1128: }
1129:
1130: return (seenWhitespace ? CARRY_WS : 0)
1131: | (edgeTreatmentFlags & SUPPRESS_TRAILING_WS);
1132: }
1133:
1134: /**
1135: * Directly normalize and dispatch the character array.
1136: *
1137: * @param ch The characters from the XML document.
1138: * @param start The start position in the array.
1139: * @param length The number of characters to read from the array.
1140: * @param handler SAX ContentHandler object to receive the event.
1141: * @exception org.xml.sax.SAXException Any SAX exception, possibly
1142: * wrapping another exception.
1143: */
1144: public static void sendNormalizedSAXcharacters(char ch[],
1145: int start, int length, org.xml.sax.ContentHandler handler)
1146: throws org.xml.sax.SAXException {
1147: sendNormalizedSAXcharacters(ch, start, length, handler,
1148: SUPPRESS_BOTH);
1149: }
1150:
1151: /**
1152: * Sends the specified range of characters as sax Comment.
1153: * <p>
1154: * Note that, unlike sendSAXcharacters, this has to be done as a single
1155: * call to LexicalHandler#comment.
1156: *
1157: * @param ch SAX LexicalHandler object to receive the event.
1158: * @param start Offset of first character in the range.
1159: * @param length Number of characters to send.
1160: * @exception org.xml.sax.SAXException may be thrown by handler's
1161: * characters() method.
1162: */
1163: public void sendSAXComment(org.xml.sax.ext.LexicalHandler ch,
1164: int start, int length) throws org.xml.sax.SAXException {
1165:
1166: // %OPT% Do it this way for now...
1167: String comment = getString(start, length);
1168: ch.comment(comment.toCharArray(), 0, length);
1169: }
1170:
1171: /**
1172: * Copies characters from this string into the destination character
1173: * array.
1174: *
1175: * @param srcBegin index of the first character in the string
1176: * to copy.
1177: * @param srcEnd index after the last character in the string
1178: * to copy.
1179: * @param dst the destination array.
1180: * @param dstBegin the start offset in the destination array.
1181: * @exception IndexOutOfBoundsException If any of the following
1182: * is true:
1183: * <ul><li><code>srcBegin</code> is negative.
1184: * <li><code>srcBegin</code> is greater than <code>srcEnd</code>
1185: * <li><code>srcEnd</code> is greater than the length of this
1186: * string
1187: * <li><code>dstBegin</code> is negative
1188: * <li><code>dstBegin+(srcEnd-srcBegin)</code> is larger than
1189: * <code>dst.length</code></ul>
1190: * @exception NullPointerException if <code>dst</code> is <code>null</code>
1191: */
1192: private void getChars(int srcBegin, int srcEnd, char dst[],
1193: int dstBegin) {
1194: // %TBD% Joe needs to write this function. Make public when implemented.
1195: }
1196:
1197: /**
1198: * Encapsulation c'tor. After this is called, the source FastStringBuffer
1199: * will be reset to use the new object as its m_innerFSB, and will have
1200: * had its chunk size reset appropriately. IT SHOULD NEVER BE CALLED
1201: * EXCEPT WHEN source.length()==1<<(source.m_chunkBits+source.m_rebundleBits)
1202: *
1203: * NEEDSDOC @param source
1204: */
1205: private FastStringBuffer(FastStringBuffer source) {
1206:
1207: // Copy existing information into new encapsulation
1208: m_chunkBits = source.m_chunkBits;
1209: m_maxChunkBits = source.m_maxChunkBits;
1210: m_rebundleBits = source.m_rebundleBits;
1211: m_chunkSize = source.m_chunkSize;
1212: m_chunkMask = source.m_chunkMask;
1213: m_array = source.m_array;
1214: m_innerFSB = source.m_innerFSB;
1215:
1216: // These have to be adjusted because we're calling just at the time
1217: // when we would be about to allocate another chunk
1218: m_lastChunk = source.m_lastChunk - 1;
1219: m_firstFree = source.m_chunkSize;
1220:
1221: // Establish capsule as the Inner FSB, reset chunk sizes/addressing
1222: source.m_array = new char[16][];
1223: source.m_innerFSB = this ;
1224:
1225: // Since we encapsulated just as we were about to append another
1226: // chunk, return ready to create the chunk after the innerFSB
1227: // -- 1, not 0.
1228: source.m_lastChunk = 1;
1229: source.m_firstFree = 0;
1230: source.m_chunkBits += m_rebundleBits;
1231: source.m_chunkSize = 1 << (source.m_chunkBits);
1232: source.m_chunkMask = source.m_chunkSize - 1;
1233: }
1234: }
|