0001: // Copyright (c) 2001, 2002, 2003, 2006 Per M.A. Bothner and Brainfood Inc.
0002: // This is free software; for terms and warranty disclaimer see ./COPYING.
0003:
0004: package gnu.lists;
0005:
0006: import gnu.text.Char;
0007:
0008: /** A compact representation of a tree, that is a nested list structure.
0009: * The data structure can store anything that can be emitted to a Consumer.
0010: * This data structure is optimized for efficient forwards traversal
0011: * through the data structure, not random access.
0012: * It does have an "insertion point"; insertions and deletions are
0013: * efficient through the use of a buffer gap.
0014: * It is a reasonable choice for a "DOM" for XML data.
0015: */
0016:
0017: public class TreeList extends AbstractSequence implements
0018: /* #ifdef JAVA5 */
0019: // Appendable,
0020: /* #endif */
0021: XConsumer, PositionConsumer, Consumable {
0022: // Some public fields and methods are public which probably shouldn't be,
0023: // for the sake of XMLFilter. FIXME. Perhaps an abstract class
0024: // in gnu.lists that XMLFilter extend?
0025:
0026: public Object[] objects;
0027: public int oindex;
0028: public char[] data;
0029: public int gapStart;
0030: public int gapEnd;
0031:
0032: /** If non-zero, gap is in an attribute starting (1 less than) here. */
0033: public int attrStart;
0034: /** If non-zero, gap is in an document starting (1 less than) here. */
0035: public int docStart;
0036:
0037: public TreeList() {
0038: resizeObjects();
0039: gapEnd = 200;
0040: data = new char[gapEnd];
0041: }
0042:
0043: /**
0044: * Make a copy of a sub-range of a TreeList.
0045: * @param list the TreeList to copy
0046: * @param startPosition start of range, as a raw index in data
0047: * @param endPosition end of range, as a raw index in data
0048: */
0049: public TreeList(TreeList list, int startPosition, int endPosition) {
0050: this ();
0051: list.consumeIRange(startPosition, endPosition, this );
0052: }
0053:
0054: public TreeList(TreeList list) {
0055: this (list, 0, list.data.length);
0056: }
0057:
0058: public void clear() {
0059: gapStart = 0;
0060: gapEnd = data.length;
0061: attrStart = 0;
0062: if (gapEnd > 1500) {
0063: gapEnd = 200;
0064: data = new char[gapEnd];
0065: }
0066: objects = null;
0067: oindex = 0;
0068: resizeObjects();
0069: }
0070:
0071: // The data array contains an encoding of values, as follows:
0072: // 0x0000 ... 0x9FFF: A single Unicode character.
0073: // 0xAXXX: BEGIN_ELEMENT_SHORT
0074: // 0xBXXX: negative integer ((short)0x0XXX<<4)>>4, range -4096 to -1
0075: // 0xCXXX: positive integer 0x0XXX, in the range 0 to 4095
0076: // 0xDXXX: positive integer 0x1XXX, in the range 4096 to 8191
0077: // 0xEXXX:: OBJECT_REF_SHORT. The object in objects[0xXXX].
0078: // 0xF0XX: A byte (encoded character or char fragment) ((byte) 0xXX).
0079: // 0xF100 ... 0xF101: A Boolean (BOOL_FALSE, BOOL_TRUE)
0080: // 0xF102 A B: INT_FOLLOWS - 32-bit int (big-endian)
0081: // 0xF103 A B C D: LONG_FOLLOWS 64-bit long int (big-endian)
0082: // 0xF104 A B: FLOAT_FOLLOWS 32-bit float (big-endian)
0083: // 0xF105 A B C D: DOUBLE_FOLLOWS 64-bit double (big-endian)
0084: // 0xF106: CHAR_FOLLOWS
0085: // 0xF108: BEGIN_ELEMENT_LONG
0086: // 0xF109: BEGIN_ATTRIBUTE_LONG
0087: // 0xF10A: END_ATTRIBUTE
0088: // 0xF10B: END_ELEMENT_SHORT
0089: // 0xF10C: END_ELEMENT_LONG
0090: // 0xF10D A B: OBJECT_REF_FOLLOWS: The object in objects[(A,B)].
0091: // 0xF10E A B: POSITION_REF_FOLLOWS: The TreePosition in objects[(A,B)].
0092: // 0xF10F A B C D: POSITION_PAIR_FOLLOWS
0093: // 0xF110 BEGIN_DOCUMENT
0094: // 0xF111 END_DOCUMENT
0095: // 0xF112 BEGIN_ENTITY
0096: // 0xF113 END_ENTITY
0097: // 0xF114 PROCESSING_INSTRUCTION
0098: // 0xF115 CDATA_SECTION
0099: // 0xF116 JOINER
0100: // 0xF117 COMMENT
0101: // 0xF118 DOCUMENT_URI: Not a node, but a property of the previous document.
0102:
0103: /** The largest Unicode character that can be encoded in one char. */
0104: public static final int MAX_CHAR_SHORT = 0x9FFF;
0105:
0106: /** The smallest integer that can use the short int encoding. */
0107: static final int MIN_INT_SHORT = -0x1000; // -4096
0108:
0109: /** The largest integer that can use the short int encoding. */
0110: static final int MAX_INT_SHORT = 0x1FFF; // 8191
0111:
0112: /** The value used to encode the integer zero. */
0113: static final int INT_SHORT_ZERO = 0xC000;
0114:
0115: /** The value used to encode the object in objects[0]. */
0116: static final int OBJECT_REF_SHORT = 0xE000;
0117:
0118: /** The maximum offset in the objects array for a short object-ref. */
0119: static final int OBJECT_REF_SHORT_INDEX_MAX = 0xFFF;
0120:
0121: /** Followed by 2 chars that provide an index into objects. */
0122: static final char OBJECT_REF_FOLLOWS = 0xF10D;
0123:
0124: /** Followed by 2 chars that provide an index into objects. */
0125: static final char POSITION_REF_FOLLOWS = 0xF10E;
0126:
0127: /** A position triple referenceing some other "nodes".
0128: * Followed by index of sequence (2 chars), and ipos (2 chars). */
0129: protected static final char POSITION_PAIR_FOLLOWS = 0xF10F;
0130:
0131: /** Encoding prefix that indicates a byte value. */
0132: static final int BYTE_PREFIX = 0xF000;
0133:
0134: /** The value used to encode false. */
0135: static final char BOOL_FALSE = 0xF100;
0136:
0137: /** The value used to encode true. */
0138: static final char BOOL_TRUE = 0xF101;
0139:
0140: /** A 32-bit integer, non-compact form.
0141: *
0142: * [INT_FOLLOWS]
0143: * [word1], [word2]: The big-endian bits of the integer.
0144: */
0145: public static final int INT_FOLLOWS = 0xF102;
0146:
0147: /** A 64-bit long integer.
0148: *
0149: * [LONG_FOLLOWS]
0150: * [word1], [word2], [word3], [word4]: The big-endian bits of the long.
0151: */
0152: static final int LONG_FOLLOWS = 0xF103;
0153:
0154: /** A 64-bit float floating-pointer number.
0155: *
0156: * [FLOAT_FOLLOWS]
0157: * [word1], [word2]: The big-endian bits of the float.
0158: */
0159: static final int FLOAT_FOLLOWS = 0xF104;
0160:
0161: /** A 64-bit double floating-pointer number.
0162: *
0163: * [DOUBLE_FOLLOWS]
0164: * [word1], [word2], [word3], [word4]: The big-endian bits of the double.
0165: */
0166: static final int DOUBLE_FOLLOWS = 0xF105;
0167:
0168: /** A 16-bit (non-compact) Unicode char follows. */
0169: static final int CHAR_FOLLOWS = 0xF106;
0170:
0171: /** A comment node follows.
0172: * [COMMENT]
0173: * [length] 2 shorts
0174: * [comment text], (length) number of characters.
0175: */
0176: static final int COMMENT = 0xF117;
0177:
0178: /** A processing-instruction node follows.
0179: * [PROCESSING_INSTRUCTION]
0180: * [target] 2 shorts, where objects[target] is the target as a String.
0181: * [length] 2 shorts.
0182: * [comment text], (length) number of characters.
0183: */
0184: protected static final int PROCESSING_INSTRUCTION = 0xF114;
0185:
0186: /** A CDATA node follows.
0187: * [CDATA_SECTION]
0188: * [length] 2 shorts
0189: * [comment text], (length) number of characters.
0190: */
0191: static final int CDATA_SECTION = 0xF115;
0192:
0193: /** Suppress spacing between non-node items. */
0194: static final int JOINER = 0xF116;
0195:
0196: /** The beginning of an attribute.
0197: * [BEGIN_ATTRIBUTE_LONG]
0198: * [index], 2 shorts, where objects[index] is the attribute type name
0199: * and objects[index+1] is the attribute type object.
0200: * [end_offset], 2 shorts, giving the location of the following
0201: * END_ATTRIBUTE. If the attribute straddles the gap, then
0202: * end_offset is a negative offset relative to data.length.
0203: * (Therefore allocating more space for the gap does not require
0204: * adjusting end_offset.) Otherwise, the end_offset is relative
0205: * to the BEGIN_ATTRIBUTE_LONG word.
0206: */
0207: protected static final int BEGIN_ATTRIBUTE_LONG = 0xF109;
0208: public static final int BEGIN_ATTRIBUTE_LONG_SIZE = 5;
0209:
0210: /** The end of an attribute of a node. */
0211: static final int END_ATTRIBUTE = 0xF10A;
0212: public static final int END_ATTRIBUTE_SIZE = 1;
0213:
0214: /** Beginning of a document (or top-level value).
0215: * Used to distinguish a document from its element node.
0216: * [end_offset], 2 shorts, giving the location of the following
0217: * END_DOCUMENT. If the attribute straddles the gap, then
0218: * end_offset is a negative offset relative to data.length.
0219: * (Therefore allocating more space for the gap does not require
0220: * adjusting end_offset.) Otherwise, the end_offset is relative
0221: * to the BEGIN_DOCUMENT word.
0222: * [parent_offset], in 2 shorts. The parent node, or -1 if no parent.
0223: * Otherwise, a negative value is a relative offset, while a non-negative
0224: * value is absolute. (Use the latter when gap is between this node and
0225: * its parent. The parent would normally be a BEGIN_ENTITY.
0226: */
0227: protected static final int BEGIN_DOCUMENT = 0xF110;
0228:
0229: /** End of a document. */
0230: protected static final int END_DOCUMENT = 0xF111;
0231:
0232: /** End of an entity (typically a file, possibly included).
0233: * [base_uri], 2 short, given an index of a base-uri object
0234: * [parent_offset], in 2 shorts, encoded as for BEGIN_DOCUMENT.
0235: */
0236: public static final int BEGIN_ENTITY = 0xF112;
0237: public static final int BEGIN_ENTITY_SIZE = 5;
0238:
0239: protected static final int END_ENTITY = 0xF113;
0240:
0241: /** The document-uri property of a node.
0242: * This is not an actual value, but it is a property of the previous
0243: * document node, or the surrounding node just after a BEGIN_XXX entry.
0244: * [DOCUMENT_URI]
0245: * [index]. 2 shorts, where objects[index] is the document-uri value.
0246: */
0247: protected static final int DOCUMENT_URI = 0xF118;
0248:
0249: /** Beginning of an element, compact form.
0250: *
0251: * [BEGIN_ELEMENT_SHORT + index], where objects[index] is the element's
0252: * type name and objects[index+1] is the type object.
0253: * [end_offset], the unsigned offset (from the initial word)
0254: * to the corresponding END_ELEMENT_SHORT.
0255: * [parent_offset], the (unsigned absolute value of the) offset
0256: * to the outer BEGIN_ELEMENT_SHORT/BEGIN_ELEMENT_LONG/BEGIN_DOCUMENT.
0257: *. (If these is no parent, then parent_offset==0.)
0258: *
0259: * This should is used when index < BEGIN_ELEMENT_SHORT_INDEX_MAX,
0260: * both end_offset and parent_offset fit in 16 bits,
0261: * and the element does not straddle the gap.
0262: */
0263: protected static final int BEGIN_ELEMENT_SHORT = 0xA000;
0264: protected static final int BEGIN_ELEMENT_SHORT_INDEX_MAX = 0xFFF;
0265:
0266: /** End of an element, compact form.
0267: *
0268: * [END_ELEMENT_SHORT]
0269: * [begin_offset], the unsigned absolute value of the offset to the
0270: * matching BEGIN. (This is the same as the matching end_offset.)
0271: *
0272: */
0273: protected static final int END_ELEMENT_SHORT = 0xF10B;
0274:
0275: /** Begin of an element, non-compact form.
0276: *
0277: * [BEGIN_ELEMENT_LONG]
0278: * [end_offset], in 2 shorts. The position of the matching END_ELEMENT_LONG.
0279: * If the element straddles the gap, then end_offset is a negative offset
0280: * relative to data.length. (Therefore allocating more space for the
0281: * gap does not require adjusting any end_offset.) If the element and
0282: * and its children are all on the same side of the gap, then end_offset
0283: * is a positive offset relative to the BEGIN_ELEMENT_LONG word. (Hence
0284: * shifting an entire element when the gap is moved does not require
0285: * changing its end_offset.)
0286: *
0287: * Note that the space taken by a BEGIN_ELEMENT_LONG is the same that
0288: * needed for a BEGIN_ELEMENT_SHORT (but a END_ELEMENT_LONG takes much
0289: * more space than a END_ELEMENT_SHORT). This is to make it easier
0290: * to convert a BEGIN_ELEMENT_LONG to a BEGIN_ELEMENT_SHORT or vice
0291: * versa, as needed.
0292: */
0293: protected static final int BEGIN_ELEMENT_LONG = 0xF108;
0294:
0295: /** End of n element, non-compact form.
0296: *
0297: * [END_ELEMENT_LONG]
0298: * [index], 2 shorts where objects[index] is the element's type name and
0299: * objects[index+1] is the type object.
0300: * [begin_offset], in 2 shorts. The position of the matching
0301: * BEGIN_ELEMENT_LONG. If the element straddles the gap, then begin_offset
0302: * is the actual index (i.e. relative to the start of data) of the
0303: * matching BEGIN_ELEMENT_LONG. (Therefore allocating more space for the
0304: * gap does not require adjusting begin_offset.) If the element does not
0305: * straddle the gap, then begin_offset is a negative offset relative
0306: * to the END_ELEMENT_LONG word. (Hence shifting an entire element when
0307: * the gap is moved does not require changing its begin_offset.)
0308: * relative to data.length.
0309: * [parent_offset], in 2 shorts. The position of the outer BEGIN_ELEMENT_LONG,
0310: * BEGIN_ELEMENT_SHORT or BEGIN_DOCUMENT. If the difference straddles
0311: * the gap (i.e. either this element straddles the gap or the parent element
0312: * does and the gap precedes this element), then parent_offset is the
0313: * actual index of the parent element. Otherwise, then parent_offset is a
0314: * negative offset relative to the END_ELEMENT_LONG word.
0315: */
0316: protected static final int END_ELEMENT_LONG = 0xF10C;
0317:
0318: int currentParent = -1;
0319:
0320: public void ensureSpace(int needed) {
0321: int avail = gapEnd - gapStart;
0322: if (needed > avail) {
0323: int oldSize = data.length;
0324: int neededSize = oldSize - avail + needed;
0325: int newSize = 2 * oldSize;
0326: if (newSize < neededSize)
0327: newSize = neededSize;
0328: char[] tmp = new char[newSize];
0329: if (gapStart > 0)
0330: System.arraycopy(data, 0, tmp, 0, gapStart);
0331: int afterGap = oldSize - gapEnd;
0332: if (afterGap > 0)
0333: System.arraycopy(data, gapEnd, tmp, newSize - afterGap,
0334: afterGap);
0335: gapEnd = newSize - afterGap;
0336: data = tmp;
0337: }
0338: }
0339:
0340: public final void resizeObjects() {
0341: int oldLength;
0342: int newLength;
0343: Object[] tmp;
0344: if (objects == null) {
0345: oldLength = 0;
0346: newLength = 100;
0347: tmp = new Object[newLength];
0348: } else {
0349: oldLength = objects.length;
0350: newLength = 2 * oldLength;
0351: tmp = new Object[newLength];
0352: System.arraycopy(objects, 0, tmp, 0, oldLength);
0353: }
0354: objects = tmp;
0355: }
0356:
0357: public int find(Object arg1) {
0358: if (oindex == objects.length)
0359: resizeObjects();
0360: objects[oindex] = arg1;
0361: return oindex++;
0362: }
0363:
0364: /** Get a 32-bit int from the data array. */
0365: final protected int getIntN(int index) {
0366: return (data[index] << 16) | (data[index + 1] & 0xFFFF);
0367: }
0368:
0369: /** Get a 64-bit long from the data array. */
0370: final protected long getLongN(int index) {
0371: char[] data = this .data; // Optimization.
0372: return (data[index] & 0xFFFFL) << 48
0373: | (data[index + 1] & 0xFFFFL) << 32
0374: | (data[index + 2] & 0xFFFFL) << 16
0375: | (data[index + 3] & 0xFFFFL);
0376: }
0377:
0378: final public void setIntN(int index, int i) {
0379: data[index] = (char) (i >> 16);
0380: data[index + 1] = (char) i;
0381: }
0382:
0383: public void consume(SeqPosition position) {
0384: ensureSpace(3);
0385: // FIXME - no need for find to search in this case!
0386: int index = find(position.copy());
0387: data[gapStart++] = POSITION_REF_FOLLOWS;
0388: setIntN(gapStart, index);
0389: gapStart += 2;
0390: }
0391:
0392: public void writePosition(AbstractSequence seq, int ipos) {
0393: ensureSpace(5);
0394: data[gapStart] = POSITION_PAIR_FOLLOWS;
0395: int seq_index = find(seq);
0396: setIntN(gapStart + 1, seq_index);
0397: setIntN(gapStart + 3, ipos);
0398: gapStart += 5;
0399: }
0400:
0401: public void writeObject(Object v) {
0402: ensureSpace(3);
0403: int index = find(v);
0404: if (index < 0x1000)
0405: data[gapStart++] = (char) (OBJECT_REF_SHORT | index);
0406: else {
0407: data[gapStart++] = OBJECT_REF_FOLLOWS;
0408: setIntN(gapStart, index);
0409: gapStart += 2;
0410: }
0411: }
0412:
0413: /** Write/set the document-uri property of the current document.
0414: * Only allowed immediately following startDocument. */
0415: public void writeDocumentUri(Object uri) {
0416: ensureSpace(3);
0417: int index = find(uri);
0418: data[gapStart++] = DOCUMENT_URI;
0419: setIntN(gapStart, index);
0420: gapStart += 2;
0421: }
0422:
0423: public void writeComment(char[] chars, int offset, int length) {
0424: ensureSpace(3 + length);
0425: int i = gapStart;
0426: data[i++] = COMMENT;
0427: setIntN(i, length);
0428: i += 2;
0429: System.arraycopy(chars, offset, data, i, length);
0430: gapStart = i + length;
0431: }
0432:
0433: public void writeComment(String comment, int offset, int length) {
0434: ensureSpace(3 + length);
0435: int i = gapStart;
0436: data[i++] = COMMENT;
0437: setIntN(i, length);
0438: i += 2;
0439: comment.getChars(offset, offset + length, data, i);
0440: gapStart = i + length;
0441: }
0442:
0443: public void writeProcessingInstruction(String target,
0444: char[] content, int offset, int length) {
0445: ensureSpace(5 + length);
0446: int i = gapStart;
0447: data[i++] = PROCESSING_INSTRUCTION;
0448: int index = find(target);
0449: setIntN(i, index);
0450: setIntN(i + 2, length);
0451: i += 4;
0452: System.arraycopy(content, offset, data, i, length);
0453: gapStart = i + length;
0454: }
0455:
0456: public void writeProcessingInstruction(String target,
0457: String content, int offset, int length) {
0458: ensureSpace(5 + length);
0459: int i = gapStart;
0460: data[i++] = PROCESSING_INSTRUCTION;
0461: int index = find(target);
0462: setIntN(i, index);
0463: setIntN(i + 2, length);
0464: i += 4;
0465: content.getChars(offset, offset + length, data, i);
0466: gapStart = i + length;
0467: }
0468:
0469: public void startElement(Object type) {
0470: startElement(find(type));
0471: }
0472:
0473: public void startDocument() {
0474: ensureSpace(5 + 1);
0475: gapEnd--;
0476: int p = gapStart;
0477: data[p] = BEGIN_DOCUMENT;
0478: if (docStart != 0)
0479: throw new Error("nested document");
0480: docStart = p + 1;
0481: setIntN(p + 1, gapEnd - data.length);
0482: setIntN(p + 3, currentParent == -1 ? -1 : currentParent - p); // parent_offset
0483: currentParent = p;
0484: gapStart = p + 5;
0485: currentParent = p;
0486: data[gapEnd] = END_DOCUMENT;
0487: }
0488:
0489: public void endDocument() {
0490: if (data[gapEnd] != END_DOCUMENT || docStart <= 0
0491: || data[currentParent] != BEGIN_DOCUMENT)
0492: throw new Error("unexpected endDocument");
0493: // Move the END_DOCUMENT to before the gap.
0494: gapEnd++;
0495: setIntN(docStart, gapStart - docStart + 1);
0496: docStart = 0;
0497: data[gapStart++] = END_DOCUMENT;
0498: int parent = getIntN(currentParent + 3);
0499: currentParent = parent >= -1 ? parent : currentParent + parent;
0500: }
0501:
0502: public void beginEntity(Object base) {
0503: // Ideally, we want to ignore a beginEnity if and only if it is redundant.
0504: // There are also problems in the current implementation with
0505: // nested (or maybe any non-top-level?) BEGIN_ENTITY nodes.
0506: // So for now, let's keep it simple.
0507: if (gapStart != 0)
0508: return;
0509: ensureSpace(BEGIN_ENTITY_SIZE + 1);
0510: gapEnd--;
0511: int p = gapStart;
0512: data[p] = BEGIN_ENTITY;
0513: setIntN(p + 1, find(base));
0514: setIntN(p + 3, currentParent == -1 ? -1 : currentParent - p); // parent_offset
0515: gapStart = p + 5;
0516: currentParent = p;
0517: data[gapEnd] = END_ENTITY;
0518: }
0519:
0520: public void endEntity() {
0521: // See comment in beginEntity.
0522: if (gapEnd + 1 != data.length || data[gapEnd] != END_ENTITY)
0523: return;
0524: if (/*data[gapEnd] != END_ENTITY || */
0525: data[currentParent] != BEGIN_ENTITY)
0526: throw new Error("unexpected endEntity");
0527: // Move the END_ENTITY to before the gap.
0528: gapEnd++;
0529: data[gapStart++] = END_ENTITY;
0530: int parent = getIntN(currentParent + 3);
0531: currentParent = parent >= -1 ? parent : currentParent + parent;
0532: }
0533:
0534: public void startElement(int index) {
0535: ensureSpace(3 + 7);
0536: gapEnd -= 7;
0537: data[gapStart++] = BEGIN_ELEMENT_LONG;
0538: setIntN(gapStart, gapEnd - data.length); // end_offset
0539: gapStart += 2;
0540: data[gapEnd] = END_ELEMENT_LONG;
0541: setIntN(gapEnd + 1, index); // begin_offset
0542: setIntN(gapEnd + 3, gapStart - 3); // begin_offset
0543: setIntN(gapEnd + 5, currentParent); // parent_offset
0544: currentParent = gapStart - 3;
0545: }
0546:
0547: public void setElementName(int elementIndex, int nameIndex) {
0548: if (data[elementIndex] == BEGIN_ELEMENT_LONG) {
0549: int j = getIntN(elementIndex + 1);
0550: elementIndex = j + (j < 0 ? data.length : elementIndex);
0551: }
0552: if (elementIndex < gapEnd)
0553: throw new Error("setElementName before gapEnd");
0554: setIntN(elementIndex + 1, nameIndex);
0555: }
0556:
0557: public void endElement() {
0558: if (data[gapEnd] != END_ELEMENT_LONG)
0559: throw new Error("unexpected endElement");
0560: int index = getIntN(gapEnd + 1);
0561: int begin = getIntN(gapEnd + 3);
0562: int parent = getIntN(gapEnd + 5);
0563: currentParent = parent;
0564: gapEnd += 7;
0565: int offset = gapStart - begin;
0566: int parentOffset = begin - parent;
0567: if (index < BEGIN_ELEMENT_SHORT_INDEX_MAX && offset < 0x10000
0568: && parentOffset < 0x10000) {
0569: data[begin] = (char) (BEGIN_ELEMENT_SHORT | index);
0570: data[begin + 1] = (char) offset; // end_offset
0571: data[begin + 2] = (char) parentOffset;
0572: data[gapStart] = END_ELEMENT_SHORT;
0573: data[gapStart + 1] = (char) offset; // begin_offset
0574: gapStart += 2;
0575: } else {
0576: data[begin] = BEGIN_ELEMENT_LONG;
0577: setIntN(begin + 1, offset);
0578: data[gapStart] = END_ELEMENT_LONG;
0579: setIntN(gapStart + 1, index);
0580: setIntN(gapStart + 3, -offset);
0581: if (parent >= gapStart || begin <= gapStart)
0582: parent -= gapStart;
0583: setIntN(gapStart + 5, parent);
0584: gapStart += 7;
0585: }
0586: }
0587:
0588: public void startAttribute(Object attrType) {
0589: startAttribute(find(attrType));
0590: }
0591:
0592: public void startAttribute(int index) {
0593: /* This needs to be tested. FIXME. Anyway only solves limited problem.
0594: // If there is whitespace and nothing else between the BEGIN_ELEMENT_LONG
0595: // and the current position, get rid of the spaces.
0596: int i = currentParent;
0597: if (i > 0 && (i += 3) < gapStart)
0598: {
0599: for (int j = i; ; j++)
0600: {
0601: if (j == gapStart)
0602: {
0603: gapStart = i;
0604: break;
0605: }
0606: char c = data[j];
0607: if (c != ' ' && c != '\t' && c != '\n' && c != '\r')
0608: break;
0609: }
0610: }
0611: */
0612:
0613: ensureSpace(5 + 1);
0614: gapEnd--;
0615: data[gapStart++] = BEGIN_ATTRIBUTE_LONG;
0616: if (attrStart != 0)
0617: throw new Error("nested attribute");
0618: attrStart = gapStart;
0619: setIntN(gapStart, index);
0620: setIntN(gapStart + 2, gapEnd - data.length);
0621: gapStart += 4;
0622: data[gapEnd] = END_ATTRIBUTE;
0623: }
0624:
0625: public void setAttributeName(int attrIndex, int nameIndex) {
0626: setIntN(attrIndex + 1, nameIndex);
0627: }
0628:
0629: public void endAttribute() {
0630: if (attrStart <= 0)
0631: return;
0632: if (data[gapEnd] != END_ATTRIBUTE)
0633: throw new Error("unexpected endAttribute");
0634: // Move the END_ATTRIBUTES to before the gap.
0635: gapEnd++;
0636: setIntN(attrStart + 2, gapStart - attrStart + 1);
0637: attrStart = 0;
0638: data[gapStart++] = END_ATTRIBUTE;
0639: }
0640:
0641: public Consumer append(char c) {
0642: write(c);
0643: return this ;
0644: }
0645:
0646: public void write(int c) {
0647: ensureSpace(3);
0648: if (c <= MAX_CHAR_SHORT)
0649: data[gapStart++] = (char) c;
0650: else if (c < 0x10000) {
0651: data[gapStart++] = CHAR_FOLLOWS;
0652: data[gapStart++] = (char) c;
0653: } else
0654: Char.print(c, this );
0655: }
0656:
0657: public void writeBoolean(boolean v) {
0658: ensureSpace(1);
0659: data[gapStart++] = v ? BOOL_TRUE : BOOL_FALSE;
0660: }
0661:
0662: public void writeByte(int v) {
0663: ensureSpace(1);
0664: data[gapStart++] = (char) (BYTE_PREFIX + (v & 0xFF));
0665: }
0666:
0667: public void writeInt(int v) {
0668: ensureSpace(3);
0669: if (v >= MIN_INT_SHORT && v <= MAX_INT_SHORT)
0670: data[gapStart++] = (char) (INT_SHORT_ZERO + v);
0671: else {
0672: data[gapStart++] = INT_FOLLOWS;
0673: setIntN(gapStart, v);
0674: gapStart += 2;
0675: }
0676: }
0677:
0678: public void writeLong(long v) {
0679: ensureSpace(5);
0680: data[gapStart++] = LONG_FOLLOWS;
0681: data[gapStart++] = (char) (v >>> 48);
0682: data[gapStart++] = (char) (v >>> 32);
0683: data[gapStart++] = (char) (v >>> 16);
0684: data[gapStart++] = (char) v;
0685: }
0686:
0687: public void writeFloat(float v) {
0688: ensureSpace(3);
0689: int i = Float.floatToIntBits(v);
0690: data[gapStart++] = FLOAT_FOLLOWS;
0691: data[gapStart++] = (char) (i >>> 16);
0692: data[gapStart++] = (char) i;
0693: }
0694:
0695: public void writeDouble(double v) {
0696: ensureSpace(5);
0697: long l = Double.doubleToLongBits(v);
0698: data[gapStart++] = DOUBLE_FOLLOWS;
0699: data[gapStart++] = (char) (l >>> 48);
0700: data[gapStart++] = (char) (l >>> 32);
0701: data[gapStart++] = (char) (l >>> 16);
0702: data[gapStart++] = (char) l;
0703: }
0704:
0705: public boolean ignoring() {
0706: return false;
0707: }
0708:
0709: public void writeJoiner() {
0710: ensureSpace(1);
0711: data[gapStart++] = JOINER;
0712: }
0713:
0714: public void write(char[] buf, int off, int len) {
0715: if (len == 0)
0716: writeJoiner();
0717: ensureSpace(len);
0718: while (len > 0) {
0719: char ch = buf[off++];
0720: len--;
0721: if (ch <= MAX_CHAR_SHORT)
0722: data[gapStart++] = ch;
0723: else {
0724: write(ch);
0725: ensureSpace(len);
0726: }
0727: }
0728: }
0729:
0730: public void write(String str) {
0731: write(str, 0, str.length());
0732: }
0733:
0734: /* #ifdef use:java.lang.CharSequence */
0735: public void write(CharSequence str, int start, int length)
0736: /* #else */
0737: // public void write (String str, int start, int length)
0738: /* #endif */
0739: {
0740: if (length == 0)
0741: writeJoiner();
0742: ensureSpace(length);
0743: while (length > 0) {
0744: char ch = str.charAt(start++);
0745: length--;
0746: if (ch <= MAX_CHAR_SHORT)
0747: data[gapStart++] = ch;
0748: else {
0749: write(ch);
0750: ensureSpace(length);
0751: }
0752: }
0753: }
0754:
0755: public void writeCDATA(char[] chars, int offset, int length) {
0756: ensureSpace(3 + length);
0757: int i = gapStart;
0758: data[i++] = CDATA_SECTION;
0759: setIntN(i, length);
0760: i += 2;
0761: System.arraycopy(chars, offset, data, i, length);
0762: gapStart = i + length;
0763: }
0764:
0765: /* #ifdef use:java.lang.CharSequence */
0766: public Consumer append(CharSequence csq) {
0767: if (csq == null)
0768: csq = "null";
0769: return append(csq, 0, csq.length());
0770: }
0771:
0772: public Consumer append(CharSequence csq, int start, int end) {
0773: if (csq == null)
0774: csq = "null";
0775: for (int i = start; i < end; i++)
0776: append(csq.charAt(i));
0777: return this ;
0778: }
0779:
0780: /* #else */
0781: // public Consumer append (String str)
0782: // {
0783: // if (str == null)
0784: // str = "null";
0785: // int len = str.length();
0786: // for (int i = 0; i < len; i++)
0787: // append(str.charAt(i));
0788: // return this;
0789: // }
0790: /* #endif */
0791:
0792: public boolean isEmpty() {
0793: // FIXME does not work if we allow comment() entries!
0794: int pos = gapStart == 0 ? gapEnd : 0;
0795: return pos == data.length;
0796: }
0797:
0798: public int size() {
0799: int size = 0;
0800: int i = 0;
0801: for (;;) {
0802: i = nextPos(i);
0803: if (i == 0)
0804: return size;
0805: size++;
0806: }
0807: }
0808:
0809: public int createPos(int index, boolean isAfter) {
0810: return createRelativePos(0, index, isAfter);
0811: }
0812:
0813: public final int posToDataIndex(int ipos) {
0814: if (ipos == -1)
0815: return data.length;
0816: int index = ipos >>> 1;
0817: if ((ipos & 1) != 0)
0818: index--;
0819: if (index >= gapStart)
0820: index += gapEnd - gapStart;
0821: if ((ipos & 1) != 0) {
0822: index = nextDataIndex(index);
0823: if (index < 0)
0824: return data.length;
0825: if (index == gapStart)
0826: index += gapEnd - gapStart;
0827: }
0828: return index;
0829: }
0830:
0831: public int firstChildPos(int ipos) {
0832: int index = gotoChildrenStart(posToDataIndex(ipos));
0833: if (index < 0)
0834: return 0;
0835: return index << 1;
0836: }
0837:
0838: public final int gotoChildrenStart(int index) {
0839: if (index == data.length)
0840: return -1;
0841: char datum = data[index];
0842: if ((datum >= BEGIN_ELEMENT_SHORT && datum <= BEGIN_ELEMENT_SHORT
0843: + BEGIN_ELEMENT_SHORT_INDEX_MAX)
0844: || datum == BEGIN_ELEMENT_LONG)
0845: index += 3;
0846: else if (datum == BEGIN_DOCUMENT || datum == BEGIN_ENTITY)
0847: index += 5;
0848: else
0849: return -1;
0850: for (;;) {
0851: if (index >= gapStart)
0852: index += gapEnd - gapStart;
0853: datum = data[index];
0854: if (datum == BEGIN_ATTRIBUTE_LONG) {
0855: int end = getIntN(index + 3);
0856: index = end + (end < 0 ? data.length : index);
0857: } else if (datum == END_ATTRIBUTE || datum == JOINER)
0858: index++;
0859: else if (datum == DOCUMENT_URI)
0860: index += 3;
0861: else
0862: break;
0863: }
0864: return index;
0865: }
0866:
0867: public int parentPos(int ipos) {
0868: int index = posToDataIndex(ipos);
0869: for (;;) {
0870: index = parentOrEntityI(index);
0871: if (index == -1)
0872: return -1;
0873: if (data[index] != BEGIN_ENTITY)
0874: return index << 1;
0875: }
0876: }
0877:
0878: public int parentOrEntityPos(int ipos) {
0879: int index = parentOrEntityI(posToDataIndex(ipos));
0880: return index < 0 ? -1 : index << 1;
0881: }
0882:
0883: public int parentOrEntityI(int index) {
0884: if (index == data.length)
0885: return -1;
0886: char datum = data[index];
0887: if (datum == BEGIN_DOCUMENT || datum == BEGIN_ENTITY) {
0888: int parent_offset = getIntN(index + 3);
0889: if (parent_offset >= -1)
0890: return parent_offset;
0891: else
0892: return index + parent_offset;
0893: }
0894: if (datum >= BEGIN_ELEMENT_SHORT
0895: && datum <= BEGIN_ELEMENT_SHORT
0896: + BEGIN_ELEMENT_SHORT_INDEX_MAX) {
0897: int parent_offset = data[index + 2];
0898: return parent_offset == 0 ? -1 : index - parent_offset;
0899: }
0900: if (datum == BEGIN_ELEMENT_LONG) {
0901: int end_offset = getIntN(index + 1);
0902: end_offset += end_offset < 0 ? data.length : index;
0903: int parent_offset = getIntN(end_offset + 5);
0904: if (parent_offset == 0)
0905: return -1;
0906: if (parent_offset < 0)
0907: parent_offset += end_offset;
0908: return parent_offset;
0909: }
0910: for (;;) {
0911: if (index == gapStart)
0912: index = gapEnd;
0913: if (index == data.length)
0914: return -1;
0915: datum = data[index];
0916: switch (datum) {
0917: case END_ELEMENT_SHORT:
0918: return index - data[index + 1];
0919: case END_ELEMENT_LONG:
0920: int begin_offset = getIntN(index + 3);
0921: if (begin_offset < 0)
0922: begin_offset += index;
0923: return begin_offset;
0924: case END_ATTRIBUTE:
0925: index++;
0926: continue;
0927: case END_DOCUMENT:
0928: return -1;
0929: default:
0930: index = nextDataIndex(index);
0931: }
0932: if (index < 0)
0933: break;
0934: }
0935: return -1;
0936: }
0937:
0938: public int getAttributeCount(int parent) {
0939: int n = 0;
0940: for (int attr = firstAttributePos(parent); attr != 0
0941: && getNextKind(attr) == Sequence.ATTRIBUTE_VALUE; attr = nextPos(attr))
0942: n++;
0943: return n;
0944: }
0945:
0946: public boolean gotoAttributesStart(TreePosition pos) {
0947: int index = gotoAttributesStart(pos.ipos >> 1);
0948: if (index < 0)
0949: return false;
0950: pos.push(this , index << 1);
0951: return true;
0952: }
0953:
0954: public int firstAttributePos(int ipos) {
0955: int index = gotoAttributesStart(posToDataIndex(ipos));
0956: return index < 0 ? 0 : index << 1;
0957: }
0958:
0959: public int gotoAttributesStart(int index) {
0960: if (index >= gapStart)
0961: index += gapEnd - gapStart;
0962: if (index == data.length)
0963: return -1;
0964: char datum = data[index];
0965: if ((datum >= BEGIN_ELEMENT_SHORT && datum <= BEGIN_ELEMENT_SHORT
0966: + BEGIN_ELEMENT_SHORT_INDEX_MAX)
0967: || datum == BEGIN_ELEMENT_LONG)
0968: return index + 3;
0969: else
0970: return -1;
0971: }
0972:
0973: public Object get(int index) {
0974: int i = 0;
0975: while (--index >= 0) {
0976: i = nextPos(i);
0977: if (i == 0)
0978: throw new IndexOutOfBoundsException();
0979: }
0980: return getPosNext(i);
0981: }
0982:
0983: public boolean consumeNext(int ipos, Consumer out) {
0984: if (!hasNext(ipos))
0985: return false;
0986: int start = posToDataIndex(ipos);
0987: int end = nextNodeIndex(start, -1 >>> 1);
0988: if (end == start)
0989: end = nextDataIndex(start);
0990: if (end >= 0)
0991: consumeIRange(start, end, out);
0992: return true;
0993: }
0994:
0995: public void consumePosRange(int startPos, int endPos, Consumer out) {
0996: consumeIRange(posToDataIndex(startPos), posToDataIndex(endPos),
0997: out);
0998: }
0999:
1000: public int consumeIRange(int startPosition, int endPosition,
1001: Consumer out) {
1002: int pos = startPosition;
1003: int limit = startPosition <= gapStart && endPosition > gapStart ? gapStart
1004: : endPosition;
1005: int index;
1006: for (;;) {
1007: if (pos >= limit) {
1008: if (pos == gapStart && endPosition > gapEnd) {
1009: pos = gapEnd;
1010: limit = endPosition;
1011: } else
1012: break;
1013: }
1014:
1015: char datum = data[pos++];
1016:
1017: if (datum <= MAX_CHAR_SHORT) {
1018: int start = pos - 1;
1019: int lim = limit;
1020: for (;;) {
1021: if (pos >= lim)
1022: break;
1023: datum = data[pos++];
1024: if (datum > MAX_CHAR_SHORT) {
1025: pos--;
1026: break;
1027: }
1028: }
1029: out.write(data, start, pos - start);
1030: continue;
1031: }
1032: if (datum >= OBJECT_REF_SHORT
1033: && datum <= OBJECT_REF_SHORT
1034: + OBJECT_REF_SHORT_INDEX_MAX) {
1035: out.writeObject(objects[datum - OBJECT_REF_SHORT]);
1036: continue;
1037: }
1038: if (datum >= BEGIN_ELEMENT_SHORT
1039: && datum <= BEGIN_ELEMENT_SHORT
1040: + BEGIN_ELEMENT_SHORT_INDEX_MAX) {
1041: index = datum - BEGIN_ELEMENT_SHORT;
1042: out.startElement(objects[index]);
1043: pos += 2;
1044: continue;
1045: }
1046: /*
1047: if ((datum & 0xFF00) == BYTE_PREFIX)
1048: {
1049: out.writeByte((byte) datum);
1050: continue;
1051: }
1052: */
1053: if (datum >= INT_SHORT_ZERO + MIN_INT_SHORT
1054: && datum <= INT_SHORT_ZERO + MAX_INT_SHORT) {
1055: out.writeInt(datum - INT_SHORT_ZERO);
1056: continue;
1057: }
1058: switch (datum) {
1059: case BEGIN_DOCUMENT:
1060: out.startDocument();
1061: pos += 4;
1062: continue;
1063: case END_DOCUMENT:
1064: out.endDocument();
1065: continue;
1066: case BEGIN_ENTITY:
1067: if (out instanceof TreeList)
1068: ((TreeList) out).beginEntity(objects[getIntN(pos)]);
1069: pos += 4;
1070: continue;
1071: case END_ENTITY:
1072: if (out instanceof TreeList)
1073: ((TreeList) out).endEntity();
1074: continue;
1075: case DOCUMENT_URI:
1076: if (out instanceof TreeList)
1077: ((TreeList) out)
1078: .writeDocumentUri(objects[getIntN(pos)]);
1079: pos += 2;
1080: continue;
1081: case COMMENT: {
1082: int length = getIntN(pos);
1083: pos += 2;
1084: if (out instanceof XConsumer)
1085: ((XConsumer) out).writeComment(data, pos, length);
1086: pos += length;
1087: }
1088: continue;
1089: case CDATA_SECTION: {
1090: int length = getIntN(pos);
1091: pos += 2;
1092: if (out instanceof XConsumer)
1093: ((XConsumer) out).writeCDATA(data, pos, length);
1094: else
1095: out.write(data, pos, length);
1096: pos += length;
1097: }
1098: continue;
1099: case PROCESSING_INSTRUCTION: {
1100: String target = (String) objects[getIntN(pos)];
1101: int length = getIntN(pos + 2);
1102: pos += 4;
1103: if (out instanceof XConsumer)
1104: ((XConsumer) out).writeProcessingInstruction(
1105: target, data, pos, length);
1106: pos += length;
1107: }
1108: continue;
1109: case BOOL_FALSE:
1110: case BOOL_TRUE:
1111: out.writeBoolean(datum != BOOL_FALSE);
1112: continue;
1113: case JOINER:
1114: out.write("");
1115: continue;
1116: case CHAR_FOLLOWS:
1117: out.write(data, pos, 1 + datum - CHAR_FOLLOWS);
1118: pos++;
1119: continue;
1120: case POSITION_PAIR_FOLLOWS: {
1121: AbstractSequence seq = (AbstractSequence) objects[getIntN(pos)];
1122: int ipos = getIntN(pos + 2);
1123: if (out instanceof PositionConsumer)
1124: ((PositionConsumer) out).writePosition(seq, ipos);
1125: else
1126: out.writeObject(seq.getIteratorAtPos(ipos));
1127: pos += 4;
1128: }
1129: continue;
1130: case POSITION_REF_FOLLOWS:
1131: if (out instanceof PositionConsumer) {
1132: ((PositionConsumer) out)
1133: .consume((SeqPosition) objects[getIntN(pos)]);
1134: pos += 2;
1135: continue;
1136: }
1137: // ... else fall through ...
1138: case OBJECT_REF_FOLLOWS:
1139: out.writeObject(objects[getIntN(pos)]);
1140: pos += 2;
1141: continue;
1142: case END_ELEMENT_SHORT:
1143: pos++;
1144: out.endElement();
1145: continue;
1146: case BEGIN_ELEMENT_LONG:
1147: index = getIntN(pos);
1148: index += index >= 0 ? pos - 1 : data.length;
1149: pos += 2;
1150: index = getIntN(index + 1);
1151: out.startElement(objects[index]);
1152: continue;
1153: case END_ELEMENT_LONG:
1154: index = getIntN(pos);
1155: out.endElement();
1156: pos += 6;
1157: continue;
1158: case BEGIN_ATTRIBUTE_LONG:
1159: index = getIntN(pos);
1160: out.startAttribute(objects[index]);
1161: pos += 4;
1162: continue;
1163: case END_ATTRIBUTE:
1164: out.endAttribute();
1165: continue;
1166: case INT_FOLLOWS:
1167: out.writeInt(getIntN(pos));
1168: pos += 2;
1169: continue;
1170: case FLOAT_FOLLOWS:
1171: out.writeFloat(Float.intBitsToFloat(getIntN(pos)));
1172: pos += 2;
1173: continue;
1174: case LONG_FOLLOWS:
1175: out.writeLong(getLongN(pos));
1176: pos += 4;
1177: continue;
1178: case DOUBLE_FOLLOWS:
1179: out.writeDouble(Double.longBitsToDouble(getLongN(pos)));
1180: pos += 4;
1181: continue;
1182: default:
1183: throw new Error("unknown code:" + (int) datum);
1184: }
1185: }
1186: return pos;
1187: }
1188:
1189: public void toString(String sep, StringBuffer sbuf) {
1190: int pos = 0;
1191: int limit = gapStart;
1192: int index;
1193: boolean seen = false;
1194: boolean inStartTag = false;
1195: boolean inAttribute = false;
1196: for (;;) {
1197: if (pos >= limit) {
1198: if (pos == gapStart) {
1199: pos = gapEnd;
1200: limit = data.length;
1201: if (pos == limit)
1202: break;
1203: } else
1204: break;
1205: }
1206:
1207: char datum = data[pos++];
1208:
1209: if (datum <= MAX_CHAR_SHORT) {
1210: int start = pos - 1;
1211: int lim = limit;
1212: for (;;) {
1213: if (pos >= lim)
1214: break;
1215: datum = data[pos++];
1216: if (datum > MAX_CHAR_SHORT) {
1217: pos--;
1218: break;
1219: }
1220: }
1221: if (inStartTag) {
1222: sbuf.append('>');
1223: inStartTag = false;
1224: }
1225: sbuf.append(data, start, pos - start);
1226: seen = false;
1227: continue;
1228: }
1229: if (datum >= OBJECT_REF_SHORT
1230: && datum <= OBJECT_REF_SHORT
1231: + OBJECT_REF_SHORT_INDEX_MAX) {
1232: if (inStartTag) {
1233: sbuf.append('>');
1234: inStartTag = false;
1235: }
1236: if (seen)
1237: sbuf.append(sep);
1238: else
1239: seen = true;
1240: sbuf.append(objects[datum - OBJECT_REF_SHORT]);
1241: continue;
1242: }
1243: if (datum >= BEGIN_ELEMENT_SHORT
1244: && datum <= BEGIN_ELEMENT_SHORT
1245: + BEGIN_ELEMENT_SHORT_INDEX_MAX) {
1246: if (inStartTag) {
1247: sbuf.append('>');
1248: inStartTag = false;
1249: }
1250: index = datum - BEGIN_ELEMENT_SHORT;
1251: if (seen)
1252: sbuf.append(sep);
1253: sbuf.append('<');
1254: sbuf.append(objects[index].toString());
1255: pos += 2;
1256: seen = false;
1257: inStartTag = true;
1258: continue;
1259: }
1260: if (datum >= INT_SHORT_ZERO + MIN_INT_SHORT
1261: && datum <= INT_SHORT_ZERO + MAX_INT_SHORT) {
1262: if (inStartTag) {
1263: sbuf.append('>');
1264: inStartTag = false;
1265: }
1266: if (seen)
1267: sbuf.append(sep);
1268: else
1269: seen = true;
1270: sbuf.append(datum - INT_SHORT_ZERO);
1271: continue;
1272: }
1273: switch (datum) {
1274: case BEGIN_DOCUMENT:
1275: case BEGIN_ENTITY:
1276: pos += 4;
1277: continue;
1278: case DOCUMENT_URI:
1279: pos += 2;
1280: continue;
1281: case COMMENT:
1282: if (inStartTag) {
1283: sbuf.append('>');
1284: inStartTag = false;
1285: }
1286: index = getIntN(pos); // comment length
1287: pos += 2;
1288: sbuf.append("<!--");
1289: sbuf.append(data, pos, index);
1290: sbuf.append("-->");
1291: pos += index;
1292: continue;
1293: case CDATA_SECTION:
1294: if (inStartTag) {
1295: sbuf.append('>');
1296: inStartTag = false;
1297: }
1298: index = getIntN(pos); // comment length
1299: pos += 2;
1300: sbuf.append("<![CDATA[");
1301: sbuf.append(data, pos, index);
1302: sbuf.append("]]>");
1303: pos += index;
1304: continue;
1305: case PROCESSING_INSTRUCTION:
1306: if (inStartTag) {
1307: sbuf.append('>');
1308: inStartTag = false;
1309: }
1310: sbuf.append("<?");
1311: index = getIntN(pos); // target
1312: pos += 2;
1313: sbuf.append(objects[index]);
1314: index = getIntN(pos); // comment length
1315: pos += 2;
1316: if (index > 0) {
1317: sbuf.append(' ');
1318: sbuf.append(data, pos, index);
1319: pos += index;
1320: }
1321: sbuf.append("?>");
1322: continue;
1323: case END_DOCUMENT:
1324: case END_ENTITY:
1325: continue;
1326: case BOOL_FALSE:
1327: case BOOL_TRUE:
1328: if (inStartTag) {
1329: sbuf.append('>');
1330: inStartTag = false;
1331: }
1332: if (seen)
1333: sbuf.append(sep);
1334: else
1335: seen = true;
1336: sbuf.append(datum != BOOL_FALSE);
1337: continue;
1338: case JOINER:
1339: continue;
1340: case CHAR_FOLLOWS:
1341: if (inStartTag) {
1342: sbuf.append('>');
1343: inStartTag = false;
1344: }
1345: sbuf.append(data, pos, 1 + datum - CHAR_FOLLOWS);
1346: seen = false;
1347: pos++;
1348: continue;
1349: case POSITION_PAIR_FOLLOWS:
1350: if (inStartTag) {
1351: sbuf.append('>');
1352: inStartTag = false;
1353: }
1354: if (seen)
1355: sbuf.append(sep);
1356: else
1357: seen = true;
1358: {
1359: AbstractSequence seq = (AbstractSequence) objects[getIntN(pos)];
1360: int ipos = getIntN(pos + 2);
1361: // This could lead to to a lot of copying. FIXME.
1362: sbuf.append(seq.getIteratorAtPos(ipos));
1363: pos += 4;
1364: }
1365: continue;
1366: case POSITION_REF_FOLLOWS:
1367: case OBJECT_REF_FOLLOWS:
1368: if (inStartTag) {
1369: sbuf.append('>');
1370: inStartTag = false;
1371: }
1372: if (seen)
1373: sbuf.append(sep);
1374: else
1375: seen = true;
1376: sbuf.append(objects[getIntN(pos)]);
1377: pos += 2;
1378: continue;
1379: case BEGIN_ELEMENT_LONG:
1380: index = getIntN(pos);
1381: index += index >= 0 ? pos - 1 : data.length;
1382: pos += 2;
1383: index = getIntN(index + 1);
1384: if (inStartTag)
1385: sbuf.append('>');
1386: else if (seen)
1387: sbuf.append(sep);
1388: sbuf.append('<');
1389: sbuf.append(objects[index]);
1390: seen = false;
1391: inStartTag = true;
1392: continue;
1393: case END_ELEMENT_LONG:
1394: case END_ELEMENT_SHORT:
1395: if (datum == END_ELEMENT_SHORT) {
1396: index = data[pos++];
1397: index = data[pos - 2 - index] - BEGIN_ELEMENT_SHORT;
1398: } else {
1399: index = getIntN(pos);
1400: pos += 6;
1401: }
1402: if (inStartTag)
1403: sbuf.append("/>");
1404: else {
1405: sbuf.append("</");
1406: sbuf.append(objects[index]);
1407: sbuf.append('>');
1408: }
1409: inStartTag = false;
1410: seen = true;
1411: continue;
1412: case BEGIN_ATTRIBUTE_LONG:
1413: index = getIntN(pos);
1414: sbuf.append(' ');
1415: sbuf.append(objects[index]);
1416: sbuf.append("=\"");
1417: inAttribute = true;
1418: inStartTag = false;
1419: pos += 4;
1420: continue;
1421: case END_ATTRIBUTE:
1422: sbuf.append('"');
1423: inAttribute = false;
1424: inStartTag = true;
1425: seen = false;
1426: continue;
1427: case INT_FOLLOWS:
1428: if (inStartTag) {
1429: sbuf.append('>');
1430: inStartTag = false;
1431: }
1432: if (seen)
1433: sbuf.append(sep);
1434: else
1435: seen = true;
1436: sbuf.append(getIntN(pos));
1437: pos += 2;
1438: continue;
1439: case FLOAT_FOLLOWS:
1440: if (inStartTag) {
1441: sbuf.append('>');
1442: inStartTag = false;
1443: }
1444: if (seen)
1445: sbuf.append(sep);
1446: else
1447: seen = true;
1448: sbuf.append(Float.intBitsToFloat(getIntN(pos)));
1449: pos += 2;
1450: continue;
1451: case LONG_FOLLOWS:
1452: if (inStartTag) {
1453: sbuf.append('>');
1454: inStartTag = false;
1455: }
1456: if (seen)
1457: sbuf.append(sep);
1458: else
1459: seen = true;
1460: sbuf.append(getLongN(pos));
1461: pos += 4;
1462: continue;
1463: case DOUBLE_FOLLOWS:
1464: if (inStartTag) {
1465: sbuf.append('>');
1466: inStartTag = false;
1467: }
1468: if (seen)
1469: sbuf.append(sep);
1470: else
1471: seen = true;
1472: sbuf.append(Double.longBitsToDouble(getLongN(pos)));
1473: pos += 4;
1474: continue;
1475: default:
1476: throw new Error("unknown code:" + (int) datum);
1477: }
1478: }
1479: }
1480:
1481: public boolean hasNext(int ipos) {
1482: int index = posToDataIndex(ipos);
1483: if (index == data.length)
1484: return false;
1485: char ch = data[index];
1486: return ch != END_ATTRIBUTE && ch != END_ELEMENT_SHORT
1487: && ch != END_ELEMENT_LONG && ch != END_DOCUMENT;
1488: }
1489:
1490: public int getNextKind(int ipos) {
1491: return getNextKindI(posToDataIndex(ipos));
1492: }
1493:
1494: public int getNextKindI(int index) {
1495: if (index == data.length)
1496: return Sequence.EOF_VALUE;
1497: char datum = data[index];
1498: if (datum <= MAX_CHAR_SHORT)
1499: return Sequence.CHAR_VALUE;
1500: if (datum >= OBJECT_REF_SHORT
1501: && datum <= OBJECT_REF_SHORT
1502: + OBJECT_REF_SHORT_INDEX_MAX)
1503: return Sequence.OBJECT_VALUE;
1504: if (datum >= BEGIN_ELEMENT_SHORT
1505: && datum <= BEGIN_ELEMENT_SHORT
1506: + BEGIN_ELEMENT_SHORT_INDEX_MAX)
1507: return Sequence.ELEMENT_VALUE;
1508: if ((datum & 0xFF00) == BYTE_PREFIX)
1509: return Sequence.TEXT_BYTE_VALUE;
1510: if (datum >= INT_SHORT_ZERO + MIN_INT_SHORT
1511: && datum <= INT_SHORT_ZERO + MAX_INT_SHORT)
1512: return Sequence.INT_S32_VALUE;
1513: switch (datum) {
1514: case BOOL_FALSE:
1515: case BOOL_TRUE:
1516: return Sequence.BOOLEAN_VALUE;
1517: case INT_FOLLOWS:
1518: return Sequence.INT_S32_VALUE;
1519: case LONG_FOLLOWS:
1520: return Sequence.INT_S64_VALUE;
1521: case FLOAT_FOLLOWS:
1522: return Sequence.FLOAT_VALUE;
1523: case DOUBLE_FOLLOWS:
1524: return Sequence.DOUBLE_VALUE;
1525: case CHAR_FOLLOWS:
1526: case BEGIN_DOCUMENT:
1527: return Sequence.DOCUMENT_VALUE;
1528: case BEGIN_ENTITY:
1529: return getNextKind((index + BEGIN_ENTITY_SIZE) << 1);
1530: case BEGIN_ELEMENT_LONG:
1531: return Sequence.ELEMENT_VALUE;
1532: case END_ELEMENT_SHORT:
1533: case END_ELEMENT_LONG:
1534: case END_ATTRIBUTE:
1535: case END_DOCUMENT:
1536: case END_ENTITY:
1537: return Sequence.EOF_VALUE;
1538: case BEGIN_ATTRIBUTE_LONG:
1539: return Sequence.ATTRIBUTE_VALUE;
1540: case CDATA_SECTION:
1541: return Sequence.CDATA_VALUE;
1542: case COMMENT:
1543: return Sequence.COMMENT_VALUE;
1544: case PROCESSING_INSTRUCTION:
1545: return Sequence.PROCESSING_INSTRUCTION_VALUE;
1546: case DOCUMENT_URI: // FIXME
1547: case POSITION_REF_FOLLOWS: // FIXME
1548: case POSITION_PAIR_FOLLOWS:
1549: case OBJECT_REF_FOLLOWS:
1550: case JOINER: // FIXME
1551: default:
1552: return Sequence.OBJECT_VALUE;
1553: }
1554:
1555: }
1556:
1557: public Object getNextTypeObject(int ipos) {
1558: int index = posToDataIndex(ipos);
1559: char datum;
1560: for (;;) {
1561: if (index == data.length)
1562: return null;
1563: datum = data[index];
1564: if (datum != BEGIN_ENTITY)
1565: break;
1566: index += BEGIN_ENTITY_SIZE;
1567: }
1568: if (datum >= BEGIN_ELEMENT_SHORT
1569: && datum <= BEGIN_ELEMENT_SHORT
1570: + BEGIN_ELEMENT_SHORT_INDEX_MAX)
1571: index = datum - BEGIN_ELEMENT_SHORT;
1572: else if (datum == BEGIN_ELEMENT_LONG) {
1573: int j = getIntN(index + 1);
1574: j += j < 0 ? data.length : index;
1575: index = getIntN(j + 1);
1576: } else if (datum == BEGIN_ATTRIBUTE_LONG)
1577: index = getIntN(index + 1);
1578: else if (datum == PROCESSING_INSTRUCTION)
1579: index = getIntN(index + 1);
1580: else
1581: return null;
1582: return index < 0 ? null : objects[index];
1583: }
1584:
1585: public String getNextTypeName(int ipos) {
1586: Object type = getNextTypeObject(ipos);
1587: return type == null ? null : type.toString();
1588: }
1589:
1590: public Object getPosPrevious(int ipos) {
1591: if ((ipos & 1) != 0 && ipos != -1)
1592: return getPosNext(ipos - 3);
1593: else
1594: return super .getPosPrevious(ipos);
1595: }
1596:
1597: private Object copyToList(int startPosition, int endPosition) {
1598: return new TreeList(this , startPosition, endPosition);
1599: }
1600:
1601: /** Return following value (like getPosNext), as an integer. */
1602: public int getPosNextInt(int ipos) {
1603: int index = posToDataIndex(ipos);
1604: if (index < data.length) {
1605: char datum = data[index];
1606: if (datum >= INT_SHORT_ZERO + MIN_INT_SHORT
1607: && datum <= INT_SHORT_ZERO + MAX_INT_SHORT)
1608: return datum - INT_SHORT_ZERO;
1609: if (datum == INT_FOLLOWS)
1610: return getIntN(index + 1);
1611: }
1612: return ((Number) getPosNext(ipos)).intValue();
1613: }
1614:
1615: public Object getPosNext(int ipos) {
1616: int index = posToDataIndex(ipos);
1617: if (index == data.length)
1618: return Sequence.eofValue;
1619: char datum = data[index];
1620: if (datum <= MAX_CHAR_SHORT)
1621: return Convert.toObject(datum);
1622: if (datum >= OBJECT_REF_SHORT
1623: && datum <= OBJECT_REF_SHORT
1624: + OBJECT_REF_SHORT_INDEX_MAX)
1625: return objects[datum - OBJECT_REF_SHORT];
1626: if (datum >= BEGIN_ELEMENT_SHORT
1627: && datum <= BEGIN_ELEMENT_SHORT
1628: + BEGIN_ELEMENT_SHORT_INDEX_MAX)
1629: return copyToList(index, index + data[index + 1] + 2);
1630: /*
1631: if ((datum & 0xFF00) == BYTE_PREFIX)
1632: return Sequence.TEXT_BYTE_VALUE;
1633: */
1634: if (datum >= INT_SHORT_ZERO + MIN_INT_SHORT
1635: && datum <= INT_SHORT_ZERO + MAX_INT_SHORT)
1636: return Convert.toObject(datum - INT_SHORT_ZERO);
1637: switch (datum) {
1638: case BEGIN_DOCUMENT: {
1639: int end_offset = getIntN(index + 1);
1640: end_offset += end_offset < 0 ? data.length : index;
1641: end_offset++;
1642: /* Need to be careful about this.
1643: if (index == 0
1644: && (end_offset == data.length
1645: || (end_offset == gapStart && gapEnd == data.length)))
1646: return this;
1647: */
1648: return copyToList(index, end_offset);
1649: }
1650: case BOOL_FALSE:
1651: case BOOL_TRUE:
1652: return Convert.toObject(datum != BOOL_FALSE);
1653: case INT_FOLLOWS:
1654: return Convert.toObject(getIntN(index + 1));
1655: case LONG_FOLLOWS:
1656: return Convert.toObject(getLongN(index + 1));
1657: case FLOAT_FOLLOWS:
1658: return Convert.toObject(Float
1659: .intBitsToFloat(getIntN(index + 1)));
1660: case DOUBLE_FOLLOWS:
1661: return Convert.toObject(Double
1662: .longBitsToDouble(getLongN(index + 1)));
1663: case CHAR_FOLLOWS:
1664: return Convert.toObject(data[index + 1]);
1665: case BEGIN_ATTRIBUTE_LONG: {
1666: int end_offset = getIntN(index + 3);
1667: end_offset += end_offset < 0 ? data.length : index;
1668: return copyToList(index, end_offset + 1);
1669: }
1670: case BEGIN_ELEMENT_LONG: {
1671: int end_offset = getIntN(index + 1);
1672: end_offset += end_offset < 0 ? data.length : index;
1673: return copyToList(index, end_offset + 7);
1674: }
1675: case END_ELEMENT_SHORT:
1676: case END_ELEMENT_LONG:
1677: case END_ATTRIBUTE:
1678: case END_DOCUMENT:
1679: return Sequence.eofValue;
1680: case POSITION_REF_FOLLOWS:
1681: case OBJECT_REF_FOLLOWS:
1682: return objects[getIntN(index + 1)];
1683: case JOINER:
1684: return "";
1685: case POSITION_PAIR_FOLLOWS: //FIXME
1686: AbstractSequence seq = (AbstractSequence) objects[getIntN(index + 1)];
1687: ipos = getIntN(index + 3);
1688: return seq.getIteratorAtPos(ipos);
1689: default:
1690: throw unsupported("getPosNext, code="
1691: + Integer.toHexString(datum));
1692: }
1693: }
1694:
1695: public void stringValue(int startIndex, int endIndex,
1696: StringBuffer sbuf) {
1697: int index = startIndex;
1698: while (index < endIndex && index >= 0)
1699: index = stringValue(false, index, sbuf);
1700: }
1701:
1702: public int stringValue(int index, StringBuffer sbuf) {
1703: int next = nextNodeIndex(index, -1 >>> 1);
1704: if (next > index) {
1705: stringValue(index, next, sbuf);
1706: return index;
1707: } else
1708: return stringValue(false, index, sbuf);
1709: }
1710:
1711: public int stringValue(boolean inElement, int index,
1712: StringBuffer sbuf) {
1713: Object value = null;
1714: int doChildren = 0, j;
1715: if (index >= gapStart)
1716: index += gapEnd - gapStart;
1717: if (index == data.length)
1718: return -1;
1719: char datum = data[index];
1720: index++;
1721: boolean spaceNeeded = false;
1722: if (datum <= MAX_CHAR_SHORT) {
1723: sbuf.append(datum);
1724: return index;
1725: }
1726: if (datum >= OBJECT_REF_SHORT
1727: && datum <= OBJECT_REF_SHORT
1728: + OBJECT_REF_SHORT_INDEX_MAX) {
1729: if (spaceNeeded)
1730: sbuf.append(' ');
1731: value = objects[datum - OBJECT_REF_SHORT];
1732: spaceNeeded = false;
1733: } else if (datum >= BEGIN_ELEMENT_SHORT
1734: && datum <= BEGIN_ELEMENT_SHORT
1735: + BEGIN_ELEMENT_SHORT_INDEX_MAX) {
1736: doChildren = index + 2;
1737: index = data[index] + index + 1;
1738: } else if ((datum & 0xFF00) == BYTE_PREFIX) {
1739: sbuf.append(datum & 0xFF);
1740: return index;
1741: } else if (datum >= INT_SHORT_ZERO + MIN_INT_SHORT
1742: && datum <= INT_SHORT_ZERO + MAX_INT_SHORT) {
1743: sbuf.append((int) datum - INT_SHORT_ZERO);
1744: return index;
1745: } else {
1746: switch (datum) {
1747: case DOCUMENT_URI:
1748: return index + 2;
1749: case PROCESSING_INSTRUCTION:
1750: index += 2;
1751: /* ... fall through ... */
1752: case CDATA_SECTION:
1753: case COMMENT: {
1754: int length = getIntN(index);
1755: index += 2;
1756: if (!inElement || datum == CDATA_SECTION)
1757: sbuf.append(data, index, length);
1758: return index + length;
1759: }
1760: case BOOL_FALSE:
1761: case BOOL_TRUE:
1762: if (spaceNeeded)
1763: sbuf.append(' ');
1764: sbuf.append(datum != BOOL_FALSE);
1765: spaceNeeded = true;
1766: return index;
1767: case INT_FOLLOWS:
1768: if (spaceNeeded)
1769: sbuf.append(' ');
1770: sbuf.append(getIntN(index));
1771: spaceNeeded = true;
1772: return index + 2;
1773: case LONG_FOLLOWS:
1774: if (spaceNeeded)
1775: sbuf.append(' ');
1776: sbuf.append(getLongN(index));
1777: spaceNeeded = true;
1778: return index + 4;
1779: case FLOAT_FOLLOWS:
1780: if (spaceNeeded)
1781: sbuf.append(' ');
1782: sbuf.append(Float.intBitsToFloat(getIntN(index)));
1783: spaceNeeded = true;
1784: return index + 2;
1785: case DOUBLE_FOLLOWS:
1786: if (spaceNeeded)
1787: sbuf.append(' ');
1788: sbuf.append(Double.longBitsToDouble(getLongN(index)));
1789: spaceNeeded = true;
1790: return index + 4;
1791: case CHAR_FOLLOWS:
1792: spaceNeeded = false;
1793: sbuf.append(data[index]);
1794: return index + 1;
1795: case BEGIN_DOCUMENT:
1796: case BEGIN_ENTITY:
1797: doChildren = index + 4;
1798: index = nextDataIndex(index - 1);
1799: break;
1800: case BEGIN_ELEMENT_LONG:
1801: spaceNeeded = false;
1802: doChildren = index + 2;
1803: j = getIntN(index);
1804: j += j < 0 ? data.length : index - 1;
1805: index = j + 7;
1806: break;
1807: case JOINER:
1808: spaceNeeded = false;
1809: break;
1810: case END_ELEMENT_SHORT:
1811: case END_ELEMENT_LONG:
1812: case END_ATTRIBUTE:
1813: case END_DOCUMENT:
1814: case END_ENTITY:
1815: return -1;
1816: case BEGIN_ATTRIBUTE_LONG:
1817: if (!inElement)
1818: doChildren = index + 4;
1819: int end = getIntN(index + 2);
1820: index = end + (end < 0 ? data.length + 1 : index);
1821: break;
1822: case POSITION_PAIR_FOLLOWS: {
1823: AbstractSequence seq = (AbstractSequence) objects[getIntN(index)];
1824: int ipos = getIntN(index + 2);
1825: ((TreeList) seq)
1826: .stringValue(inElement, ipos >> 1, sbuf);
1827: index += 4;
1828: }
1829: break;
1830: case POSITION_REF_FOLLOWS:
1831: case OBJECT_REF_FOLLOWS:
1832: default:
1833: throw new Error("unimplemented: "
1834: + Integer.toHexString(datum) + " at:" + index);
1835: }
1836: }
1837: if (value != null)
1838: sbuf.append(value);
1839: if (doChildren > 0) {
1840: do {
1841: doChildren = stringValue(true, doChildren, sbuf);
1842: } while (doChildren >= 0);
1843: }
1844: return index;
1845: }
1846:
1847: public int createRelativePos(int istart, int offset, boolean isAfter) {
1848: if (isAfter) {
1849: if (offset == 0) {
1850: if ((istart & 1) != 0)
1851: return istart;
1852: if (istart == 0)
1853: return 1;
1854: }
1855: offset--;
1856: }
1857: if (offset < 0)
1858: throw unsupported("backwards createRelativePos");
1859: int pos = posToDataIndex(istart);
1860: while (--offset >= 0) {
1861: pos = nextDataIndex(pos);
1862: if (pos < 0)
1863: throw new IndexOutOfBoundsException();
1864: }
1865: if (pos >= gapEnd)
1866: pos -= gapEnd - gapStart;
1867: return isAfter ? ((pos + 1) << 1) | 1 : (pos << 1);
1868: }
1869:
1870: /** Skip all primitive content nodes. */
1871: public final int nextNodeIndex(int pos, int limit) {
1872: if ((limit | 0x80000000) == -1) // kludge
1873: limit = data.length;
1874: for (;;) {
1875: if (pos == gapStart)
1876: pos = gapEnd;
1877: if (pos >= limit)
1878: return pos;
1879: char datum = data[pos];
1880: if (datum <= MAX_CHAR_SHORT
1881: || (datum >= OBJECT_REF_SHORT && datum <= OBJECT_REF_SHORT
1882: + OBJECT_REF_SHORT_INDEX_MAX)
1883: || (datum >= INT_SHORT_ZERO + MIN_INT_SHORT && datum <= INT_SHORT_ZERO
1884: + MAX_INT_SHORT)
1885: || (datum & 0xFF00) == BYTE_PREFIX) {
1886: pos++;
1887: continue;
1888: }
1889: if (datum >= BEGIN_ELEMENT_SHORT
1890: && datum <= BEGIN_ELEMENT_SHORT
1891: + BEGIN_ELEMENT_SHORT_INDEX_MAX)
1892: return pos;
1893: switch (datum) {
1894: case DOCUMENT_URI:
1895: pos += 3;
1896: break;
1897: case JOINER:
1898: pos += 1;
1899: break;
1900: case PROCESSING_INSTRUCTION:
1901: case COMMENT:
1902: case BEGIN_DOCUMENT:
1903: case BEGIN_ELEMENT_LONG:
1904: case BEGIN_ATTRIBUTE_LONG:
1905: return pos;
1906: case BEGIN_ENTITY:
1907: pos += 5;
1908: break;
1909: case END_ELEMENT_SHORT:
1910: case END_ELEMENT_LONG:
1911: case END_ATTRIBUTE:
1912: case END_DOCUMENT:
1913: case END_ENTITY:
1914: return pos;
1915: case CDATA_SECTION:
1916: default:
1917: pos = nextDataIndex(pos);
1918: continue;
1919: }
1920: }
1921: }
1922:
1923: public int nextMatching(int startPos, ItemPredicate predicate,
1924: int endPos, boolean descend) {
1925: int start = posToDataIndex(startPos);
1926: int limit = posToDataIndex(endPos);
1927: int pos = start;
1928: if (predicate instanceof NodePredicate)
1929: pos = nextNodeIndex(pos, limit);
1930: boolean checkAttribute = false; // true if attribute nodes could match.
1931: boolean checkNode;
1932: boolean checkText;
1933: boolean checkElement; // true if element nodes could match.
1934: if (predicate instanceof ElementPredicate) {
1935: checkNode = true;
1936: checkElement = true;
1937: checkText = false;
1938: } else if (predicate instanceof AttributePredicate) {
1939: checkNode = true;
1940: checkElement = false;
1941: checkText = false;
1942: } else {
1943: checkNode = true;
1944: checkElement = true;
1945: checkText = true;
1946: }
1947: int next;
1948: for (;; pos = next) {
1949: if (pos == gapStart)
1950: pos = gapEnd;
1951: if (pos >= limit && limit != -1)
1952: return 0;
1953: int j;
1954: char datum = data[pos];
1955: if (datum <= MAX_CHAR_SHORT
1956: || (datum >= OBJECT_REF_SHORT && datum <= OBJECT_REF_SHORT
1957: + OBJECT_REF_SHORT_INDEX_MAX)
1958: || (datum >= INT_SHORT_ZERO + MIN_INT_SHORT && datum <= INT_SHORT_ZERO
1959: + MAX_INT_SHORT)) {
1960: if (checkText
1961: && predicate.isInstancePos(this , pos << 1)) {
1962: if (pos >= gapEnd)
1963: pos -= gapEnd - gapStart;
1964: return pos << 1;
1965: }
1966: next = pos + 1;
1967: continue;
1968: }
1969: switch (datum) {
1970: case DOCUMENT_URI:
1971: next = pos + 3;
1972: continue;
1973: case BEGIN_DOCUMENT:
1974: next = pos + 5;
1975: if (checkNode)
1976: break;
1977: continue;
1978: case BEGIN_ENTITY:
1979: next = pos + 5;
1980: continue;
1981: case POSITION_REF_FOLLOWS:
1982: case OBJECT_REF_FOLLOWS:
1983: case INT_FOLLOWS:
1984: next = pos + 3;
1985: if (checkText)
1986: break;
1987: continue;
1988: case CHAR_FOLLOWS:
1989: next = pos + 2;
1990: continue;
1991: case END_ELEMENT_SHORT:
1992: if (!descend)
1993: return 0;
1994: next = pos + 2;
1995: continue;
1996: case POSITION_PAIR_FOLLOWS:
1997: next = pos + 5;
1998: if (checkText)
1999: break;
2000: continue;
2001: case END_ELEMENT_LONG:
2002: if (!descend)
2003: return 0;
2004: next = pos + 7;
2005: continue;
2006: case END_ATTRIBUTE:
2007: case END_DOCUMENT:
2008: if (!descend)
2009: return 0;
2010: /* ... fall through ...*/
2011: case END_ENTITY:
2012: next = pos + 1;
2013: continue;
2014: case BEGIN_ATTRIBUTE_LONG:
2015: if (checkNode) {
2016: j = getIntN(pos + 3);
2017: next = j + 1 + (j < 0 ? data.length : pos);
2018: } else
2019: next = pos + 5;
2020: if (checkAttribute)
2021: break;
2022: continue;
2023: case BOOL_FALSE:
2024: case BOOL_TRUE:
2025: next = pos + 1;
2026: if (checkText)
2027: break;
2028: continue;
2029: case JOINER:
2030: next = pos + 1;
2031: continue;
2032: case LONG_FOLLOWS:
2033: case DOUBLE_FOLLOWS:
2034: next = pos + 5;
2035: if (checkText)
2036: break;
2037: continue;
2038: case PROCESSING_INSTRUCTION:
2039: next = pos + 5 + getIntN(pos + 3);
2040: if (checkNode)
2041: break;
2042: continue;
2043: case COMMENT:
2044: next = pos + 3 + getIntN(pos + 1);
2045: if (checkNode)
2046: break;
2047: continue;
2048: case CDATA_SECTION:
2049: next = pos + 3 + getIntN(pos + 1);
2050: if (checkText)
2051: break;
2052: continue;
2053: case BEGIN_ELEMENT_LONG:
2054: if (descend)
2055: next = pos + 3;
2056: else {
2057: j = getIntN(pos + 1);
2058: next = j + (j < 0 ? data.length : pos) + 7;
2059: }
2060: if (checkElement)
2061: break;
2062: continue;
2063: default:
2064: if (datum >= BEGIN_ELEMENT_SHORT
2065: && datum <= BEGIN_ELEMENT_SHORT
2066: + BEGIN_ELEMENT_SHORT_INDEX_MAX) {
2067: if (descend)
2068: next = pos + 3;
2069: else
2070: next = pos + data[pos + 1] + 2;
2071: if (checkElement)
2072: break;
2073: } else
2074: throw new Error("unknown code:" + (int) datum);
2075: continue;
2076: }
2077: if (pos > start && predicate.isInstancePos(this , pos << 1)) {
2078: if (pos >= gapEnd)
2079: pos -= gapEnd - gapStart;
2080: return pos << 1;
2081: }
2082: }
2083: }
2084:
2085: public int nextPos(int position) {
2086: int index = posToDataIndex(position);
2087: if (index == data.length)
2088: return 0;
2089: if (index >= gapEnd)
2090: index -= gapEnd - gapStart;
2091: return (index << 1) + 3;
2092: }
2093:
2094: public final int nextDataIndex(int pos) {
2095: if (pos == gapStart)
2096: pos = gapEnd;
2097: if (pos == data.length)
2098: return -1;
2099: int j;
2100: char datum = data[pos++];
2101: if (datum <= MAX_CHAR_SHORT
2102: || (datum >= OBJECT_REF_SHORT && datum <= OBJECT_REF_SHORT
2103: + OBJECT_REF_SHORT_INDEX_MAX)
2104: || (datum >= INT_SHORT_ZERO + MIN_INT_SHORT && datum <= INT_SHORT_ZERO
2105: + MAX_INT_SHORT))
2106: return pos;
2107: if (datum >= BEGIN_ELEMENT_SHORT
2108: && datum <= BEGIN_ELEMENT_SHORT
2109: + BEGIN_ELEMENT_SHORT_INDEX_MAX)
2110: return data[pos] + pos + 1;
2111: switch (datum) {
2112: case BEGIN_DOCUMENT:
2113: j = getIntN(pos);
2114: j += j < 0 ? data.length : pos - 1;
2115: return j + 1;
2116: case BEGIN_ENTITY:
2117: j = pos + (BEGIN_ENTITY_SIZE - 1);
2118: for (;;) {
2119: if (j == gapStart)
2120: j = gapEnd;
2121: if (j == data.length)
2122: return -1; // actually error.
2123: if (data[j] == END_ENTITY)
2124: return j + 1;
2125: j = nextDataIndex(j);
2126: }
2127: case BOOL_FALSE:
2128: case BOOL_TRUE:
2129: case JOINER:
2130: return pos;
2131: case CHAR_FOLLOWS:
2132: return pos + 1;
2133: case POSITION_REF_FOLLOWS:
2134: case OBJECT_REF_FOLLOWS:
2135: case INT_FOLLOWS:
2136: return pos + 2;
2137: case POSITION_PAIR_FOLLOWS:
2138: return pos + 4;
2139: case END_ELEMENT_SHORT:
2140: case END_ELEMENT_LONG:
2141: case END_ATTRIBUTE:
2142: case END_DOCUMENT:
2143: case END_ENTITY:
2144: return -1;
2145: case BEGIN_ELEMENT_LONG:
2146: j = getIntN(pos);
2147: j += j < 0 ? data.length : pos - 1;
2148: return j + 7;
2149: case BEGIN_ATTRIBUTE_LONG:
2150: j = getIntN(pos + 2);
2151: j += j < 0 ? data.length : pos - 1;
2152: return j + 1;
2153: case LONG_FOLLOWS:
2154: case DOUBLE_FOLLOWS:
2155: return pos + 4;
2156: case PROCESSING_INSTRUCTION:
2157: pos += 2;
2158: // ... fall through ...
2159: case CDATA_SECTION:
2160: case COMMENT:
2161: return pos + 2 + getIntN(pos);
2162: default:
2163: throw new Error("unknown code:"
2164: + Integer.toHexString((int) datum));
2165: }
2166: }
2167:
2168: public Object documentUriOfPos(int pos) {
2169: int index = posToDataIndex(pos);
2170: if (index == data.length)
2171: return null;
2172: if (data[index] == BEGIN_DOCUMENT) {
2173: int next = index + 5;
2174: if (next == gapStart)
2175: next = gapEnd;
2176: if (next < data.length && data[next] == DOCUMENT_URI)
2177: return objects[getIntN(next + 1)];
2178: }
2179: return null;
2180: }
2181:
2182: /** Compare two positions, and indicate their relative order. */
2183: public int compare(int ipos1, int ipos2) {
2184: // It's difficult to optimize this, because because if (say) isAfter(ipos1)
2185: // then we need nextDataIndex((ipos1>>>1)-1). In that case comparing
2186: // (ipos1>>>1)-1 and (pos2>>>1)-1 tells us nothing, since the former
2187: // could be a BEGIN_ELEMENT, while the latter might be a node inside
2188: // the element.
2189: int i1 = posToDataIndex(ipos1);
2190: int i2 = posToDataIndex(ipos2);
2191: return i1 < i2 ? -1 : i1 > i2 ? 1 : 0;
2192: }
2193:
2194: protected int getIndexDifference(int ipos1, int ipos0) {
2195: int i0 = posToDataIndex(ipos0);
2196: int i1 = posToDataIndex(ipos1);
2197: boolean negate = false;
2198: if (i0 > i1) {
2199: negate = true;
2200: int i = i1;
2201: i1 = i0;
2202: i0 = i;
2203: }
2204: int i = 0;
2205: while (i0 < i1) {
2206: i0 = nextDataIndex(i0);
2207: i++;
2208: }
2209: return negate ? -i : i;
2210: }
2211:
2212: public int hashCode() {
2213: // Calculating a real hashCode is real pain.
2214: return System.identityHashCode(this );
2215: }
2216:
2217: public void consume(Consumer out) {
2218: consumeIRange(0, data.length, out);
2219: }
2220:
2221: public void statistics() {
2222: java.io.PrintWriter out = new java.io.PrintWriter(System.out);
2223: statistics(out);
2224: out.flush();
2225: }
2226:
2227: public void statistics(java.io.PrintWriter out) {
2228: out.print("data array length: ");
2229: out.println(data.length);
2230: out.print("data array gap: ");
2231: out.println(gapEnd - gapStart);
2232: out.print("object array length: ");
2233: out.println(objects.length);
2234: }
2235:
2236: // /* DEBUGGING
2237: public void dump() {
2238: java.io.PrintWriter out = new java.io.PrintWriter(System.out);
2239:
2240: dump(out);
2241: out.flush();
2242: }
2243:
2244: public void dump(java.io.PrintWriter out) {
2245: out.println(getClass().getName() + " @"
2246: + Integer.toHexString(System.identityHashCode(this ))
2247: + " gapStart:" + gapStart + " gapEnd:" + gapEnd
2248: + " length:" + data.length);
2249: dump(out, 0, data.length);
2250: }
2251:
2252: public void dump(java.io.PrintWriter out, int start, int limit) {
2253: int toskip = 0;
2254: // Skip follow-on words.
2255: boolean skipFollowingWords = true;
2256: for (int i = start; i < limit; i++) {
2257:
2258: if (i < gapStart || i >= gapEnd) {
2259: int j;
2260: long l;
2261: int ch = data[i];
2262: out.print("" + i + ": 0x" + Integer.toHexString(ch)
2263: + '=' + ((short) ch));
2264: if (--toskip < 0) {
2265: if (ch <= MAX_CHAR_SHORT) {
2266: if (ch >= ' ' && ch < 127)
2267: out.print("='" + ((char) ch) + "'");
2268: else if (ch == '\n')
2269: out.print("='\\n'");
2270: else
2271: out.print("='\\u" + Integer.toHexString(ch)
2272: + "'");
2273: } else if (ch >= OBJECT_REF_SHORT
2274: && ch <= OBJECT_REF_SHORT
2275: + OBJECT_REF_SHORT_INDEX_MAX) {
2276: ch = ch - OBJECT_REF_SHORT;
2277: Object obj = objects[ch];
2278: out.print("=Object#"
2279: + ((int) ch)
2280: + '='
2281: + obj
2282: + ':'
2283: + obj.getClass().getName()
2284: + '@'
2285: + Integer.toHexString(System
2286: .identityHashCode(obj)));
2287: } else if (ch >= BEGIN_ELEMENT_SHORT
2288: && ch <= BEGIN_ELEMENT_SHORT
2289: + BEGIN_ELEMENT_SHORT_INDEX_MAX) {
2290: ch = ch - BEGIN_ELEMENT_SHORT;
2291: j = data[i + 1] + i;
2292: out.print("=BEGIN_ELEMENT_SHORT end:" + j
2293: + " index#" + ((int) ch) + "=<"
2294: + objects[ch] + '>');
2295: toskip = 2;
2296: } else if (ch >= INT_SHORT_ZERO + MIN_INT_SHORT
2297: && ch <= INT_SHORT_ZERO + MAX_INT_SHORT) {
2298: out.print("= INT_SHORT:"
2299: + (ch - INT_SHORT_ZERO));
2300: } else {
2301: switch (ch) {
2302: case INT_FOLLOWS:
2303: j = getIntN(i + 1);
2304: out.print("=INT_FOLLOWS value:" + j);
2305: toskip = 2;
2306: break;
2307: case LONG_FOLLOWS:
2308: l = getLongN(i + 1);
2309: out.print("=LONG_FOLLOWS value:" + l);
2310: toskip = 4;
2311: break;
2312: case FLOAT_FOLLOWS:
2313: j = getIntN(i + 1);
2314: out.write("=FLOAT_FOLLOWS value:"
2315: + Float.intBitsToFloat(j));
2316: toskip = 2;
2317: break;
2318: case DOUBLE_FOLLOWS:
2319: l = getLongN(i + 1);
2320: out.print("=DOUBLE_FOLLOWS value:"
2321: + Double.longBitsToDouble(l));
2322: toskip = 4;
2323: break;
2324: case BEGIN_DOCUMENT:
2325: j = getIntN(i + 1);
2326: j += j < 0 ? data.length : i;
2327: out.print("=BEGIN_DOCUMENT end:");
2328: out.print(j);
2329: out.print(" parent:");
2330: j = getIntN(i + 3);
2331: out.print(j);
2332: toskip = 4;
2333: break;
2334: case BEGIN_ENTITY:
2335: j = getIntN(i + 1);
2336: out.print("=BEGIN_ENTITY base:");
2337: out.print(j);
2338: out.print(" parent:");
2339: j = getIntN(i + 3);
2340: out.print(j);
2341: toskip = 4;
2342: break;
2343: case END_ENTITY:
2344: out.print("=END_ENTITY");
2345: break;
2346: case DOCUMENT_URI:
2347: out.print("=DOCUMENT_URI: ");
2348: j = getIntN(i + 1);
2349: out.print(objects[j]);
2350: toskip = 2;
2351: break;
2352: case COMMENT:
2353: out.print("=COMMENT: '");
2354: j = getIntN(i + 1);
2355: out.write(data, i + 3, j);
2356: out.print('\'');
2357: toskip = 2 + j;
2358: break;
2359: case CDATA_SECTION:
2360: out.print("=CDATA: '");
2361: j = getIntN(i + 1);
2362: out.write(data, i + 3, j);
2363: out.print('\'');
2364: toskip = 2 + j;
2365: break;
2366: case PROCESSING_INSTRUCTION:
2367: out.print("=PROCESSING_INSTRUCTION: ");
2368: j = getIntN(i + 1);
2369: out.print(objects[j]);
2370: out.print(" '");
2371: j = getIntN(i + 3);
2372: out.write(data, i + 5, j);
2373: out.print('\'');
2374: toskip = 4 + j;
2375: break;
2376: case END_DOCUMENT:
2377: out.print("=END_DOCUMENT");
2378: break;
2379: case BOOL_FALSE:
2380: out.print("= false");
2381: break;
2382: case BOOL_TRUE:
2383: out.print("= true");
2384: break;
2385: case JOINER:
2386: out.print("= joiner");
2387: break;
2388: case CHAR_FOLLOWS:
2389: out.print("=CHAR_FOLLOWS");
2390: toskip = 1;
2391: break;
2392: case POSITION_REF_FOLLOWS:
2393: case OBJECT_REF_FOLLOWS:
2394: toskip = 2;
2395: break;
2396: case END_ELEMENT_SHORT:
2397: out.print("=END_ELEMENT_SHORT begin:");
2398: j = i - data[i + 1];
2399: out.print(j);
2400: j = data[j] - BEGIN_ELEMENT_SHORT;
2401: out.print(" -> #");
2402: out.print(j);
2403: out.print("=<");
2404: out.print(objects[j]);
2405: out.print('>');
2406: toskip = 1;
2407: break;
2408: case BEGIN_ELEMENT_LONG:
2409: j = getIntN(i + 1);
2410: j += j < 0 ? data.length : i;
2411: out.print("=BEGIN_ELEMENT_LONG end:");
2412: out.print(j);
2413: j = getIntN(j + 1);
2414: out.print(" -> #");
2415: out.print(j);
2416: if (j >= 0 && j + 1 < objects.length)
2417: out.print("=<" + objects[j] + '>');
2418: else
2419: out.print("=<out-of-bounds>");
2420: toskip = 2;
2421: break;
2422: case END_ELEMENT_LONG:
2423: j = getIntN(i + 1);
2424: out.print("=END_ELEMENT_LONG name:" + j
2425: + "=<" + objects[j] + '>');
2426: j = getIntN(i + 3);
2427: j = j < 0 ? i + j : j;
2428: out.print(" begin:" + j);
2429: j = getIntN(i + 5);
2430: j = j < 0 ? i + j : j;
2431: out.print(" parent:" + j);
2432: toskip = 6;
2433: break;
2434: case BEGIN_ATTRIBUTE_LONG:
2435: j = getIntN(i + 1);
2436: out.print("=BEGIN_ATTRIBUTE name:" + j
2437: + "=" + objects[j]);
2438: j = getIntN(i + 3);
2439: j += j < 0 ? data.length : i;
2440: out.print(" end:" + j);
2441: toskip = 4;
2442: break;
2443: case END_ATTRIBUTE:
2444: out.print("=END_ATTRIBUTE");
2445: break;
2446: case POSITION_PAIR_FOLLOWS:
2447: out.print("=POSITION_PAIR_FOLLOWS seq:");
2448: {
2449: j = getIntN(i + 1);
2450: out.print(j);
2451: out.print('=');
2452: Object seq = objects[j];
2453: out.print(seq == null ? null : seq
2454: .getClass().getName());
2455: out.print('@');
2456: if (seq == null)
2457: out.print("null");
2458: else
2459: out
2460: .print(Integer
2461: .toHexString(System
2462: .identityHashCode(seq)));
2463: out.print(" ipos:");
2464: out.print(getIntN(i + 3));
2465: }
2466: toskip = 4;
2467: /*
2468: AbstractSequence seq = (AbstractSequence) objects[getIntN(i+1)];
2469: ipos = getIntN(i+3);
2470: */
2471: break;
2472: }
2473: }
2474: }
2475: out.println();
2476: if (skipFollowingWords && toskip > 0) {
2477: i += toskip;
2478: toskip = 0;
2479: }
2480: }
2481: }
2482: }
2483: // DEBUGGING */
2484: }
|