0001: package net.sf.saxon.tinytree;
0002:
0003: import net.sf.saxon.Configuration;
0004: import net.sf.saxon.om.*;
0005: import net.sf.saxon.sort.IntHashSet;
0006: import net.sf.saxon.style.StandardNames;
0007: import net.sf.saxon.tree.LineNumberMap;
0008: import net.sf.saxon.tree.SystemIdMap;
0009: import net.sf.saxon.type.*;
0010: import net.sf.saxon.value.UntypedAtomicValue;
0011:
0012: import java.util.ArrayList;
0013:
0014: /**
0015: * A data structure to hold the contents of a tree. As the name implies, this implementation
0016: * of the data model is optimized for size, and for speed of creation: it minimizes the number
0017: * of Java objects used.
0018: *
0019: * <p>It can be used to represent a tree that is rooted at a document node, or one that is rooted
0020: * at an element node.</p>
0021: */
0022:
0023: public final class TinyTree {
0024:
0025: private static final int[] EMPTY_INT_ARRAY = new int[0];
0026: private static final String[] EMPTY_STRING_ARRAY = new String[0];
0027:
0028: private Configuration config;
0029:
0030: // List of top-level document nodes.
0031: private ArrayList documentList = new ArrayList(5);
0032:
0033: // The document number (really a tree number: it can identify a non-document root node
0034: protected int documentNumber;
0035:
0036: // the contents of the document
0037:
0038: protected char[] charBuffer;
0039: protected int charBufferLength = 0;
0040: protected FastStringBuffer commentBuffer = null; // created when needed
0041:
0042: protected int numberOfNodes = 0; // excluding attributes and namespaces
0043:
0044: // The following arrays contain one entry for each node other than attribute
0045: // and namespace nodes, arranged in document order.
0046:
0047: // nodeKind indicates the kind of node, e.g. element, text, or comment
0048: public byte[] nodeKind;
0049:
0050: // depth is the depth of the node in the hierarchy, i.e. the number of ancestors
0051: protected short[] depth;
0052:
0053: // next is the node number of the next sibling
0054: // - unless it points backwards, in which case it is the node number of the parent
0055: protected int[] next;
0056:
0057: // alpha holds a value that depends on the node kind. For text nodes, it is the offset
0058: // into the text buffer. For comments and processing instructions, it is the offset into
0059: // the comment buffer. For elements, it is the index of the first attribute node, or -1
0060: // if this element has no attributes.
0061: protected int[] alpha;
0062:
0063: // beta holds a value that depends on the node kind. For text nodes, it is the length
0064: // of the text. For comments and processing instructions, it is the length of the text.
0065: // For elements, it is the index of the first namespace node, or -1
0066: // if this element has no namespaces.
0067: protected int[] beta;
0068:
0069: // nameCode holds the name of the node, as an identifier resolved using the name pool
0070: protected int[] nameCode;
0071:
0072: // the prior array indexes preceding-siblings; it is constructed only when required
0073: protected int[] prior = null;
0074:
0075: // the typeCode array holds type codes for element nodes; it is constructed only
0076: // if at least one element has a type other than untyped (-1)
0077: protected int[] typeCodeArray = null;
0078:
0079: // the owner array gives fast access from a node to its parent; it is constructed
0080: // only when required
0081: // protected int[] parentIndex = null;
0082:
0083: // the following arrays have one entry for each attribute.
0084: protected int numberOfAttributes = 0;
0085:
0086: // attParent is the index of the parent element node
0087: protected int[] attParent;
0088:
0089: // attCode is the nameCode representing the attribute name
0090: protected int[] attCode;
0091:
0092: // attValue is the string value of the attribute
0093: protected CharSequence[] attValue;
0094:
0095: // attTypeCode holds type annotations. The array is created only if any nodes have a type annotation
0096: // or are marked as IDREF/IDREFS attributes. The bit 1<<30 represents the is-idref property, and 1<<29
0097: // represents the is-id property
0098: protected int[] attTypeCode;
0099:
0100: // The following arrays have one entry for each namespace declaration
0101: protected int numberOfNamespaces = 0;
0102:
0103: // namespaceParent is the index of the element node owning the namespace declaration
0104: protected int[] namespaceParent;
0105:
0106: // namespaceCode is the namespace code used by the name pool: the top half is the prefix
0107: // code, the bottom half the URI code
0108: protected int[] namespaceCode;
0109:
0110: // an array holding the offsets of all the level-0 (root) nodes, so that the root of a given
0111: // node can be found efficiently
0112: private int[] rootIndex = new int[5];
0113: private int rootIndexUsed = 0;
0114:
0115: private LineNumberMap lineNumberMap;
0116: private SystemIdMap systemIdMap = null;
0117:
0118: // a boolean that is set to true if the document declares a namespace other than the XML namespace
0119: protected boolean usesNamespaces = false;
0120:
0121: private IntHashSet nonIDs; // a set of type annotation codes appearing in this document that are known
0122:
0123: // not to be subtypes of xs:ID
0124:
0125: //private int IDtype;
0126:
0127: public TinyTree() {
0128: this (4000, 100, 20, 4000);
0129: }
0130:
0131: public TinyTree(int nodes, int attributes, int namespaces,
0132: int characters) {
0133: //System.err.println("TinyTree.new() " + this);
0134: nodeKind = new byte[nodes];
0135: depth = new short[nodes];
0136: next = new int[nodes];
0137: alpha = new int[nodes];
0138: beta = new int[nodes];
0139: nameCode = new int[nodes];
0140:
0141: numberOfAttributes = 0;
0142: attParent = new int[attributes];
0143: attCode = new int[attributes];
0144: attValue = new String[attributes];
0145:
0146: numberOfNamespaces = 0;
0147: namespaceParent = new int[namespaces];
0148: namespaceCode = new int[namespaces];
0149:
0150: charBuffer = new char[characters];
0151: charBufferLength = 0;
0152: }
0153:
0154: /**
0155: * Set the Configuration that contains this document
0156: */
0157:
0158: public void setConfiguration(Configuration config) {
0159: this .config = config;
0160: addNamespace(0, NamespaceConstant.XML_NAMESPACE_CODE);
0161: }
0162:
0163: /**
0164: * Get the configuration previously set using setConfiguration
0165: */
0166:
0167: public Configuration getConfiguration() {
0168: return config;
0169: }
0170:
0171: /**
0172: * Get the name pool used for the names in this document
0173: */
0174:
0175: public NamePool getNamePool() {
0176: return config.getNamePool();
0177: }
0178:
0179: private void ensureNodeCapacity(short kind) {
0180: if (nodeKind.length < numberOfNodes + 1) {
0181: //System.err.println("Number of nodes = " + numberOfNodes);
0182: int k = (kind == Type.STOPPER ? numberOfNodes + 1
0183: : numberOfNodes * 2);
0184:
0185: byte[] nodeKind2 = new byte[k];
0186: int[] next2 = new int[k];
0187: short[] depth2 = new short[k];
0188: int[] alpha2 = new int[k];
0189: int[] beta2 = new int[k];
0190: int[] nameCode2 = new int[k];
0191:
0192: System.arraycopy(nodeKind, 0, nodeKind2, 0, numberOfNodes);
0193: System.arraycopy(next, 0, next2, 0, numberOfNodes);
0194: System.arraycopy(depth, 0, depth2, 0, numberOfNodes);
0195: System.arraycopy(alpha, 0, alpha2, 0, numberOfNodes);
0196: System.arraycopy(beta, 0, beta2, 0, numberOfNodes);
0197: System.arraycopy(nameCode, 0, nameCode2, 0, numberOfNodes);
0198:
0199: nodeKind = nodeKind2;
0200: next = next2;
0201: depth = depth2;
0202: alpha = alpha2;
0203: beta = beta2;
0204: nameCode = nameCode2;
0205:
0206: if (typeCodeArray != null) {
0207: int[] typeCodeArray2 = new int[k];
0208: System.arraycopy(typeCodeArray, 0, typeCodeArray2, 0,
0209: numberOfNodes);
0210: typeCodeArray = typeCodeArray2;
0211: }
0212: }
0213: }
0214:
0215: private void ensureAttributeCapacity() {
0216: if (attParent.length < numberOfAttributes + 1) {
0217: int k = numberOfAttributes * 2;
0218: if (k == 0) {
0219: k = 10;
0220: }
0221:
0222: int[] attParent2 = new int[k];
0223: int[] attCode2 = new int[k];
0224: String[] attValue2 = new String[k];
0225:
0226: System.arraycopy(attParent, 0, attParent2, 0,
0227: numberOfAttributes);
0228: System.arraycopy(attCode, 0, attCode2, 0,
0229: numberOfAttributes);
0230: System.arraycopy(attValue, 0, attValue2, 0,
0231: numberOfAttributes);
0232:
0233: attParent = attParent2;
0234: attCode = attCode2;
0235: attValue = attValue2;
0236:
0237: if (attTypeCode != null) {
0238: int[] attTypeCode2 = new int[k];
0239: System.arraycopy(attTypeCode, 0, attTypeCode2, 0,
0240: numberOfAttributes);
0241: attTypeCode = attTypeCode2;
0242: }
0243: }
0244: }
0245:
0246: private void ensureNamespaceCapacity() {
0247: if (namespaceParent.length < numberOfNamespaces + 1) {
0248: int k = numberOfNamespaces * 2;
0249:
0250: int[] namespaceParent2 = new int[k];
0251: int[] namespaceCode2 = new int[k];
0252:
0253: System.arraycopy(namespaceParent, 0, namespaceParent2, 0,
0254: numberOfNamespaces);
0255: System.arraycopy(namespaceCode, 0, namespaceCode2, 0,
0256: numberOfNamespaces);
0257:
0258: namespaceParent = namespaceParent2;
0259: namespaceCode = namespaceCode2;
0260: }
0261: }
0262:
0263: /**
0264: * Add a document node to the tree. The data structure can contain any number of document (or element) nodes
0265: * as top-level nodes. The document node is retained in the documentList list, and its offset in that list
0266: * is held in the alpha array for the relevant node number.
0267: */
0268:
0269: void addDocumentNode(TinyDocumentImpl doc) {
0270: documentList.add(doc);
0271: addNode(Type.DOCUMENT, 0, documentList.size() - 1, 0, -1);
0272: }
0273:
0274: /**
0275: * Add a node to the tree
0276: * @param kind The kind of the node. This must be a document, element, text, comment,
0277: * or processing-instruction node (not an attribute or namespace)
0278: * @param depth The depth in the tree
0279: * @param alpha Pointer to attributes or text
0280: * @param beta Pointer to namespaces or text
0281: * @param nameCode The name of the node
0282: * @return the node number of the node that was added
0283: */
0284: int addNode(short kind, int depth, int alpha, int beta, int nameCode) {
0285: ensureNodeCapacity(kind);
0286: this .nodeKind[numberOfNodes] = (byte) kind;
0287: this .depth[numberOfNodes] = (short) depth;
0288: this .alpha[numberOfNodes] = alpha;
0289: this .beta[numberOfNodes] = beta;
0290: this .nameCode[numberOfNodes] = nameCode;
0291: this .next[numberOfNodes] = -1; // safety precaution
0292:
0293: if (typeCodeArray != null) {
0294: typeCodeArray[numberOfNodes] = StandardNames.XDT_UNTYPED;
0295: }
0296:
0297: if (numberOfNodes == 0) {
0298: documentNumber = config.getDocumentNumberAllocator()
0299: .allocateDocumentNumber();
0300: }
0301:
0302: if (depth == 0) {
0303: if (rootIndexUsed == rootIndex.length) {
0304: int[] r2 = new int[rootIndexUsed * 2];
0305: System.arraycopy(rootIndex, 0, r2, 0, rootIndexUsed);
0306: rootIndex = r2;
0307: }
0308: rootIndex[rootIndexUsed++] = numberOfNodes;
0309: }
0310: return numberOfNodes++;
0311: }
0312:
0313: void appendChars(CharSequence chars) {
0314: if (charBuffer.length < charBufferLength + chars.length()) {
0315: char[] ch2 = new char[Math.max(charBuffer.length * 2,
0316: charBufferLength + chars.length() + 100)];
0317: System.arraycopy(charBuffer, 0, ch2, 0, charBufferLength);
0318: charBuffer = ch2;
0319: }
0320: if (chars instanceof CharSlice) {
0321: ((CharSlice) chars).copyTo(charBuffer, charBufferLength);
0322: } else if (chars instanceof FastStringBuffer) {
0323: ((FastStringBuffer) chars).getChars(0, chars.length(),
0324: charBuffer, charBufferLength);
0325: } else {
0326: char[] newchars = chars.toString().toCharArray();
0327: System.arraycopy(newchars, 0, charBuffer, charBufferLength,
0328: chars.length());
0329: // TODO: this is inefficient. There is no absolute reason that the
0330: // character data for the whole document needs to be in one array.
0331: }
0332: charBufferLength += chars.length();
0333: }
0334:
0335: /**
0336: * Condense the tree: release unused memory. This is done after the full tree has been built.
0337: * The method makes a pragmatic judgement as to whether it is worth reclaiming space; this is
0338: * only done when the constructed tree is very small compared with the space allocated.
0339: */
0340:
0341: protected void condense() {
0342: //System.err.println("TinyTree.condense() " + this + " roots " + rootIndexUsed + " nodes " + numberOfNodes + " capacity " + nodeKind.length);
0343:
0344: // If there are already two trees in this forest, the chances are that more will be added. In this
0345: // case we don't want to condense the arrays because we will only have to expand them again, which gets
0346: // increasingly expensive as they grow larger.
0347: if (rootIndexUsed > 1) {
0348: return;
0349: }
0350: if (numberOfNodes * 3 < nodeKind.length
0351: || (nodeKind.length - numberOfNodes > 20000)) {
0352:
0353: //System.err.println("-- copying node arrays");
0354: int k = numberOfNodes + 1;
0355:
0356: byte[] nodeKind2 = new byte[k];
0357: int[] next2 = new int[k];
0358: short[] depth2 = new short[k];
0359: int[] alpha2 = new int[k];
0360: int[] beta2 = new int[k];
0361: int[] nameCode2 = new int[k];
0362:
0363: System.arraycopy(nodeKind, 0, nodeKind2, 0, numberOfNodes);
0364: System.arraycopy(next, 0, next2, 0, numberOfNodes);
0365: System.arraycopy(depth, 0, depth2, 0, numberOfNodes);
0366: System.arraycopy(alpha, 0, alpha2, 0, numberOfNodes);
0367: System.arraycopy(beta, 0, beta2, 0, numberOfNodes);
0368: System.arraycopy(nameCode, 0, nameCode2, 0, numberOfNodes);
0369: if (typeCodeArray != null) {
0370: int[] type2 = new int[k];
0371: System.arraycopy(typeCodeArray, 0, type2, 0,
0372: numberOfNodes);
0373: typeCodeArray = type2;
0374: }
0375:
0376: nodeKind = nodeKind2;
0377: next = next2;
0378: depth = depth2;
0379: alpha = alpha2;
0380: beta = beta2;
0381: nameCode = nameCode2;
0382: }
0383:
0384: if ((numberOfAttributes * 3 < attParent.length)
0385: || (attParent.length - numberOfAttributes > 1000)) {
0386: int k = numberOfAttributes;
0387:
0388: //System.err.println("-- copying attribute arrays");
0389:
0390: if (k == 0) {
0391: attParent = EMPTY_INT_ARRAY;
0392: attCode = EMPTY_INT_ARRAY;
0393: attValue = EMPTY_STRING_ARRAY;
0394: attTypeCode = null;
0395: }
0396:
0397: int[] attParent2 = new int[k];
0398: int[] attCode2 = new int[k];
0399: String[] attValue2 = new String[k];
0400:
0401: System.arraycopy(attParent, 0, attParent2, 0,
0402: numberOfAttributes);
0403: System.arraycopy(attCode, 0, attCode2, 0,
0404: numberOfAttributes);
0405: System.arraycopy(attValue, 0, attValue2, 0,
0406: numberOfAttributes);
0407:
0408: attParent = attParent2;
0409: attCode = attCode2;
0410: attValue = attValue2;
0411:
0412: if (attTypeCode != null) {
0413: int[] attTypeCode2 = new int[k];
0414: System.arraycopy(attTypeCode, 0, attTypeCode2, 0,
0415: numberOfAttributes);
0416: attTypeCode = attTypeCode2;
0417: }
0418: }
0419:
0420: if (numberOfNamespaces * 3 < namespaceParent.length) {
0421: int k = numberOfNamespaces;
0422: int[] namespaceParent2 = new int[k];
0423: int[] namespaceCode2 = new int[k];
0424:
0425: //System.err.println("-- copying namespace arrays");
0426:
0427: System.arraycopy(namespaceParent, 0, namespaceParent2, 0,
0428: numberOfNamespaces);
0429: System.arraycopy(namespaceCode, 0, namespaceCode2, 0,
0430: numberOfNamespaces);
0431:
0432: namespaceParent = namespaceParent2;
0433: namespaceCode = namespaceCode2;
0434: }
0435:
0436: if (charBufferLength * 3 < charBuffer.length
0437: || charBuffer.length - charBufferLength > 10000) {
0438: char[] c2 = new char[charBufferLength];
0439: System.arraycopy(charBuffer, 0, c2, 0, charBufferLength);
0440: charBuffer = c2;
0441: }
0442: }
0443:
0444: /**
0445: * Set the type annotation of an element node
0446: */
0447:
0448: void setElementAnnotation(int nodeNr, int typeCode) {
0449: if (typeCode != StandardNames.XDT_UNTYPED) {
0450: if (typeCodeArray == null) {
0451: typeCodeArray = new int[nodeKind.length];
0452: for (int i = 0; i < nodeKind.length; i++) {
0453: typeCodeArray[i] = StandardNames.XDT_UNTYPED;
0454: }
0455: }
0456: typeCodeArray[nodeNr] = typeCode;
0457: }
0458: }
0459:
0460: /**
0461: * Get the type annotation of a node. Applies only to document, element, text,
0462: * processing instruction, and comment nodes.
0463: * @return -1 if the annotation is xdt:untyped or if the node is not an element.
0464: */
0465:
0466: public int getTypeAnnotation(int nodeNr) {
0467: if (typeCodeArray == null) {
0468: return StandardNames.XDT_UNTYPED;
0469: }
0470: return typeCodeArray[nodeNr];
0471: }
0472:
0473: /**
0474: * Get the node kind of a given node, which must be a document, element,
0475: * text, comment, or processing instruction node
0476: * @param nodeNr the node number
0477: * @return the node kind
0478: */
0479:
0480: public int getNodeKind(int nodeNr) {
0481: return nodeKind[nodeNr];
0482: }
0483:
0484: /**
0485: * Get the nameCode for a given node, which must be a document, element,
0486: * text, comment, or processing instruction node
0487: * @param nodeNr the node number
0488: * @return the name code
0489: */
0490:
0491: public int getNameCode(int nodeNr) {
0492: return nameCode[nodeNr];
0493: }
0494:
0495: /**
0496: * On demand, make an index for quick access to preceding-sibling nodes
0497: */
0498:
0499: void ensurePriorIndex() {
0500: if (prior == null) {
0501: makePriorIndex();
0502: }
0503: }
0504:
0505: private synchronized void makePriorIndex() {
0506: prior = new int[numberOfNodes];
0507: for (int i = 0; i < numberOfNodes; i++) {
0508: prior[i] = -1;
0509: }
0510: for (int i = 0; i < numberOfNodes; i++) {
0511: int nextNode = next[i];
0512: if (nextNode > i) {
0513: prior[nextNode] = i;
0514: }
0515: }
0516: }
0517:
0518: void addAttribute(NodeInfo root, int parent, int nameCode,
0519: int typeCode, CharSequence attValue, int properties) {
0520: ensureAttributeCapacity();
0521: attParent[numberOfAttributes] = parent;
0522: attCode[numberOfAttributes] = nameCode;
0523: this .attValue[numberOfAttributes] = attValue;
0524:
0525: if (typeCode == -1) {
0526: // this shouldn't happen any more
0527: typeCode = StandardNames.XDT_UNTYPED_ATOMIC;
0528: }
0529:
0530: if (typeCode != StandardNames.XDT_UNTYPED_ATOMIC) {
0531: if (attTypeCode == null) {
0532: // this is the first typed attribute;
0533: // create an array for the types, and set all previous attributes to untyped
0534: attTypeCode = new int[attParent.length];
0535: for (int i = 0; i < numberOfAttributes; i++) {
0536: attTypeCode[i] = StandardNames.XDT_UNTYPED_ATOMIC;
0537: }
0538: }
0539: }
0540: if (attTypeCode != null) {
0541: attTypeCode[numberOfAttributes] = typeCode;
0542: }
0543:
0544: if (alpha[parent] == -1) {
0545: alpha[parent] = numberOfAttributes;
0546: }
0547:
0548: if (root instanceof TinyDocumentImpl
0549: && (isIDCode(typeCode) || ((nameCode & NamePool.FP_MASK) == StandardNames.XML_ID))) {
0550:
0551: // The attribute is marked as being an ID. But we don't trust it - it
0552: // might come from a non-validating parser. Before adding it to the index, we
0553: // check that it really is an ID.
0554:
0555: String id = attValue.toString().trim();
0556: if (root.getConfiguration().getNameChecker().isValidNCName(
0557: id)) {
0558: NodeInfo e = getNode(parent);
0559: ((TinyDocumentImpl) root).registerID(e, id);
0560: } else if (attTypeCode != null) {
0561: attTypeCode[numberOfAttributes] = Type.UNTYPED_ATOMIC;
0562: }
0563: }
0564:
0565: // Note: IDREF attributes are not indexed at this stage; that happens only if and when
0566: // the idref() function is called.
0567:
0568: // Note that an attTypes array will be created for all attributes if any ID or IDREF is reported.
0569:
0570: numberOfAttributes++;
0571: }
0572:
0573: /**
0574: * Index an element of type xs:ID
0575: */
0576:
0577: public void indexIDElement(NodeInfo root, int nodeNr,
0578: NameChecker checker) {
0579: String id = TinyParentNodeImpl.getStringValue(this , nodeNr)
0580: .toString().trim();
0581: if (root.getNodeKind() == Type.DOCUMENT
0582: && checker.isValidNCName(id)) {
0583: NodeInfo e = getNode(nodeNr);
0584: ((TinyDocumentImpl) root).registerID(e, id);
0585: }
0586: }
0587:
0588: /**
0589: * Test whether a type annotation code represents the type xs:ID or one of its subtypes
0590: */
0591:
0592: public boolean isIDCode(int typeCode) {
0593: typeCode &= NamePool.FP_MASK;
0594: if (typeCode == StandardNames.XS_ID) {
0595: return true;
0596: } else if (typeCode < 1024) {
0597: // No other built-in type is an ID
0598: return false;
0599: } else {
0600: if (nonIDs == null) {
0601: nonIDs = new IntHashSet(20);
0602: }
0603: //Integer key = new Integer(typeCode);
0604: if (nonIDs.contains(typeCode)) {
0605: return false;
0606: }
0607: SchemaType type = getConfiguration()
0608: .getSchemaType(typeCode);
0609: if (type instanceof AtomicType) {
0610: final TypeHierarchy th = getConfiguration()
0611: .getNamePool().getTypeHierarchy();
0612: if (th.isSubType((AtomicType) type, Type.ID_TYPE)) {
0613: return true;
0614: } else {
0615: nonIDs.add(typeCode);
0616: return false;
0617: }
0618: } else {
0619: return false;
0620: }
0621: }
0622: }
0623:
0624: /**
0625: * Add a namespace node to the current element
0626: * @param parent the node number of the element
0627: * @param nscode namespace code identifying the prefix and uri
0628: */
0629: void addNamespace(int parent, int nscode) {
0630:
0631: ensureNamespaceCapacity();
0632: namespaceParent[numberOfNamespaces] = parent;
0633: namespaceCode[numberOfNamespaces] = nscode;
0634:
0635: if (beta[parent] == -1) {
0636: beta[parent] = numberOfNamespaces;
0637: }
0638: numberOfNamespaces++;
0639: if (nscode != NamespaceConstant.XML_NAMESPACE_CODE) {
0640: usesNamespaces = true;
0641: }
0642: }
0643:
0644: public TinyNodeImpl getNode(int nr) {
0645:
0646: switch ((short) nodeKind[nr]) {
0647: case Type.DOCUMENT:
0648: return (TinyDocumentImpl) documentList.get(alpha[nr]);
0649: case Type.ELEMENT:
0650: return new TinyElementImpl(this , nr);
0651: case Type.TEXT:
0652: return new TinyTextImpl(this , nr);
0653: case Type.COMMENT:
0654: return new TinyCommentImpl(this , nr);
0655: case Type.PROCESSING_INSTRUCTION:
0656: return new TinyProcInstImpl(this , nr);
0657: case Type.PARENT_POINTER:
0658: throw new IllegalArgumentException(
0659: "Attempting to treat a parent pointer as a node");
0660: }
0661:
0662: return null;
0663: }
0664:
0665: /**
0666: * Get the typed value of a node whose type is known to be untypedAtomic.
0667: * The node must be a document, element, text,
0668: * comment, or processing-instruction node, and it must have no type annotation.
0669: * This method gets the typed value
0670: * of a numbered node without actually instantiating the NodeInfo object, as
0671: * a performance optimization.
0672: */
0673:
0674: UntypedAtomicValue getUntypedAtomicValue(int nodeNr) {
0675: switch (nodeKind[nodeNr]) {
0676: case Type.ELEMENT:
0677: case Type.DOCUMENT:
0678: int level = depth[nodeNr];
0679: int next = nodeNr + 1;
0680:
0681: // we optimize two special cases: firstly, where the node has no children, and secondly,
0682: // where it has a single text node as a child.
0683:
0684: if (depth[next] <= level) {
0685: return UntypedAtomicValue.ZERO_LENGTH_UNTYPED;
0686: } else if (nodeKind[next] == Type.TEXT
0687: && depth[next + 1] <= level) {
0688: int length = beta[next];
0689: int start = alpha[next];
0690: return new UntypedAtomicValue(new CharSlice(charBuffer,
0691: start, length));
0692: }
0693:
0694: // Now handle the general case
0695:
0696: StringBuffer sb = null;
0697: while (next < numberOfNodes && depth[next] > level) {
0698: if (nodeKind[next] == Type.TEXT) {
0699: if (sb == null) {
0700: sb = new StringBuffer(1024);
0701: }
0702: int length = beta[next];
0703: int start = alpha[next];
0704: sb.append(charBuffer, start, length);
0705: }
0706: next++;
0707: }
0708: if (sb == null) {
0709: return UntypedAtomicValue.ZERO_LENGTH_UNTYPED;
0710: } else {
0711: //sb.trimToSize(); // TODO: reinstate this in JDK 1.5
0712: char[] buff = new char[sb.length()];
0713: sb.getChars(0, sb.length(), buff, 0);
0714: return new UntypedAtomicValue(new CharSlice(buff));
0715: }
0716:
0717: case Type.TEXT:
0718: int start = alpha[nodeNr];
0719: int len = beta[nodeNr];
0720: return new UntypedAtomicValue(new CharSlice(charBuffer,
0721: start, len));
0722: case Type.COMMENT:
0723: case Type.PROCESSING_INSTRUCTION:
0724: int start2 = alpha[nodeNr];
0725: int len2 = beta[nodeNr];
0726: if (len2 == 0)
0727: return UntypedAtomicValue.ZERO_LENGTH_UNTYPED;
0728: char[] dest = new char[len2];
0729: commentBuffer.getChars(start2, start2 + len2, dest, 0);
0730: return new UntypedAtomicValue(new CharSlice(dest, 0, len2));
0731: default:
0732: throw new IllegalStateException("Unknown node kind");
0733: }
0734: }
0735:
0736: /**
0737: * Make a (transient) attribute node from the array of attributes
0738: */
0739:
0740: TinyAttributeImpl getAttributeNode(int nr) {
0741: return new TinyAttributeImpl(this , nr);
0742: }
0743:
0744: /**
0745: * Get the type annotation of an attribute node.
0746: * The bit {@link NodeInfo#IS_DTD_TYPE} (1<<30) will be set in the case of an attribute node if the type annotation
0747: * is one of ID, IDREF, or IDREFS and this is derived from DTD rather than schema validation.
0748: * @return Type.UNTYPED_ATOMIC if there is no annotation
0749: */
0750:
0751: int getAttributeAnnotation(int nr) {
0752: if (attTypeCode == null) {
0753: return StandardNames.XDT_UNTYPED_ATOMIC;
0754: } else {
0755: return attTypeCode[nr]
0756: & (NamePool.FP_MASK | NodeInfo.IS_DTD_TYPE);
0757: }
0758: }
0759:
0760: /**
0761: * Determine whether an attribute is an IDREF/IDREFS attribute. (The represents the
0762: * is-IDREF property in the data model
0763: */
0764:
0765: boolean isIdref(int nr) {
0766: if (attTypeCode == null) {
0767: return false;
0768: } else {
0769: int tc = attTypeCode[nr];
0770: if ((attTypeCode[nr] & (1 << 30)) != 0) {
0771: return true;
0772: } else if (tc == Type.UNTYPED_ATOMIC) {
0773: return false;
0774: } else if (tc == StandardNames.XS_IDREF) {
0775: return true;
0776: } else if (tc == StandardNames.XS_IDREFS) {
0777: return true;
0778: } else if (tc < 1024) {
0779: return false;
0780: } else {
0781: final SchemaType type = getConfiguration()
0782: .getSchemaType(tc);
0783: final TypeHierarchy th = getConfiguration()
0784: .getNamePool().getTypeHierarchy();
0785: if (type instanceof AtomicType) {
0786: return th
0787: .isSubType(
0788: (AtomicType) type,
0789: (AtomicType) BuiltInSchemaFactory
0790: .getSchemaType(StandardNames.XS_IDREF));
0791: } else if (type instanceof ListType) {
0792: SimpleType itemType = ((ListType) type)
0793: .getItemType();
0794: if (!(itemType instanceof AtomicType)) {
0795: return false;
0796: }
0797: return th
0798: .isSubType(
0799: (AtomicType) itemType,
0800: (AtomicType) BuiltInSchemaFactory
0801: .getSchemaType(StandardNames.XS_IDREF));
0802: }
0803: }
0804: }
0805: return false;
0806: }
0807:
0808: /**
0809: * Set the system id of an element in the document. This identifies the external entity containing
0810: * the node - this is not necessarily the same as the base URI.
0811: * @param seq the node number
0812: * @param uri the system ID
0813: */
0814:
0815: void setSystemId(int seq, String uri) {
0816: if (uri == null) {
0817: uri = "";
0818: }
0819: if (systemIdMap == null) {
0820: systemIdMap = new SystemIdMap();
0821: }
0822: systemIdMap.setSystemId(seq, uri);
0823: }
0824:
0825: /**
0826: * Get the system id of an element in the document
0827: */
0828:
0829: String getSystemId(int seq) {
0830: if (systemIdMap == null) {
0831: return null;
0832: }
0833: return systemIdMap.getSystemId(seq);
0834: }
0835:
0836: /**
0837: * Get the root node for a given node
0838: */
0839:
0840: int getRootNode(int nodeNr) {
0841: for (int i = rootIndexUsed - 1; i >= 0; i--) {
0842: if (rootIndex[i] <= nodeNr) {
0843: return rootIndex[i];
0844: }
0845: }
0846: return 0;
0847: }
0848:
0849: /**
0850: * Set line numbering on
0851: */
0852:
0853: public void setLineNumbering() {
0854: lineNumberMap = new LineNumberMap();
0855: lineNumberMap.setLineNumber(0, 0);
0856: }
0857:
0858: /**
0859: * Set the line number for an element. Ignored if line numbering is off.
0860: */
0861:
0862: void setLineNumber(int sequence, int line) {
0863: if (lineNumberMap != null) {
0864: lineNumberMap.setLineNumber(sequence, line);
0865: }
0866: }
0867:
0868: /**
0869: * Get the line number for an element. Return -1 if line numbering is off.
0870: */
0871:
0872: int getLineNumber(int sequence) {
0873: if (lineNumberMap != null) {
0874: return lineNumberMap.getLineNumber(sequence);
0875: }
0876: return -1;
0877: }
0878:
0879: /**
0880: * Get the document number (actually, the tree number)
0881: */
0882:
0883: public int getDocumentNumber() {
0884: return documentNumber;
0885: }
0886:
0887: /**
0888: * Determine whether a given node is nilled
0889: */
0890:
0891: public boolean isNilled(int nodeNr) {
0892: if (nodeKind[nodeNr] != Type.ELEMENT) {
0893: return false;
0894: }
0895: if (typeCodeArray == null || typeCodeArray[nodeNr] == -1
0896: || typeCodeArray[nodeNr] == StandardNames.XDT_UNTYPED) {
0897: return false;
0898: }
0899: int index = alpha[nodeNr];
0900: if (index > 0) {
0901: while (attParent[index] == nodeNr
0902: && index < numberOfAttributes) {
0903: if (attCode[index] == StandardNames.XSI_NIL) {
0904: String val = attValue[index].toString().trim();
0905: if (val.equals("1") || val.equals("true")) {
0906: return true;
0907: }
0908: }
0909: index++;
0910: }
0911: }
0912: return false;
0913: }
0914:
0915: /**
0916: * Produce diagnostic print of main tree arrays
0917: */
0918:
0919: public void diagnosticDump() {
0920: System.err
0921: .println(" node type depth next alpha beta name");
0922: for (int i = 0; i < numberOfNodes; i++) {
0923: System.err.println(n8(i) + n8(nodeKind[i]) + n8(depth[i])
0924: + n8(next[i]) + n8(alpha[i]) + n8(beta[i])
0925: + n8(nameCode[i]));
0926: }
0927: System.err.println(" attr parent name value");
0928: for (int i = 0; i < numberOfAttributes; i++) {
0929: System.err.println(n8(i) + n8(attParent[i])
0930: + n8(attCode[i]) + " " + attValue[i]);
0931: }
0932: System.err.println(" ns parent prefix uri");
0933: for (int i = 0; i < numberOfNamespaces; i++) {
0934: System.err.println(n8(i) + n8(namespaceParent[i])
0935: + n8(namespaceCode[i] >> 16)
0936: + n8(namespaceCode[i] & 0xffff));
0937: }
0938: }
0939:
0940: /**
0941: * Output a number as a string of 8 characters
0942: */
0943:
0944: private String n8(int val) {
0945: String s = " " + val;
0946: return s.substring(s.length() - 8);
0947: }
0948:
0949: public void showSize() {
0950: System.err.println("Tree size: " + numberOfNodes + " nodes, "
0951: + charBufferLength + " characters, "
0952: + numberOfAttributes + " attributes");
0953: }
0954:
0955: /**
0956: * Get the number of nodes in the tree, excluding attributes and namespace nodes
0957: * @return the number of nodes.
0958: */
0959:
0960: public int getNumberOfNodes() {
0961: return numberOfNodes;
0962: }
0963:
0964: public int getNumberOfAttributes() {
0965: return numberOfAttributes;
0966: }
0967:
0968: public int getNumberOfNamespaces() {
0969: return numberOfNamespaces;
0970: }
0971:
0972: public byte[] getNodeKindArray() {
0973: return nodeKind;
0974: }
0975:
0976: public short[] getNodeDepthArray() {
0977: return depth;
0978: }
0979:
0980: public int[] getNameCodeArray() {
0981: return nameCode;
0982: }
0983:
0984: public int[] getTypeCodeArray() {
0985: return typeCodeArray;
0986: }
0987:
0988: public int[] getNextPointerArray() {
0989: return next;
0990: }
0991:
0992: public int[] getAlphaArray() {
0993: return alpha;
0994: }
0995:
0996: public int[] getBetaArray() {
0997: return beta;
0998: }
0999:
1000: public CharSequence getCharacterBuffer() {
1001: return new CharSlice(charBuffer, 0, charBufferLength);
1002: }
1003:
1004: public CharSequence getCommentBuffer() {
1005: return commentBuffer;
1006: }
1007:
1008: public int[] getAttributeNameCodeArray() {
1009: return attCode;
1010: }
1011:
1012: public int[] getAttributeTypeCodeArray() {
1013: return attTypeCode;
1014: }
1015:
1016: public int[] getAttributeParentArray() {
1017: return attParent;
1018: }
1019:
1020: public CharSequence[] getAttributeValueArray() {
1021: return attValue;
1022: }
1023:
1024: public int[] getNamespaceCodeArray() {
1025: return namespaceCode;
1026: }
1027:
1028: public int[] getNamespaceParentArray() {
1029: return namespaceParent;
1030: }
1031:
1032: }
1033:
1034: //
1035: // The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
1036: // you may not use this file except in compliance with the License. You may obtain a copy of the
1037: // License at http://www.mozilla.org/MPL/
1038: //
1039: // Software distributed under the License is distributed on an "AS IS" basis,
1040: // WITHOUT WARRANTY OF ANY KIND, either express or implied.
1041: // See the License for the specific language governing rights and limitations under the License.
1042: //
1043: // The Original Code is: all this file
1044: //
1045: // The Initial Developer of the Original Code is Michael H. Kay.
1046: //
1047: // Contributor(s):
1048: //
|