001: package net.sf.saxon.tinytree;
002:
003: import net.sf.saxon.event.PipelineConfiguration;
004: import net.sf.saxon.om.*;
005: import net.sf.saxon.pull.PullProvider;
006: import net.sf.saxon.trans.XPathException;
007: import net.sf.saxon.type.Type;
008: import net.sf.saxon.value.AtomicValue;
009: import net.sf.saxon.style.StandardNames;
010:
011: import javax.xml.transform.SourceLocator;
012:
013: /**
014: * This implementation of the Saxon pull interface starts from a document, element,
015: * text, comment, or processing-instruction node in a TinyTree,
016: * and returns the events corresponding to that node and its descendants (including
017: * their attributes and namespaces). The class performs the same function as
018: * the general-purpose {@link net.sf.saxon.pull.TreeWalker} class, but is
019: * specialized to exploit the TinyTree data structure: in particular, it never
020: * materializes any Node objects.
021: */
022:
023: public class TinyTreeWalker implements PullProvider, SourceLocator {
024:
// Node number of the node at which the walk starts (copied from the start node's nodeNr).
private int startNode;
// Node number of the node at the current position; used as an index into the tree's arrays.
private int currentNode;
// The event most recently reported; this is the value returned by current().
private int currentEvent;
// The TinyTree that contains the start node.
private TinyTree tree;
// Pipeline configuration supplied through setPipelineConfiguration(); null until then.
private PipelineConfiguration pipe;
// Reusable holder handed back by getNamespaceDeclarations(); refilled on each call.
private NamespaceDeclarationsImpl nsDeclarations;
// Scratch buffer passed to the TinyElementImpl namespace lookups.
private int[] nsBuffer = new int[10];
032:
/**
 * Create a TinyTreeWalker to return events associated with a tree or subtree.
 *
 * @param startNode the root of the tree or subtree. Must be a document, element, text,
 * comment, or processing-instruction node.
 * @throws IllegalArgumentException if the start node is an attribute or namespace node.
 */

public TinyTreeWalker(TinyNodeImpl startNode) {
    int kind = startNode.getNodeKind();
    if (kind == Type.ATTRIBUTE || kind == Type.NAMESPACE) {
        throw new IllegalArgumentException(
                "TinyTreeWalker cannot start at an attribute or namespace node");
    }
    this.startNode = startNode.nodeNr;
    this.tree = startNode.tree;
    // Establish the initial state explicitly instead of relying on the int field
    // defaults: the first call to next() must see START_OF_INPUT, and location
    // queries made before that call should refer to the start node, not node 0.
    this.currentNode = startNode.nodeNr;
    this.currentEvent = START_OF_INPUT;
    this.nsDeclarations = new NamespaceDeclarationsImpl();
    nsDeclarations.setNamePool(startNode.getNamePool());
}
051:
052: /**
053: * Set configuration information. This must only be called before any events
054: * have been read.
055: */
056:
057: public void setPipelineConfiguration(PipelineConfiguration pipe) {
058: this .pipe = pipe;
059: }
060:
061: /**
062: * Get configuration information.
063: */
064:
065: public PipelineConfiguration getPipelineConfiguration() {
066: return pipe;
067: }
068:
069: /**
070: * Get the next event
071: *
072: * @return an integer code indicating the type of event. The code
073: * {@link #END_OF_INPUT} is returned if there are no more events to return.
074: */
075:
076: public int next() throws XPathException {
077: switch (currentEvent) {
078: case START_OF_INPUT:
079: currentNode = startNode;
080: switch (tree.nodeKind[currentNode]) {
081: case Type.DOCUMENT:
082: currentEvent = START_DOCUMENT;
083: break;
084: case Type.ELEMENT:
085: currentEvent = START_ELEMENT;
086: break;
087: case Type.TEXT:
088: currentEvent = TEXT;
089: break;
090: case Type.COMMENT:
091: currentEvent = COMMENT;
092: break;
093: case Type.PROCESSING_INSTRUCTION:
094: currentEvent = PROCESSING_INSTRUCTION;
095: break;
096: case Type.PARENT_POINTER:
097: throw new IllegalStateException(
098: "Current node is a parent-pointer pseudo-node");
099: }
100: return currentEvent;
101:
102: case START_DOCUMENT:
103: case START_ELEMENT:
104:
105: if (tree.depth[currentNode + 1] > tree.depth[currentNode]) {
106: // the current element or document has children: move to the first child
107: switch (tree.nodeKind[++currentNode]) {
108: case Type.ELEMENT:
109: currentEvent = START_ELEMENT;
110: break;
111: case Type.TEXT:
112: currentEvent = TEXT;
113: break;
114: case Type.COMMENT:
115: currentEvent = COMMENT;
116: break;
117: case Type.PROCESSING_INSTRUCTION:
118: currentEvent = PROCESSING_INSTRUCTION;
119: break;
120: case Type.PARENT_POINTER:
121: throw new IllegalStateException(
122: "First child node must not be a parent-pointer pseudo-node");
123: }
124: return currentEvent;
125: } else {
126: if (currentEvent == START_DOCUMENT) {
127: currentEvent = END_DOCUMENT;
128: } else {
129: currentEvent = END_ELEMENT;
130: }
131: return currentEvent;
132: }
133: case END_ELEMENT:
134: case TEXT:
135: case COMMENT:
136: case PROCESSING_INSTRUCTION:
137: if (currentNode == startNode) {
138: currentEvent = END_OF_INPUT;
139: return currentEvent;
140: }
141: int next = tree.next[currentNode];
142: if (next > currentNode) {
143: // this node has a following sibling
144: currentNode = tree.next[currentNode];
145: do {
146: switch (tree.nodeKind[currentNode]) {
147: case Type.ELEMENT:
148: currentEvent = START_ELEMENT;
149: break;
150: case Type.TEXT:
151: currentEvent = TEXT;
152: break;
153: case Type.COMMENT:
154: currentEvent = COMMENT;
155: break;
156: case Type.PROCESSING_INSTRUCTION:
157: currentEvent = PROCESSING_INSTRUCTION;
158: break;
159: case Type.PARENT_POINTER:
160: // skip this pseudo-node
161: currentEvent = -1;
162: currentNode++;
163: break;
164: }
165: } while (currentEvent == -1);
166: return currentEvent;
167: } else {
168: // return to the parent element or document
169: currentNode = next;
170: if (currentNode == -1) {
171: // indicates we were at the END_ELEMENT of a parentless element node
172: currentEvent = END_OF_INPUT;
173: return currentEvent;
174: }
175: switch (tree.nodeKind[currentNode]) {
176: case Type.ELEMENT:
177: currentEvent = END_ELEMENT;
178: break;
179: case Type.DOCUMENT:
180: currentEvent = END_DOCUMENT;
181: break;
182: }
183: return currentEvent;
184: }
185:
186: case ATTRIBUTE:
187: case NAMESPACE:
188: case END_DOCUMENT:
189: currentEvent = END_OF_INPUT;
190: return currentEvent;
191:
192: case END_OF_INPUT:
193: throw new IllegalStateException(
194: "Cannot call next() when input is exhausted");
195:
196: default:
197: throw new IllegalStateException("Unrecognized event "
198: + currentEvent);
199:
200: }
201: }
202:
203: /**
204: * Get the event most recently returned by next(), or by other calls that change
205: * the position, for example getStringValue() and skipToMatchingEnd(). This
206: * method does not change the position of the PullProvider.
207: *
208: * @return the current event
209: */
210:
211: public int current() {
212: return currentEvent;
213: }
214:
215: /**
216: * Get the attributes associated with the current element. This method must
217: * be called only after a START_ELEMENT event has been notified. The contents
218: * of the returned AttributeCollection are guaranteed to remain unchanged
219: * until the next START_ELEMENT event, but may be modified thereafter. The object
220: * should not be modified by the client.
221: * <p/>
222: * <p>Attributes may be read before or after reading the namespaces of an element,
223: * but must not be read after the first child node has been read, or after calling
224: * one of the methods skipToEnd(), getStringValue(), or getTypedValue().</p>
225: *
226: * @return an AttributeCollection representing the attributes of the element
227: * that has just been notified.
228: */
229:
230: public AttributeCollection getAttributes() throws XPathException {
231: if (tree.nodeKind[currentNode] == Type.ELEMENT) {
232: if (tree.alpha[currentNode] == -1) {
233: return AttributeCollectionImpl.EMPTY_ATTRIBUTE_COLLECTION;
234: }
235: return new TinyAttributeCollection(tree, currentNode);
236: } else {
237: throw new IllegalStateException(
238: "getAttributes() called when current event is not ELEMENT_START");
239: }
240: }
241:
242: /**
243: * Get the namespace declarations associated with the current element. This method must
244: * be called only after a START_ELEMENT event has been notified. In the case of a top-level
245: * START_ELEMENT event (that is, an element that either has no parent node, or whose parent
246: * is not included in the sequence being read), the NamespaceDeclarations object returned
247: * will contain a namespace declaration for each namespace that is in-scope for this element
248: * node. In the case of a non-top-level element, the NamespaceDeclarations will contain
249: * a set of namespace declarations and undeclarations, representing the differences between
250: * this element and its parent.
251: * <p/>
252: * <p>It is permissible for this method to return namespace declarations that are redundant.</p>
253: * <p/>
254: * <p>The NamespaceDeclarations object is guaranteed to remain unchanged until the next START_ELEMENT
255: * event, but may then be overwritten. The object should not be modified by the client.</p>
256: * <p/>
257: * <p>Namespaces may be read before or after reading the attributes of an element,
258: * but must not be read after the first child node has been read, or after calling
259: * one of the methods skipToEnd(), getStringValue(), or getTypedValue().</p>*
260: */
261:
262: public NamespaceDeclarations getNamespaceDeclarations()
263: throws XPathException {
264: if (tree.nodeKind[currentNode] == Type.ELEMENT) {
265: int[] decl;
266: if (currentNode == startNode) {
267: // get all inscope namespaces for a top-level element in the sequence.
268: decl = TinyElementImpl.getInScopeNamespaces(tree,
269: currentNode, nsBuffer);
270: } else {
271: // only namespace declarations (and undeclarations) on this element are required
272: decl = TinyElementImpl.getDeclaredNamespaces(tree,
273: currentNode, nsBuffer);
274: }
275: nsDeclarations.setNamespaceCodes(decl);
276: return nsDeclarations;
277: }
278: throw new IllegalStateException(
279: "getNamespaceDeclarations() called when current event is not START_ELEMENT");
280: }
281:
282: /**
283: * Skip the current subtree. This method may be called only immediately after
284: * a START_DOCUMENT or START_ELEMENT event. This call returns the matching
285: * END_DOCUMENT or END_ELEMENT event; the next call on next() will return
286: * the event following the END_DOCUMENT or END_ELEMENT.
287: */
288:
289: public int skipToMatchingEnd() throws XPathException {
290: // For this implementation, we simply leave the current node unchanged, and change
291: // the current event
292: switch (currentEvent) {
293: case START_DOCUMENT:
294: currentEvent = END_DOCUMENT;
295: return currentEvent;
296: case START_ELEMENT:
297: currentEvent = END_ELEMENT;
298: return currentEvent;
299: default:
300: throw new IllegalStateException(
301: "Cannot call skipToMatchingEnd() except when at start of element or document");
302:
303: }
304: }
305:
306: /**
307: * Close the event reader. This indicates that no further events are required.
308: * It is not necessary to close an event reader after {@link #END_OF_INPUT} has
309: * been reported, but it is recommended to close it if reading terminates
310: * prematurely. Once an event reader has been closed, the effect of further
311: * calls on next() is undefined.
312: */
313:
314: public void close() {
315: // no action
316: }
317:
318: /**
319: * Get the namePool used to lookup all name codes and namespace codes
320: *
321: * @return the namePool
322: */
323:
324: public NamePool getNamePool() {
325: return pipe.getConfiguration().getNamePool();
326: }
327:
328: /**
329: * Get the nameCode identifying the name of the current node. This method
330: * can be used after the {@link #START_ELEMENT}, {@link #PROCESSING_INSTRUCTION},
331: * {@link #ATTRIBUTE}, or {@link #NAMESPACE} events. With some PullProvider implementations,
332: * including this one, it can also be used after {@link #END_ELEMENT}.
333: * If called at other times, the result is undefined and may result in an IllegalStateException.
334: * If called when the current node is an unnamed namespace node (a node representing the default namespace)
335: * the returned value is -1.
336: * @return the nameCode. The nameCode can be used to obtain the prefix, local name,
337: * and namespace URI from the name pool.
338: */
339:
340: public int getNameCode() {
341: switch (currentEvent) {
342: case START_ELEMENT:
343: case PROCESSING_INSTRUCTION:
344: case END_ELEMENT:
345: return tree.nameCode[currentNode];
346: default:
347: throw new IllegalStateException(
348: "getNameCode() called when its value is undefined");
349: }
350: }
351:
352: /**
353: * Get the fingerprint of the name of the element. This is similar to the nameCode, except that
354: * it does not contain any information about the prefix: so two elements with the same fingerprint
355: * have the same name, excluding prefix. This method
356: * can be used after the {@link #START_ELEMENT}, {@link #END_ELEMENT}, {@link #PROCESSING_INSTRUCTION},
357: * {@link #ATTRIBUTE}, or {@link #NAMESPACE} events.
358: * If called at other times, the result is undefined and may result in an IllegalStateException.
359: * If called when the current node is an unnamed namespace node (a node representing the default namespace)
360: * the returned value is -1.
361: *
362: * @return the fingerprint. The fingerprint can be used to obtain the local name
363: * and namespace URI from the name pool.
364: */
365:
366: public int getFingerprint() {
367: int nc = getNameCode();
368: if (nc == -1) {
369: return -1;
370: } else {
371: return nc & NamePool.FP_MASK;
372: }
373: }
374:
375: /**
376: * Get the string value of the current attribute, text node, processing-instruction,
377: * or atomic value.
378: * This method cannot be used to obtain the string value of an element, or of a namespace
379: * node. If the most recent event was anything other than {@link #START_ELEMENT}, {@link #TEXT},
380: * {@link #PROCESSING_INSTRUCTION}, or {@link #ATOMIC_VALUE}, the result is undefined.
381: */
382:
383: public CharSequence getStringValue() throws XPathException {
384: switch (tree.nodeKind[currentNode]) {
385: case Type.TEXT:
386: return TinyTextImpl.getStringValue(tree, currentNode);
387: case Type.COMMENT:
388: case Type.PROCESSING_INSTRUCTION:
389: // sufficiently rare that instantiating the node is OK
390: return tree.getNode(currentNode).getStringValue();
391: case Type.ELEMENT:
392: currentEvent = END_ELEMENT;
393: return TinyParentNodeImpl.getStringValue(tree, currentNode);
394: case Type.PARENT_POINTER:
395: throw new IllegalStateException(
396: "Trying to get string value of a parent-pointer pseudo node");
397: }
398: return null;
399: }
400:
401: /**
402: * Get an atomic value. This call may be used only when the last event reported was
403: * ATOMIC_VALUE. This indicates that the PullProvider is reading a sequence that contains
404: * a free-standing atomic value; it is never used when reading the content of a node.
405: */
406:
407: public AtomicValue getAtomicValue() {
408: throw new IllegalStateException();
409: }
410:
411: /**
412: * Get the type annotation of the current attribute or element node, or atomic value.
413: * The result of this method is undefined unless the most recent event was START_ELEMENT,
414: * START_CONTENT, ATTRIBUTE, or ATOMIC_VALUE.
415: *
416: * @return the type code. This code is the fingerprint of a type name, which may be
417: * resolved to a {@link net.sf.saxon.type.SchemaType} by access to the Configuration.
418: */
419:
420: public int getTypeAnnotation() {
421: if (tree.nodeKind[currentNode] != Type.ELEMENT) {
422: throw new IllegalStateException(
423: "getTypeAnnotation() called when current event is not ELEMENT_START or ");
424: }
425: if (tree.typeCodeArray == null) {
426: return StandardNames.XDT_UNTYPED;
427: }
428: return tree.typeCodeArray[currentNode];
429: }
430:
431: /**
432: * Get the location of the current event.
433: * For an event stream representing a real document, the location information
434: * should identify the location in the lexical XML source. For a constructed document, it should
435: * identify the location in the query or stylesheet that caused the node to be created.
436: * A value of null can be returned if no location information is available.
437: */
438:
439: public SourceLocator getSourceLocator() {
440: return this ;
441: }
442:
443: /**
444: * Return the public identifier for the current document event.
445: * <p/>
446: * <p>The return value is the public identifier of the document
447: * entity or of the external parsed entity in which the markup that
448: * triggered the event appears.</p>
449: *
450: * @return A string containing the public identifier, or
451: * null if none is available.
452: * @see #getSystemId
453: */
454: public String getPublicId() {
455: return null;
456: }
457:
458: /**
459: * Return the system identifier for the current document event.
460: * <p/>
461: * <p>The return value is the system identifier of the document
462: * entity or of the external parsed entity in which the markup that
463: * triggered the event appears.</p>
464: * <p/>
465: * <p>If the system identifier is a URL, the parser must resolve it
466: * fully before passing it to the application.</p>
467: *
468: * @return A string containing the system identifier, or null
469: * if none is available.
470: * @see #getPublicId
471: */
472: public String getSystemId() {
473: return tree.getSystemId(currentNode);
474: }
475:
476: /**
477: * Return the line number where the current document event ends.
478: * <p/>
479: * <p><strong>Warning:</strong> The return value from the method
480: * is intended only as an approximation for the sake of error
481: * reporting; it is not intended to provide sufficient information
482: * to edit the character content of the original XML document.</p>
483: * <p/>
484: * <p>The return value is an approximation of the line number
485: * in the document entity or external parsed entity where the
486: * markup that triggered the event appears.</p>
487: *
488: * @return The line number, or -1 if none is available.
489: * @see #getColumnNumber
490: */
491: public int getLineNumber() {
492: return tree.getLineNumber(currentNode);
493: }
494:
495: /**
496: * Return the character position where the current document event ends.
497: * <p/>
498: * <p><strong>Warning:</strong> The return value from the method
499: * is intended only as an approximation for the sake of error
500: * reporting; it is not intended to provide sufficient information
501: * to edit the character content of the original XML document.</p>
502: * <p/>
503: * <p>The return value is an approximation of the column number
504: * in the document entity or external parsed entity where the
505: * markup that triggered the event appears.</p>
506: *
507: * @return The column number, or -1 if none is available.
508: * @see #getLineNumber
509: */
510: public int getColumnNumber() {
511: return -1;
512: }
513: }
514:
515: //
516: // The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
517: // you may not use this file except in compliance with the License. You may obtain a copy of the
518: // License at http://www.mozilla.org/MPL/
519: //
520: // Software distributed under the License is distributed on an "AS IS" basis,
521: // WITHOUT WARRANTY OF ANY KIND, either express or implied.
522: // See the License for the specific language governing rights and limitations under the License.
523: //
524: // The Original Code is: all this file.
525: //
526: // The Initial Developer of the Original Code is Michael H. Kay.
527: //
528: // Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
529: //
530: // Contributor(s): none.
531: //
|