001: package net.sf.saxon.pull;
002:
003: import net.sf.saxon.event.PipelineConfiguration;
004: import net.sf.saxon.om.AttributeCollection;
005: import net.sf.saxon.om.NamespaceDeclarations;
006: import net.sf.saxon.trans.XPathException;
007: import net.sf.saxon.value.AtomicValue;
008:
009: import javax.xml.transform.SourceLocator;
010:
011: /**
012: * PullProvider is Saxon's pull-based interface for reading XML documents. In fact,
013: * a PullProvider can deliver any sequence of nodes or atomic values. An atomic value
014: * in the sequence is delivered as a single event; a node is delivered as a sequence
015: * of events equivalent to a recursive walk of the XML tree. Within this sequence,
016: * the start and end of a document, or of an element, are delivered as separate
017: * events; other nodes are delivered as individual events.
018: */
019:
020: public interface PullProvider {
021:
022: // Start by defining the different types of event
023:
024: /**
025: * START_OF_INPUT is the initial state when the PullProvider is instantiated.
026: * This event is never notified by the next() method, but it is returned
027: * from a call of current() prior to the first call on next().
028: */
029:
030: public static final int START_OF_INPUT = 0;
031:
032: /**
033: * ATOMIC_VALUE is notified when the PullProvider is reading a sequence of items,
034: * and one of the items is an atomic value rather than a node. This will always
035: * be a top-level event (it will never be nested in Start/End Document or
036: * Start/End Element).
037: */
038:
039: public static final int ATOMIC_VALUE = 1;
040:
041: /**
042: * START_DOCUMENT is notified when a document node is encountered. This will
043: * always be a top-level event (it will never be nested in Start/End Document or
044: * Start/End Element). Note however that multiple document nodes can occur in
045: * a sequence, and the start and end of each one will be notified.
046: */
047:
048: public static final int START_DOCUMENT = 2;
049:
050: /**
051: * END_DOCUMENT is notified at the end of processing a document node, that is,
052: * after all the descendants of the document node have been notified. The event
053: * will always be preceded by the corresponding START_DOCUMENT event.
054: */
055:
056: public static final int END_DOCUMENT = 3;
057:
058: /**
059: * START_ELEMENT is notified when an element node is encountered. This may either
060: * be a top-level element (an element node that participates in the sequence being
061: * read in its own right) or a nested element (reported because it is a descendant
062: * of an element or document node that participates in the sequence.)
063: *
064: * <p>Following the notification of START_ELEMENT, the client may obtain information
065: * about the element node, such as its name and type annotation. The client may also
066: * call getAttributes() to obtain information about the attributes of the element
067: * node, and/or getNamespaceDeclarations to get information about the namespace
068: * declarations. The client may then do one of the following:</p>
069: *
070: * <ul>
071: * <li>Call skipToEnd() to move straight to the corresponding END_ELEMENT event (which
072: * will be the next event notified)</li>
073: * <li>Call next(), repeatedly, to be notified of events relating to the children and
074: * descendants of this element node</li>
075: * <li>Call getStringValue() to obtain the string value of the element node, after which
076: * the next event notified will be the corresponding END_ELEMENT event</li>
077: * <li>Call getTypedValue() to obtain the typed value of the element node, after which
078: * the next event notified will be the corresponding END_ELEMENT event</li>
079: * </ul>
080: */
081:
082: public static final int START_ELEMENT = 4;
083:
084: /**
085: * END_ELEMENT is notified at the end of an element node, that is, after all the children
086: * and descendants of the element have either been processed or skipped. It may relate to
087: * a top-level element, or to a nested element. For an empty element (one with no children)
088: * the END_ELEMENT event will immediately follow the corresponding START_ELEMENT event.
089: * No information (such as the element name) is available after an END_ELEMENT event: if the
090: * client requires such information, it must remember it, typically on a Stack.
091: */
092:
093: public static final int END_ELEMENT = 5;
094:
095: /**
096: * The ATTRIBUTE event is notified only for an attribute node that appears in its own right
097: * as a top-level item in the sequence being read. ATTRIBUTE events are not notified for
098: * the attributes of an element that has been notified: such attributes must be read using the
099: * {@link #getAttributes()} method.
100: */
101:
102: public static final int ATTRIBUTE = 6;
103:
104: /**
105: * The NAMESPACE event is notified only for a namespace node that appears in its own right
106: * as a top-level item in the sequence being read. NAMESPACE events are not notified for
107: * the namespaces of an element that has been notified: such attributes must be read using the
108: * {@link #getNamespaceDeclarations()} method.
109: */
110:
111: public static final int NAMESPACE = 7;
112:
113: /**
114: * A TEXT event is notified for a text node. This may either be a top-level text
115: * node, or a text node nested within an element or document node. At the top level,
116: * text nodes may be zero-length and may be consecutive in the sequence being read.
117: * Nested within an element or document node, text nodes will never be zero-length,
118: * and adjacent text nodes will have been coalesced into one. (This might not always
119: * be true when reading third-party data models such as a DOM.) Whitespace-only
120: * text nodes will be notified unless something has been done (e.g. xsl:strip-space)
121: * to remove them.
122: */
123:
124: public static final int TEXT = 8;
125:
126: /**
127: * A COMMENT event is notified for a comment node, which may be either a top-level
128: * comment or one nested within an element or document node.
129: */
130:
131: public static final int COMMENT = 9;
132:
133: /**
134: * A PROCESSING_INSTRUCTION event is notified for a processing instruction node,
135: * which may be either a top-level comment or one nested within an element or document node.
136: * As defined in the XPath data model, the "target" of a processing instruction is represented
137: * as the node name (which only has a local part, no prefix or URI), and the "data" of the
138: * processing instruction is represented as the string-value of the node.
139: */
140:
141: public static final int PROCESSING_INSTRUCTION = 10;
142:
143: /**
144: * The END_OF_INPUT event is returned to indicate the end of the sequence being read.
145: * After this event, the result of any further calls on the next() method is undefined.
146: */
147:
148: public static final int END_OF_INPUT = -1;
149:
150: /**
151: * Set configuration information. This must only be called before any events
152: * have been read.
153: */
154:
155: public void setPipelineConfiguration(PipelineConfiguration pipe);
156:
157: /**
158: * Get configuration information.
159: */
160:
161: public PipelineConfiguration getPipelineConfiguration();
162:
163: /**
164: * Get the next event
165: * @return an integer code indicating the type of event. The code
166: * {@link #END_OF_INPUT} is returned at the end of the sequence.
167: */
168:
169: public int next() throws XPathException;
170:
171: /**
172: * Get the event most recently returned by next(), or by other calls that change
173: * the position, for example getStringValue() and skipToMatchingEnd(). This
174: * method does not change the position of the PullProvider.
175: * @return the current event
176: */
177:
178: public int current();
179:
180: /**
181: * Get the attributes associated with the current element. This method must
182: * be called only after a START_ELEMENT event has been notified. The contents
183: * of the returned AttributeCollection are guaranteed to remain unchanged
184: * until the next START_ELEMENT event, but may be modified thereafter. The object
185: * should not be modified by the client.
186: *
187: * <p>Attributes may be read before or after reading the namespaces of an element,
188: * but must not be read after the first child node has been read, or after calling
189: * one of the methods skipToEnd(), getStringValue(), or getTypedValue().</p>
190: *
191: * @return an AttributeCollection representing the attributes of the element
192: * that has just been notified.
193: */
194:
195: public AttributeCollection getAttributes() throws XPathException;
196:
197: /**
198: * Get the namespace declarations associated with the current element. This method must
199: * be called only after a START_ELEMENT event has been notified. In the case of a top-level
200: * START_ELEMENT event (that is, an element that either has no parent node, or whose parent
201: * is not included in the sequence being read), the NamespaceDeclarations object returned
202: * will contain a namespace declaration for each namespace that is in-scope for this element
203: * node. In the case of a non-top-level element, the NamespaceDeclarations will contain
204: * a set of namespace declarations and undeclarations, representing the differences between
205: * this element and its parent.
206: *
207: * <p>It is permissible for this method to return namespace declarations that are redundant.</p>
208: *
209: * <p>The NamespaceDeclarations object is guaranteed to remain unchanged until the next START_ELEMENT
210: * event, but may then be overwritten. The object should not be modified by the client.</p>
211: *
212: * <p>Namespaces may be read before or after reading the attributes of an element,
213: * but must not be read after the first child node has been read, or after calling
214: * one of the methods skipToEnd(), getStringValue(), or getTypedValue().</p>*
215: */
216:
217: public NamespaceDeclarations getNamespaceDeclarations()
218: throws XPathException;
219:
220: /**
221: * Skip the current subtree. This method may be called only immediately after
222: * a START_DOCUMENT or START_ELEMENT event. This call returns the matching
223: * END_DOCUMENT or END_ELEMENT event; the next call on next() will return
224: * the event following the END_DOCUMENT or END_ELEMENT.
225: * @throws IllegalStateException if the method is called at any time other than
226: * immediately after a START_DOCUMENT or START_ELEMENT event.
227: */
228:
229: public int skipToMatchingEnd() throws XPathException;
230:
231: /**
232: * Close the event reader. This indicates that no further events are required.
233: * It is not necessary to close an event reader after {@link #END_OF_INPUT} has
234: * been reported, but it is recommended to close it if reading terminates
235: * prematurely. Once an event reader has been closed, the effect of further
236: * calls on next() is undefined.
237: */
238:
239: public void close();
240:
241: /**
242: * Get the nameCode identifying the name of the current node. This method
243: * can be used after the {@link #START_ELEMENT}, {@link #PROCESSING_INSTRUCTION},
244: * {@link #ATTRIBUTE}, or {@link #NAMESPACE} events. With some PullProvider implementations,
245: * it can also be used after {@link #END_ELEMENT}, but this is not guaranteed: a client who
246: * requires the information at that point (for example, to do serialization) should insert an
247: * {@link ElementNameTracker} into the pipeline.
248: * If called at other times, the result is undefined and may result in an IllegalStateException.
249: * If called when the current node is an unnamed namespace node (a node representing the default namespace)
250: * the returned value is -1.
251: * @return the nameCode. The nameCode can be used to obtain the prefix, local name,
252: * and namespace URI from the name pool.
253: */
254:
255: public int getNameCode();
256:
257: /**
258: * Get the fingerprint of the name of the element. This is similar to the nameCode, except that
259: * it does not contain any information about the prefix: so two elements with the same fingerprint
260: * have the same name, excluding prefix. This method
261: * can be used after the {@link #START_ELEMENT}, {@link #END_ELEMENT}, {@link #PROCESSING_INSTRUCTION},
262: * {@link #ATTRIBUTE}, or {@link #NAMESPACE} events.
263: * If called at other times, the result is undefined and may result in an IllegalStateException.
264: * If called when the current node is an unnamed namespace node (a node representing the default namespace)
265: * the returned value is -1.
266: * @return the fingerprint. The fingerprint can be used to obtain the local name
267: * and namespace URI from the name pool.
268: */
269:
270: public int getFingerprint();
271:
272: /**
273: * Get the string value of the current element, text node, processing-instruction,
274: * or top-level attribute or namespace node, or atomic value.
275: *
276: * <p>In other situations the result is undefined and may result in an IllegalStateException.</p>
277: *
278: * <p>If the most recent event was a {@link #START_ELEMENT}, this method causes the content
279: * of the element to be read. The current event on completion of this method will be the
280: * corresponding {@link #END_ELEMENT}. The next call of next() will return the event following
281: * the END_ELEMENT event.</p>
282: *
283: * @return the String Value of the node in question, defined according to the rules in the
284: * XPath data model.
285: */
286:
287: public CharSequence getStringValue() throws XPathException;
288:
289: /**
290: * Get the type annotation of the current attribute or element node, or atomic value.
291: * The result of this method is undefined unless the most recent event was START_ELEMENT,
292: * ATTRIBUTE, or ATOMIC_VALUE. In the case of an attribute node, the additional bit NodeInfo.IS_DTD_TYPE
293: * may be set to indicate a DTD-derived ID or IDREF/S type.
294: *
295: * @return the type annotation. This code is the fingerprint of a type name, which may be
296: * resolved to a {@link net.sf.saxon.type.SchemaType} by access to the Configuration.
297: */
298:
299: public int getTypeAnnotation();
300:
301: /**
302: * Get an atomic value. This call may be used only when the last event reported was
303: * ATOMIC_VALUE. This indicates that the PullProvider is reading a sequence that contains
304: * a free-standing atomic value; it is never used when reading the content of a node.
305: */
306:
307: public AtomicValue getAtomicValue();
308:
309: /**
310: * Get the location of the current event.
311: * For an event stream representing a real document, the location information
312: * should identify the location in the lexical XML source. For a constructed document, it should
313: * identify the location in the query or stylesheet that caused the node to be created.
314: * A value of null can be returned if no location information is available.
315: */
316:
317: public SourceLocator getSourceLocator();
318:
319: }
320:
321: //
322: // The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
323: // you may not use this file except in compliance with the License. You may obtain a copy of the
324: // License at http://www.mozilla.org/MPL/
325: //
326: // Software distributed under the License is distributed on an "AS IS" basis,
327: // WITHOUT WARRANTY OF ANY KIND, either express or implied.
328: // See the License for the specific language governing rights and limitations under the License.
329: //
330: // The Original Code is: all this file.
331: //
332: // The Initial Developer of the Original Code is Michael H. Kay.
333: //
334: // Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
335: //
336: // Contributor(s): none.
337: //
|