001: /*
002: * Javolution - Java(TM) Solution for Real-Time and Embedded Systems
003: * Copyright (C) 2006 - Javolution (http://javolution.org/)
004: * All rights reserved.
005: *
006: * Permission to use, copy, modify, and distribute this software is
007: * freely granted, provided that this notice is preserved.
008: */
009: package javolution.xml.stream;
010:
011: import javolution.text.CharArray;
012: import j2me.lang.CharSequence;
013:
014: /**
015: * <p> This interface is similar to
016: * <code>javax.xml.stream.XMLStreamReader</code>; but it does not forces
017: * dynamic allocation when parsing (its methods returns
018: * {@link CharArray CharArray} instances instead of {@link String}).</p>
019: *
020: * <p> Except for the speed (faster) and its real-time characteristics
021: * the usage/behavior is about the same as its StAX counterpart.</p>
022: *
023: * <p> The {@link CharArray CharArray} instances returned by this reader
024: * supports fast primitive conversions as illustrated below:[code]
025: *
026: * // Creates a new reader (potentially recycled).
027: * XMLInputFactory factory = XMLInputFactory.newInstance();
028: * XMLStreamReader reader = factory.createXMLStreamReader(inputStream);
029: *
030: * while (reader.getEventType() != XMLStreamConstants.END_DOCUMENT) {
031: * switch (reader.next()) {
032: * case XMLStreamConstants.START_ELEMENT:
033: * if (reader.getLocalName().equals("Time")) {
034: * // Reads primitive types (int) attributes directly (no memory allocation).
035: * time.hour = reader.getAttributeValue("hour").toInt();
036: * time.minute = reader.getAttributeValue("minute").toInt();
037: * time.second = reader.getAttributeValue("second").toInt();
038: * }
039: * ...
040: * break;
041: * }
042: * }
043: *
044: * reader.close(); // Recycles the reader.
045: * inputStream.close(); // Underlying stream has to be closed explicitly.
046: * [/code]
047: *
048: * @author <a href="mailto:jean-marie@dautelle.com">Jean-Marie Dautelle</a>
049: * @version 4.0, September 4, 2006
050: */
051: public interface XMLStreamReader extends XMLStreamConstants {
052:
053: /**
054: * Gets the value of a feature/property from the underlying implementation
055: *
056: * @param name the name of the property.
057: * @return the value of the property.
058: */
059: public Object getProperty(String name)
060: throws IllegalArgumentException;
061:
062: /**
063: * Gets next parsing event - contiguous character data is returned into a
064: * single chunk.
065: *
066: * By default entity references must be expanded and reported transparently
067: * to the application. An exception will be thrown if an entity reference
068: * cannot be expanded. If element content is empty (i.e. content is "") then
069: * no CHARACTERS event will be reported.
070: *
071: * <p>
072: * Given the following XML:<br>
073: * <foo><!--description-->content
074: * text<![CDATA[<greeting>Hello</greeting>]]>other content</foo><br>
075: * The behavior of calling next() when being on foo will be:<br>
076: * 1- the comment (COMMENT)<br>
077: * 2- then the characters section (CHARACTERS)<br>
078: * 3- then the CDATA section (another CHARACTERS)<br>
079: * 4- then the next characters section (another CHARACTERS)<br>
080: * 5- then the END_ELEMENT<br>
081: *
082: * <p>
083: * <b>NOTE:</b> empty element (such as <tag/>) will be reported with two
084: * separate events: START_ELEMENT, END_ELEMENT - This preserves parsing
085: * equivalency of empty element to <tag></tag>.
086: *
087: * This method will throw an IllegalStateException if it is called after
088: * hasNext() returns false.
089: *
090: * @return the integer code corresponding to the current parse event
091: * @throws NoSuchElementException if this is called when hasNext()
092: * returns false
093: * @throws XMLStreamException if there is an error processing the
094: * underlying XML source
095: */
096: public int next() throws XMLStreamException;
097:
098: /**
099: * Tests if the current event is of the given type and if the namespace and
100: * name match the current namespace and name of the current event. If the
101: * namespaceURI is null it is not checked for equality, if the localName is
102: * null it is not checked for equality.
103: *
104: * @param type the event type.
105: * @param namespaceURI the uri of the event, may be null.
106: * @param localName the localName of the event, may be null.
107: * @throws XMLStreamException if the required values are not matched.
108: */
109: public void require(int type, CharSequence namespaceURI,
110: CharSequence localName) throws XMLStreamException;
111:
112: /**
113: * Reads the content of a text-only element, an exception is thrown if this
114: * is not a text-only element. Regardless of the value of
115: * javax.xml.stream.isCoalescing this method always returns coalesced
116: * content. <br />
117: * Precondition: the current event is START_ELEMENT. <br />
118: * Postcondition: the current event is the corresponding END_ELEMENT.
119: *
120: * <br />
121: * The method does the following (implementations are free to optimized but
122: * must do equivalent processing):
123: *
124: * <pre>
125: * if (getEventType() != XMLStreamConstants.START_ELEMENT) {
126: * throw new XMLStreamException(
127: * "parser must be on START_ELEMENT to read next text", getLocation());
128: * }
129: * int eventType = next();
130: * StringBuffer content = new StringBuffer();
131: * while (eventType != XMLStreamConstants.END_ELEMENT) {
132: * if (eventType == XMLStreamConstants.CHARACTERS
133: * || eventType == XMLStreamConstants.CDATA
134: * || eventType == XMLStreamConstants.SPACE
135: * || eventType == XMLStreamConstants.ENTITY_REFERENCE) {
136: * buf.append(getText());
137: * } else if (eventType == XMLStreamConstants.PROCESSING_INSTRUCTION
138: * || eventType == XMLStreamConstants.COMMENT) {
139: * // skipping
140: * } else if (eventType == XMLStreamConstants.END_DOCUMENT) {
141: * throw new XMLStreamException(
142: * "unexpected end of document when reading element text content",
143: * this);
144: * } else if (eventType == XMLStreamConstants.START_ELEMENT) {
145: * throw new XMLStreamException(
146: * "element text content may not contain START_ELEMENT",
147: * getLocation());
148: * } else {
149: * throw new XMLStreamException("Unexpected event type " + eventType,
150: * getLocation());
151: * }
152: * eventType = next();
153: * }
154: * return buf.toString();
155: * </pre>
156: *
157: * @throws XMLStreamException if the current event is not a START_ELEMENT
158: * or if a non text element is encountered.
159: */
160: public CharArray getElementText() throws XMLStreamException;
161:
162: /**
163: * Skips any white space (isWhiteSpace() returns true), COMMENT, or
164: * PROCESSING_INSTRUCTION, until a START_ELEMENT or END_ELEMENT is reached.
165: * If other than white space characters, COMMENT, PROCESSING_INSTRUCTION,
166: * START_ELEMENT, END_ELEMENT are encountered, an exception is thrown. This
167: * method should be used when processing element-only content seperated by
168: * white space.
169: *
170: * <br />
171: * Precondition: none <br />
172: * Postcondition: the current event is START_ELEMENT or END_ELEMENT and
173: * cursor may have moved over any whitespace event.
174: *
175: * <br />
176: * Essentially it does the following (implementations are free to optimized
177: * but must do equivalent processing):
178: *
179: * <pre>
180: * int eventType = next();
181: * while((eventType == XMLStreamConstants.CHARACTERS && isWhiteSpace()) // skip whitespace
182: * || (eventType == XMLStreamConstants.CDATA && isWhiteSpace())
183: * // skip whitespace
184: * || eventType == XMLStreamConstants.SPACE
185: * || eventType == XMLStreamConstants.PROCESSING_INSTRUCTION
186: * || eventType == XMLStreamConstants.COMMENT
187: * ) {
188: * eventType = next();
189: * }
190: * if (eventType != XMLStreamConstants.START_ELEMENT && eventType != XMLStreamConstants.END_ELEMENT) {
191: * throw new String XMLStreamException("expected start or end tag", getLocation());
192: * }
193: * return eventType;
194: * </pre>
195: *
196: * @return the event type of the element read (START_ELEMENT or END_ELEMENT)
197: * @throws XMLStreamException if the current event is not white space,
198: * PROCESSING_INSTRUCTION, START_ELEMENT or END_ELEMENT
199: * @throws NoSuchElementException if this is called when hasNext()
200: * returns false
201: */
202: public int nextTag() throws XMLStreamException;
203:
204: /**
205: * Returns true if there are more parsing events and false if there are no
206: * more events. This method will return false if the current state of the
207: * XMLStreamReader is END_DOCUMENT.
208: *
209: * @return true if there are more events, false otherwise.
210: * @throws XMLStreamException if there is a fatal error detecting the next
211: * state.
212: */
213: public boolean hasNext() throws XMLStreamException;
214:
215: /**
216: * Frees any resources associated with this Reader. This method does not
217: * close the underlying input source.
218: *
219: * @throws XMLStreamException if there are errors freeing associated
220: * resources
221: */
222: public void close() throws XMLStreamException;
223:
224: /**
225: * Returns the uri for the given prefix. The uri returned depends on the
226: * current state of the processor.
227: *
228: * <p>
229: * <strong>NOTE:</strong>The 'xml' prefix is bound as defined in <a
230: * href="http://www.w3.org/TR/REC-xml-names/#ns-using">Namespaces in XML</a>
231: * specification to "http://www.w3.org/XML/1998/namespace".
232: *
233: * <p>
234: * <strong>NOTE:</strong> The 'xmlns' prefix must be resolved to following
235: * namespace <a
236: * href="http://www.w3.org/2000/xmlns/">http://www.w3.org/2000/xmlns/</a>
237: *
238: * @param prefix the prefix to lookup.
239: * @return the uri bound to the given prefix or <code>null</code> if it is
240: * not bound
241: */
242: public CharArray getNamespaceURI(CharSequence prefix);
243:
244: /**
245: * Indicates if the cursor points to a start tag.
246: *
247: * @return <code>true</code> if the cursor points to a start tag;
248: * <code>false</code> otherwise.
249: */
250: public boolean isStartElement();
251:
252: /**
253: * Indicates if the cursor points to an end tag.
254: *
255: * @return <code>true</code> if the cursor points to a end tag;
256: * <code>false</code> otherwise.
257: */
258: public boolean isEndElement();
259:
260: /**
261: * Indicates if the cursor points to character data.
262: *
263: * @return <code>true</code> if the cursor points to character data;
264: * <code>false</code> otherwise.
265: */
266: public boolean isCharacters();
267:
268: /**
269: * Indicates if the cursor points to character data that consists
270: * of all whitespace.
271: *
272: * @return <code>true</code> if the cursor points to whitespaces;
273: * <code>false</code> otherwise.
274: */
275: public boolean isWhiteSpace();
276:
277: /**
278: * Returns the normalized attribute value of the attribute with the
279: * namespace and localName.
280: *
281: * @param namespaceURI the namespace of the attribute or <code>null</code>.
282: * @param localName the local name of the attribute.
283: * @return returns the value of the attribute or <code>null</code>.
284: * @throws IllegalStateException if not a START_ELEMENT or ATTRIBUTE.
285: */
286: public CharArray getAttributeValue(CharSequence namespaceURI,
287: CharSequence localName);
288:
289: /**
290: * Returns the count of attributes on this START_ELEMENT, this method is
291: * only valid on a START_ELEMENT or ATTRIBUTE. This count excludes namespace
292: * definitions. Attribute indices are zero-based.
293: *
294: * @return returns the number of attributes.
295: * @throws IllegalStateException if not a START_ELEMENT or ATTRIBUTE.
296: */
297: public int getAttributeCount();
298:
299: /**
300: * Returns the namespace of the attribute at the provided index
301: *
302: * @param index the position of the attribute.
303: * @return the namespace URI or <code>null</code> if no prefix.
304: * @throws IllegalStateException if not a START_ELEMENT or ATTRIBUTE.
305: */
306: public CharArray getAttributeNamespace(int index);
307:
308: /**
309: * Returns the localName of the attribute at the provided index.
310: *
311: * @param index the position of the attribute.
312: * @return the localName of the attribute.
313: * @throws IllegalStateException if not a START_ELEMENT or ATTRIBUTE.
314: */
315: public CharArray getAttributeLocalName(int index);
316:
317: /**
318: * Returns the prefix of this attribute at the provided index
319: *
320: * @param index the position of the attribute.
321: * @return the prefix of the attribute or <code>null</code> if no prefix.
322: * @throws IllegalStateException if not a START_ELEMENT or ATTRIBUTE.
323: */
324: public CharArray getAttributePrefix(int index);
325:
326: /**
327: * Returns the XML type of the attribute at the provided index.
328: *
329: * @param index the position of the attribute
330: * @return the XML type of the attribute.
331: * @throws IllegalStateException if not a START_ELEMENT or ATTRIBUTE.
332: */
333: public CharArray getAttributeType(int index);
334:
335: /**
336: * Returns the value of the attribute at the index.
337: *
338: * @param index the position of the attribute.
339: * @return the attribute value.
340: * @throws IllegalStateException if not a START_ELEMENT or ATTRIBUTE.
341: */
342: public CharArray getAttributeValue(int index);
343:
344: /**
345: * Indicates if this attribute was created by default.
346: *
347: * @param index the position of the attribute.
348: * @return <code>true</code> if this is a default attribute;
349: * <code>false</code> otherwise.
350: * @throws IllegalStateException if not a START_ELEMENT or ATTRIBUTE.
351: */
352: public boolean isAttributeSpecified(int index);
353:
354: /**
355: * Returns the count of namespaces declared on this START_ELEMENT or
356: * END_ELEMENT. This method is only valid on a START_ELEMENT, END_ELEMENT or
357: * NAMESPACE. On an END_ELEMENT the count is of the namespaces that are
358: * about to go out of scope. This is the equivalent of the information
359: * reported by SAX callback for an end element event.
360: *
361: * @return returns the number of namespace declarations on this specific
362: * element.
363: * @throws IllegalStateException if not a START_ELEMENT or END_ELEMENT.
364: */
365: public int getNamespaceCount();
366:
367: /**
368: * Returns the prefix for the namespace declared at the index.
369: *
370: * @param index the position of the namespace declaration.
371: * @return returns the namespace prefix or <code>null</code> if no prefix.
372: * @throws IllegalStateException if this is not a START_ELEMENT,
373: * END_ELEMENT or NAMESPACE.
374: */
375: public CharArray getNamespacePrefix(int index);
376:
377: /**
378: * Returns the URI for the namespace declared at the index.
379: *
380: * @param index the position of the namespace declaration.
381: * @return returns the namespace uri or <code>null</code> if no prefix.
382: * @throws IllegalStateException if this is not a START_ELEMENT,
383: * END_ELEMENT or NAMESPACE.
384: */
385: public CharArray getNamespaceURI(int index);
386:
387: /**
388: * Returns a read only namespace context for the current position.
389: *
390: * @return return a namespace context
391: */
392: public NamespaceContext getNamespaceContext();
393:
394: /**
395: * Returns an integer code that indicates the type of the event the cursor
396: * is pointing to.
397: *
398: * @return the event type.
399: */
400: public int getEventType();
401:
402: /**
403: * Returns the current value of the parse event as a string, this returns
404: * the string value of a CHARACTERS event, returns the value of a COMMENT,
405: * the replacement value for an ENTITY_REFERENCE, the string value of a
406: * CDATA section, the string value for a SPACE event, or the String value of
407: * the internal subset of the DTD. If an ENTITY_REFERENCE has been resolved,
408: * any character data will be reported as CHARACTERS events.
409: *
410: * @return the current text or <code>null</code>
411: * @throws IllegalStateException if this state is not a valid text state.
412: */
413: public CharArray getText();
414:
415: /**
416: * Returns an array which contains the characters from this event. This
417: * array should be treated as read-only and transient. I.e. the array will
418: * contain the text characters until the XMLStreamReader moves on to the
419: * next event. Attempts to hold onto the character array beyond that time or
420: * modify the contents of the array are breaches of the contract for this
421: * interface.
422: *
423: * @return the current text or an empty array.
424: * @throws IllegalStateException if this state is not a valid text state.
425: */
426: public char[] getTextCharacters();
427:
428: /**
429: * Gets the the text associated with a CHARACTERS, SPACE or CDATA event.
430: * Text starting a "sourceStart" is copied into "target" starting at
431: * "targetStart". Up to "length" characters are copied. The number of
432: * characters actually copied is returned.
433: *
434: * The "sourceStart" argument must be greater or equal to 0 and less than or
435: * equal to the number of characters associated with the event. Usually, one
436: * requests text starting at a "sourceStart" of 0. If the number of
437: * characters actually copied is less than the "length", then there is no
438: * more text. Otherwise, subsequent calls need to be made until all text has
439: * been retrieved. For example:
440: *
441: * <code>
442: * int length = 1024;
443: * char[] myBuffer = new char[ length ];
444: *
445: * for ( int sourceStart = 0 ; ; sourceStart += length )
446: * {
447: * int nCopied = stream.getTextCharacters( sourceStart, myBuffer, 0, length );
448: *
449: * if (nCopied < length)
450: * break;
451: * }
452: * </code> XMLStreamException may be thrown
453: * if there are any XML errors in the underlying source. The "targetStart"
454: * argument must be greater than or equal to 0 and less than the length of
455: * "target", Length must be greater than 0 and "targetStart + length" must
456: * be less than or equal to length of "target".
457: *
458: * @param sourceStart the index of te first character in the source array
459: * to copy
460: * @param target the destination array
461: * @param targetStart the start offset in the target array
462: * @param length the number of characters to copy
463: * @return the number of characters actually copied
464: * @throws XMLStreamException if the XML source is not well-formed.
465: * @throws IndexOutOfBoundsException
466: * if targetStart < 0 or > than the length of target
467: * @throws IndexOutOfBoundsException
468: * if length < 0 or targetStart + length > length of target
469: * @throws UnsupportedOperationException if this method is not supported.
470: */
471: public int getTextCharacters(int sourceStart, char[] target,
472: int targetStart, int length) throws XMLStreamException;
473:
474: /**
475: * Returns the offset into the text character array where the first
476: * character (of this text event) is stored.
477: *
478: * @throws IllegalStateException if this state is not a valid text state.
479: */
480: public int getTextStart();
481:
482: /**
483: * Returns the length of the sequence of characters for this Text event
484: * within the text character array.
485: *
486: * @throws IllegalStateException if this state is not a valid text state.
487: */
488: public int getTextLength();
489:
490: /**
491: * Returns the input encoding if known or <code>null</code> if unknown.
492: *
493: * @return the encoding of this instance or null.
494: */
495: public String getEncoding();
496:
497: /**
498: * Indicates if the current event has text. The following
499: * events have text: CHARACTERS, DTD ,ENTITY_REFERENCE, COMMENT, SPACE.
500: *
501: * @return <code>true</code> if the current event as text;
502: * <code>false</code> otherwise.
503: */
504: public boolean hasText();
505:
506: /**
507: * Return the current location of the processor. If the Location is unknown
508: * the processor should return an implementation of Location that returns -1
509: * for the location and null for the publicId and systemId. The location
510: * information is only valid until next() is called.
511: *
512: * @return the current location.
513: */
514: public Location getLocation();
515:
516: /**
517: * Returns the (local) name of the current event. For START_ELEMENT or
518: * END_ELEMENT returns the (local) name of the current element. For
519: * ENTITY_REFERENCE it returns entity name. The current event must be
520: * START_ELEMENT or END_ELEMENT, or ENTITY_REFERENCE.
521: *
522: * @return the localName.
523: * @throws IllegalStateException if this not a START_ELEMENT, END_ELEMENT
524: * or ENTITY_REFERENCE
525: */
526: public CharArray getLocalName();
527:
528: /**
529: * Indicates if the current event has a name (is a START_ELEMENT or
530: * END_ELEMENT).
531: *
532: * @return <code>true</code> if the current event has a name;
533: * <code>false</code> otherwise.
534: */
535: public boolean hasName();
536:
537: /**
538: * If the current event is a START_ELEMENT or END_ELEMENT this method
539: * returns the URI of the current element (URI mapping to the prefix
540: * element/attribute has; or if no prefix <code>null</code>).
541: *
542: * @return the URI bound to this elements prefix or <code>null</code>.
543: * @throws IllegalStateException if not a START_ELEMENT, END_ELEMENT
544: * or ATTRIBUTE.
545: */
546: public CharArray getNamespaceURI();
547:
548: /**
549: * Returns the prefix of the current event or null if the event does not
550: * have a prefix.
551: *
552: * @return the prefix or <code>null</code>
553: * @throws IllegalStateException if not a START_ELEMENT or END_ELEMENT.
554: */
555: public CharArray getPrefix();
556:
557: /**
558: * Gets the xml version declared on the xml declaration.
559: *
560: * @return the XML version or <code>null</code>
561: */
562: public CharArray getVersion();
563:
564: /**
565: * Gets the standalone declaration from the xml declaration.
566: *
567: * @return <code>true</code> if this is standalone;
568: * <code>false</code> otherwise.
569: */
570: public boolean isStandalone();
571:
572: /**
573: * Checks if standalone was set in the document.
574: *
575: * @return <code>true</code> if standalone was set;
576: * <code>false</code> otherwise.
577: */
578: public boolean standaloneSet();
579:
580: /**
581: * Returns the character encoding declared on the xml declaration.
582: *
583: * @return the encoding declared in the document or <code>null</code>
584: */
585: public CharArray getCharacterEncodingScheme();
586:
587: /**
588: * Returns the target of a processing instruction.
589: *
590: * @return the target.
591: * @throws IllegalStateException if the current event is not a
592: * {@link XMLStreamConstants#PROCESSING_INSTRUCTION}
593: */
594: public CharArray getPITarget();
595:
596: /**
597: * Get the data section of a processing instruction.
598: *
599: * @return the data (if processing instruction has any) or
600: * <code>null</code> if the processing instruction only has target.
601: * @throws IllegalStateException if the current event is not a
602: * {@link XMLStreamConstants#PROCESSING_INSTRUCTION}
603: */
604: public CharArray getPIData();
605: }
|