001: /*
002: * Copyright 2001-2004 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: /*
017: * $Id: ToTextStream.java,v 1.22 2005/08/03 19:20:31 minchau Exp $
018: */
019: package org.apache.xml.serializer;
020:
021: import java.io.IOException;
022:
023: import org.apache.xml.serializer.utils.MsgKey;
024: import org.apache.xml.serializer.utils.Utils;
025: import org.xml.sax.Attributes;
026: import org.xml.sax.SAXException;
027:
028: /**
029: * This class is not a public API.
030: * It is only public because it is used in other packages.
031: * This class converts SAX or SAX-like calls to a
032: * serialized document for xsl:output method of "text".
033: * @xsl.usage internal
034: */
035: public final class ToTextStream extends ToStream {
036:
037: /**
038: * Default constructor.
039: */
040: public ToTextStream() {
041: super ();
042: }
043:
044: /**
045: * Receive notification of the beginning of a document.
046: *
047: * <p>The SAX parser will invoke this method only once, before any
048: * other methods in this interface or in DTDHandler (except for
049: * setDocumentLocator).</p>
050: *
051: * @throws org.xml.sax.SAXException Any SAX exception, possibly
052: * wrapping another exception.
053: *
054: * @throws org.xml.sax.SAXException
055: */
056: protected void startDocumentInternal()
057: throws org.xml.sax.SAXException {
058: super .startDocumentInternal();
059:
060: m_needToCallStartDocument = false;
061:
062: // No action for the moment.
063: }
064:
065: /**
066: * Receive notification of the end of a document.
067: *
068: * <p>The SAX parser will invoke this method only once, and it will
069: * be the last method invoked during the parse. The parser shall
070: * not invoke this method until it has either abandoned parsing
071: * (because of an unrecoverable error) or reached the end of
072: * input.</p>
073: *
074: * @throws org.xml.sax.SAXException Any SAX exception, possibly
075: * wrapping another exception.
076: *
077: * @throws org.xml.sax.SAXException
078: */
079: public void endDocument() throws org.xml.sax.SAXException {
080: flushPending();
081: flushWriter();
082: if (m_tracer != null)
083: super .fireEndDoc();
084: }
085:
086: /**
087: * Receive notification of the beginning of an element.
088: *
089: * <p>The Parser will invoke this method at the beginning of every
090: * element in the XML document; there will be a corresponding
091: * endElement() event for every startElement() event (even when the
092: * element is empty). All of the element's content will be
093: * reported, in order, before the corresponding endElement()
094: * event.</p>
095: *
096: * <p>If the element name has a namespace prefix, the prefix will
097: * still be attached. Note that the attribute list provided will
098: * contain only attributes with explicit values (specified or
099: * defaulted): #IMPLIED attributes will be omitted.</p>
100: *
101: *
102: * @param namespaceURI The Namespace URI, or the empty string if the
103: * element has no Namespace URI or if Namespace
104: * processing is not being performed.
105: * @param localName The local name (without prefix), or the
106: * empty string if Namespace processing is not being
107: * performed.
108: * @param name The qualified name (with prefix), or the
109: * empty string if qualified names are not available.
110: * @param atts The attributes attached to the element, if any.
111: * @throws org.xml.sax.SAXException Any SAX exception, possibly
112: * wrapping another exception.
113: * @see #endElement
114: * @see org.xml.sax.AttributeList
115: *
116: * @throws org.xml.sax.SAXException
117: */
118: public void startElement(String namespaceURI, String localName,
119: String name, Attributes atts)
120: throws org.xml.sax.SAXException {
121: // time to fire off startElement event
122: if (m_tracer != null) {
123: super .fireStartElem(name);
124: this .firePseudoAttributes();
125: }
126: return;
127: }
128:
129: /**
130: * Receive notification of the end of an element.
131: *
132: * <p>The SAX parser will invoke this method at the end of every
133: * element in the XML document; there will be a corresponding
134: * startElement() event for every endElement() event (even when the
135: * element is empty).</p>
136: *
137: * <p>If the element name has a namespace prefix, the prefix will
138: * still be attached to the name.</p>
139: *
140: *
141: * @param namespaceURI The Namespace URI, or the empty string if the
142: * element has no Namespace URI or if Namespace
143: * processing is not being performed.
144: * @param localName The local name (without prefix), or the
145: * empty string if Namespace processing is not being
146: * performed.
147: * @param name The qualified name (with prefix), or the
148: * empty string if qualified names are not available.
149: * @throws org.xml.sax.SAXException Any SAX exception, possibly
150: * wrapping another exception.
151: *
152: * @throws org.xml.sax.SAXException
153: */
154: public void endElement(String namespaceURI, String localName,
155: String name) throws org.xml.sax.SAXException {
156: if (m_tracer != null)
157: super .fireEndElem(name);
158: }
159:
160: /**
161: * Receive notification of character data.
162: *
163: * <p>The Parser will call this method to report each chunk of
164: * character data. SAX parsers may return all contiguous character
165: * data in a single chunk, or they may split it into several
166: * chunks; however, all of the characters in any single event
167: * must come from the same external entity, so that the Locator
168: * provides useful information.</p>
169: *
170: * <p>The application must not attempt to read from the array
171: * outside of the specified range.</p>
172: *
173: * <p>Note that some parsers will report whitespace using the
174: * ignorableWhitespace() method rather than this one (validating
175: * parsers must do so).</p>
176: *
177: * @param ch The characters from the XML document.
178: * @param start The start position in the array.
179: * @param length The number of characters to read from the array.
180: * @throws org.xml.sax.SAXException Any SAX exception, possibly
181: * wrapping another exception.
182: * @see #ignorableWhitespace
183: * @see org.xml.sax.Locator
184: */
185: public void characters(char ch[], int start, int length)
186: throws org.xml.sax.SAXException {
187:
188: flushPending();
189:
190: try {
191: if (inTemporaryOutputState()) {
192: /* leave characters un-processed as we are
193: * creating temporary output, the output generated by
194: * this serializer will be input to a final serializer
195: * later on and it will do the processing in final
196: * output state (not temporary output state).
197: *
198: * A "temporary" ToTextStream serializer is used to
199: * evaluate attribute value templates (for example),
200: * and the result of evaluating such a thing
201: * is fed into a final serializer later on.
202: */
203: m_writer.write(ch, start, length);
204: } else {
205: // In final output state we do process the characters!
206: writeNormalizedChars(ch, start, length, m_lineSepUse);
207: }
208:
209: if (m_tracer != null)
210: super .fireCharEvent(ch, start, length);
211: } catch (IOException ioe) {
212: throw new SAXException(ioe);
213: }
214: }
215:
216: /**
217: * If available, when the disable-output-escaping attribute is used,
218: * output raw text without escaping.
219: *
220: * @param ch The characters from the XML document.
221: * @param start The start position in the array.
222: * @param length The number of characters to read from the array.
223: *
224: * @throws org.xml.sax.SAXException Any SAX exception, possibly
225: * wrapping another exception.
226: */
227: public void charactersRaw(char ch[], int start, int length)
228: throws org.xml.sax.SAXException {
229:
230: try {
231: writeNormalizedChars(ch, start, length, m_lineSepUse);
232: } catch (IOException ioe) {
233: throw new SAXException(ioe);
234: }
235: }
236:
237: /**
238: * Normalize the characters, but don't escape. Different from
239: * SerializerToXML#writeNormalizedChars because it does not attempt to do
240: * XML escaping at all.
241: *
242: * @param ch The characters from the XML document.
243: * @param start The start position in the array.
244: * @param length The number of characters to read from the array.
245: * @param useLineSep true if the operating systems
246: * end-of-line separator should be output rather than a new-line character.
247: *
248: * @throws IOException
249: * @throws org.xml.sax.SAXException
250: */
251: void writeNormalizedChars(final char ch[], final int start,
252: final int length, final boolean useLineSep)
253: throws IOException, org.xml.sax.SAXException {
254: final String encoding = getEncoding();
255: final java.io.Writer writer = m_writer;
256: final int end = start + length;
257:
258: /* copy a few "constants" before the loop for performance */
259: final char S_LINEFEED = CharInfo.S_LINEFEED;
260:
261: // This for() loop always increments i by one at the end
262: // of the loop. Additional increments of i adjust for when
263: // two input characters (a high/low UTF16 surrogate pair)
264: // are processed.
265: for (int i = start; i < end; i++) {
266: final char c = ch[i];
267:
268: if (S_LINEFEED == c && useLineSep) {
269: writer.write(m_lineSep, 0, m_lineSepLen);
270: // one input char processed
271: } else if (m_encodingInfo.isInEncoding(c)) {
272: writer.write(c);
273: // one input char processed
274: } else if (Encodings.isHighUTF16Surrogate(c)) {
275: final int codePoint = writeUTF16Surrogate(c, ch, i, end);
276: if (codePoint != 0) {
277: // I think we can just emit the message,
278: // not crash and burn.
279: final String integralValue = Integer
280: .toString(codePoint);
281: final String msg = Utils.messages.createMessage(
282: MsgKey.ER_ILLEGAL_CHARACTER, new Object[] {
283: integralValue, encoding });
284:
285: //Older behavior was to throw the message,
286: //but newer gentler behavior is to write a message to System.err
287: //throw new SAXException(msg);
288: System.err.println(msg);
289:
290: }
291: i++; // two input chars processed
292: } else {
293: // Don't know what to do with this char, it is
294: // not in the encoding and not a high char in
295: // a surrogate pair, so write out as an entity ref
296: if (encoding != null) {
297: /* The output encoding is known,
298: * so somthing is wrong.
299: */
300:
301: // not in the encoding, so write out a character reference
302: writer.write('&');
303: writer.write('#');
304: writer.write(Integer.toString(c));
305: writer.write(';');
306:
307: // I think we can just emit the message,
308: // not crash and burn.
309: final String integralValue = Integer.toString(c);
310: final String msg = Utils.messages.createMessage(
311: MsgKey.ER_ILLEGAL_CHARACTER, new Object[] {
312: integralValue, encoding });
313:
314: //Older behavior was to throw the message,
315: //but newer gentler behavior is to write a message to System.err
316: //throw new SAXException(msg);
317: System.err.println(msg);
318: } else {
319: /* The output encoding is not known,
320: * so just write it out as-is.
321: */
322: writer.write(c);
323: }
324:
325: // one input char was processed
326: }
327: }
328: }
329:
330: /**
331: * Receive notification of cdata.
332: *
333: * <p>The Parser will call this method to report each chunk of
334: * character data. SAX parsers may return all contiguous character
335: * data in a single chunk, or they may split it into several
336: * chunks; however, all of the characters in any single event
337: * must come from the same external entity, so that the Locator
338: * provides useful information.</p>
339: *
340: * <p>The application must not attempt to read from the array
341: * outside of the specified range.</p>
342: *
343: * <p>Note that some parsers will report whitespace using the
344: * ignorableWhitespace() method rather than this one (validating
345: * parsers must do so).</p>
346: *
347: * @param ch The characters from the XML document.
348: * @param start The start position in the array.
349: * @param length The number of characters to read from the array.
350: * @throws org.xml.sax.SAXException Any SAX exception, possibly
351: * wrapping another exception.
352: * @see #ignorableWhitespace
353: * @see org.xml.sax.Locator
354: */
355: public void cdata(char ch[], int start, int length)
356: throws org.xml.sax.SAXException {
357: try {
358: writeNormalizedChars(ch, start, length, m_lineSepUse);
359: if (m_tracer != null)
360: super .fireCDATAEvent(ch, start, length);
361: } catch (IOException ioe) {
362: throw new SAXException(ioe);
363: }
364: }
365:
366: /**
367: * Receive notification of ignorable whitespace in element content.
368: *
369: * <p>Validating Parsers must use this method to report each chunk
370: * of ignorable whitespace (see the W3C XML 1.0 recommendation,
371: * section 2.10): non-validating parsers may also use this method
372: * if they are capable of parsing and using content models.</p>
373: *
374: * <p>SAX parsers may return all contiguous whitespace in a single
375: * chunk, or they may split it into several chunks; however, all of
376: * the characters in any single event must come from the same
377: * external entity, so that the Locator provides useful
378: * information.</p>
379: *
380: * <p>The application must not attempt to read from the array
381: * outside of the specified range.</p>
382: *
383: * @param ch The characters from the XML document.
384: * @param start The start position in the array.
385: * @param length The number of characters to read from the array.
386: * @throws org.xml.sax.SAXException Any SAX exception, possibly
387: * wrapping another exception.
388: * @see #characters
389: *
390: * @throws org.xml.sax.SAXException
391: */
392: public void ignorableWhitespace(char ch[], int start, int length)
393: throws org.xml.sax.SAXException {
394:
395: try {
396: writeNormalizedChars(ch, start, length, m_lineSepUse);
397: } catch (IOException ioe) {
398: throw new SAXException(ioe);
399: }
400: }
401:
402: /**
403: * Receive notification of a processing instruction.
404: *
405: * <p>The Parser will invoke this method once for each processing
406: * instruction found: note that processing instructions may occur
407: * before or after the main document element.</p>
408: *
409: * <p>A SAX parser should never report an XML declaration (XML 1.0,
410: * section 2.8) or a text declaration (XML 1.0, section 4.3.1)
411: * using this method.</p>
412: *
413: * @param target The processing instruction target.
414: * @param data The processing instruction data, or null if
415: * none was supplied.
416: * @throws org.xml.sax.SAXException Any SAX exception, possibly
417: * wrapping another exception.
418: *
419: * @throws org.xml.sax.SAXException
420: */
421: public void processingInstruction(String target, String data)
422: throws org.xml.sax.SAXException {
423: // flush anything pending first
424: flushPending();
425:
426: if (m_tracer != null)
427: super .fireEscapingEvent(target, data);
428: }
429:
430: /**
431: * Called when a Comment is to be constructed.
432: * Note that Xalan will normally invoke the other version of this method.
433: * %REVIEW% In fact, is this one ever needed, or was it a mistake?
434: *
435: * @param data The comment data.
436: * @throws org.xml.sax.SAXException Any SAX exception, possibly
437: * wrapping another exception.
438: */
439: public void comment(String data) throws org.xml.sax.SAXException {
440: final int length = data.length();
441: if (length > m_charsBuff.length) {
442: m_charsBuff = new char[length * 2 + 1];
443: }
444: data.getChars(0, length, m_charsBuff, 0);
445: comment(m_charsBuff, 0, length);
446: }
447:
448: /**
449: * Report an XML comment anywhere in the document.
450: *
451: * This callback will be used for comments inside or outside the
452: * document element, including comments in the external DTD
453: * subset (if read).
454: *
455: * @param ch An array holding the characters in the comment.
456: * @param start The starting position in the array.
457: * @param length The number of characters to use from the array.
458: * @throws org.xml.sax.SAXException The application may raise an exception.
459: */
460: public void comment(char ch[], int start, int length)
461: throws org.xml.sax.SAXException {
462:
463: flushPending();
464: if (m_tracer != null)
465: super .fireCommentEvent(ch, start, length);
466: }
467:
468: /**
469: * Receive notivication of a entityReference.
470: *
471: * @param name non-null reference to the name of the entity.
472: *
473: * @throws org.xml.sax.SAXException
474: */
475: public void entityReference(String name)
476: throws org.xml.sax.SAXException {
477: if (m_tracer != null)
478: super .fireEntityReference(name);
479: }
480:
481: /**
482: * @see ExtendedContentHandler#addAttribute(String, String, String, String, String)
483: */
484: public void addAttribute(String uri, String localName,
485: String rawName, String type, String value,
486: boolean XSLAttribute) {
487: // do nothing, just forget all about the attribute
488: }
489:
490: /**
491: * @see org.xml.sax.ext.LexicalHandler#endCDATA()
492: */
493: public void endCDATA() throws SAXException {
494: // do nothing
495: }
496:
497: /**
498: * @see ExtendedContentHandler#endElement(String)
499: */
500: public void endElement(String elemName) throws SAXException {
501: if (m_tracer != null)
502: super .fireEndElem(elemName);
503: }
504:
505: /**
506: * From XSLTC
507: */
508: public void startElement(String elementNamespaceURI,
509: String elementLocalName, String elementName)
510: throws SAXException {
511: if (m_needToCallStartDocument)
512: startDocumentInternal();
513: // time to fire off startlement event.
514: if (m_tracer != null) {
515: super .fireStartElem(elementName);
516: this .firePseudoAttributes();
517: }
518:
519: return;
520: }
521:
522: /**
523: * From XSLTC
524: */
525: public void characters(String characters) throws SAXException {
526: final int length = characters.length();
527: if (length > m_charsBuff.length) {
528: m_charsBuff = new char[length * 2 + 1];
529: }
530: characters.getChars(0, length, m_charsBuff, 0);
531: characters(m_charsBuff, 0, length);
532: }
533:
534: /**
535: * From XSLTC
536: */
537: public void addAttribute(String name, String value) {
538: // do nothing, forget about the attribute
539: }
540:
541: /**
542: * Add a unique attribute
543: */
544: public void addUniqueAttribute(String qName, String value, int flags)
545: throws SAXException {
546: // do nothing, forget about the attribute
547: }
548:
549: public boolean startPrefixMapping(String prefix, String uri,
550: boolean shouldFlush) throws SAXException {
551: // no namespace support for HTML
552: return false;
553: }
554:
555: public void startPrefixMapping(String prefix, String uri)
556: throws org.xml.sax.SAXException {
557: // no namespace support for HTML
558: }
559:
560: public void namespaceAfterStartElement(final String prefix,
561: final String uri) throws SAXException {
562: // no namespace support for HTML
563: }
564:
565: public void flushPending() throws org.xml.sax.SAXException {
566: if (m_needToCallStartDocument) {
567: startDocumentInternal();
568: m_needToCallStartDocument = false;
569: }
570: }
571: }
|