0001: /*
0002: * The Apache Software License, Version 1.1
0003: *
0004: *
0005: * Copyright (c) 1999 The Apache Software Foundation. All rights
0006: * reserved.
0007: *
0008: * Redistribution and use in source and binary forms, with or without
0009: * modification, are permitted provided that the following conditions
0010: * are met:
0011: *
0012: * 1. Redistributions of source code must retain the above copyright
0013: * notice, this list of conditions and the following disclaimer.
0014: *
0015: * 2. Redistributions in binary form must reproduce the above copyright
0016: * notice, this list of conditions and the following disclaimer in
0017: * the documentation and/or other materials provided with the
0018: * distribution.
0019: *
0020: * 3. The end-user documentation included with the redistribution,
0021: * if any, must include the following acknowledgment:
0022: * "This product includes software developed by the
0023: * Apache Software Foundation (http://www.apache.org/)."
0024: * Alternately, this acknowledgment may appear in the software itself,
0025: * if and wherever such third-party acknowledgments normally appear.
0026: *
0027: * 4. The names "Xerces" and "Apache Software Foundation" must
0028: * not be used to endorse or promote products derived from this
0029: * software without prior written permission. For written
0030: * permission, please contact apache@apache.org.
0031: *
0032: * 5. Products derived from this software may not be called "Apache",
0033: * nor may "Apache" appear in their name, without prior written
0034: * permission of the Apache Software Foundation.
0035: *
0036: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
0037: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
0038: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0039: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
0040: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
0041: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
0042: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
0043: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0044: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
0045: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
0046: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
0047: * SUCH DAMAGE.
0048: * ====================================================================
0049: *
0050: * This software consists of voluntary contributions made by many
0051: * individuals on behalf of the Apache Software Foundation and was
0052: * originally based on software copyright (c) 1999, International
0053: * Business Machines, Inc., http://www.apache.org. For more
0054: * information on the Apache Software Foundation, please see
0055: * <http://www.apache.org/>.
0056: */
0057:
0058: // Sep 14, 2000:
0059: // Fixed comments to preserve whitespaces and add a line break
0060: // when indenting. Reported by Gervase Markham <GRM@dataconnection.com>
0061: // Sep 14, 2000:
0062: // Fixed serializer to report IO exception directly, instead at
0063: // the end of document processing.
0064: // Reported by Patrick Higgins <phiggins@transzap.com>
0065: // Sep 13, 2000:
0066: // CR in character data will print as &#0D;
0067: // Aug 25, 2000:
0068: // Fixed processing instruction printing inside element content
0069: // to not escape content. Reported by Mikael Staldal
0070: // <d96-mst@d.kth.se>
0071: // Aug 25, 2000:
0072: // Added ability to omit comments.
0073: // Contributed by Anupam Bagchi <abagchi@jtcsv.com>
0074: // Aug 26, 2000:
0075: // Fixed bug in newline handling when preserving spaces.
0076: // Contributed by Mike Dusseault <mdusseault@home.com>
0077: // Aug 29, 2000:
0078: // Fixed state.unescaped not being set to false when
0079: // entering element state.
0080: // Reported by Lowell Vaughn <lvaughn@agillion.com>
0081:
0082: package org.apache.xml.serialize;
0083:
0084: import java.io.Writer;
0085: import java.io.OutputStream;
0086: import java.io.OutputStreamWriter;
0087: import java.io.IOException;
0088: import java.io.UnsupportedEncodingException;
0089: import java.util.Vector;
0090: import java.util.Hashtable;
0091: import java.util.StringTokenizer;
0092:
0093: import org.w3c.dom.*;
0094: import org.xml.sax.DocumentHandler;
0095: import org.xml.sax.DTDHandler;
0096: import org.xml.sax.Locator;
0097: import org.xml.sax.SAXException;
0098: import org.xml.sax.ContentHandler;
0099: import org.xml.sax.Attributes;
0100: import org.xml.sax.ext.LexicalHandler;
0101: import org.xml.sax.ext.DeclHandler;
0102:
0103: /**
0104: * Base class for a serializer supporting both DOM and SAX pretty
0105: * serializing of XML/HTML/XHTML documents. Derived classes perform
0106: * the method-specific serializing, this class provides the common
0107: * serializing mechanisms.
0108: * <p>
0109: * The serializer must be initialized with the proper writer and
0110: * output format before it can be used by calling {@link #init}.
0111: * The serializer can be reused any number of times, but cannot
0112: * be used concurrently by two threads.
0113: * <p>
0114: * If an output stream is used, the encoding is taken from the
0115: * output format (defaults to <tt>UTF-8</tt>). If a writer is
0116: * used, make sure the writer uses the same encoding (if applies)
0117: * as specified in the output format.
0118: * <p>
0119: * The serializer supports both DOM and SAX. DOM serializing is done
0120: * by calling {@link #serialize} and SAX serializing is done by firing
0121: * SAX events and using the serializer as a document handler.
0122: * This also applies to derived class.
0123: * <p>
0124: * If an I/O exception occurs while serializing, the serializer
0125: * will not throw an exception directly, but only throw it
0126: * at the end of serializing (either DOM or SAX's {@link
0127: * org.xml.sax.DocumentHandler#endDocument}.
0128: * <p>
0129: * For elements that are not specified as whitespace preserving,
0130: * the serializer will potentially break long text lines at space
0131: * boundaries, indent lines, and serialize elements on separate
0132: * lines. Line terminators will be regarded as spaces, and
0133: * spaces at beginning of line will be stripped.
0134: * <p>
0135: * When indenting, the serializer is capable of detecting seemingly
0136: * element content, and serializing these elements indented on separate
0137: * lines. An element is serialized indented when it is the first or
0138: * last child of an element, or immediate following or preceding
0139: * another element.
0140: *
0141: *
0142: * @version $Revision: 1.26 $ $Date: 2001/07/11 15:40:10 $
0143: * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
0144: * @see Serializer
0145: * @see DOMSerializer
0146: */
0147: public abstract class BaseMarkupSerializer implements ContentHandler,
0148: DocumentHandler, LexicalHandler, DTDHandler, DeclHandler,
0149: DOMSerializer, Serializer {
0150:
0151: private EncodingInfo _encodingInfo;
0152:
0153: /**
0154: * Holds array of all element states that have been entered.
0155: * The array is automatically resized. When leaving an element,
0156: * it's state is not removed but reused when later returning
0157: * to the same nesting level.
0158: */
0159: private ElementState[] _elementStates;
0160:
0161: /**
0162: * The index of the next state to place in the array,
0163: * or one plus the index of the current state. When zero,
0164: * we are in no state.
0165: */
0166: private int _elementStateCount;
0167:
0168: /**
0169: * Vector holding comments and PIs that come before the root
0170: * element (even after it), see {@link #serializePreRoot}.
0171: */
0172: private Vector _preRoot;
0173:
0174: /**
0175: * If the document has been started (header serialized), this
0176: * flag is set to true so it's not started twice.
0177: */
0178: protected boolean _started;
0179:
0180: /**
0181: * True if the serializer has been prepared. This flag is set
0182: * to false when the serializer is reset prior to using it,
0183: * and to true after it has been prepared for usage.
0184: */
0185: private boolean _prepared;
0186:
0187: /**
0188: * Association between namespace URIs (keys) and prefixes (values).
0189: * Accumulated here prior to starting an element and placing this
0190: * list in the element state.
0191: */
0192: protected Hashtable _prefixes;
0193:
0194: /**
0195: * The public identifier of the document type, if known.
0196: */
0197: protected String _docTypePublicId;
0198:
0199: /**
0200: * The system identifier of the document type, if known.
0201: */
0202: protected String _docTypeSystemId;
0203:
0204: /**
0205: * The output format associated with this serializer. This will never
0206: * be a null reference. If no format was passed to the constructor,
0207: * the default one for this document type will be used. The format
0208: * object is never changed by the serializer.
0209: */
0210: protected OutputFormat _format;
0211:
0212: /**
0213: * The printer used for printing text parts.
0214: */
0215: protected Printer _printer;
0216:
0217: /**
0218: * True if indenting printer.
0219: */
0220: protected boolean _indenting;
0221:
0222: /**
0223: * The underlying writer.
0224: */
0225: private Writer _writer;
0226:
0227: /**
0228: * The output stream.
0229: */
0230: private OutputStream _output;
0231:
0232: //--------------------------------//
0233: // Constructor and initialization //
0234: //--------------------------------//
0235:
0236: /**
0237: * Protected constructor can only be used by derived class.
0238: * Must initialize the serializer before serializing any document,
0239: * see {@link #init}.
0240: */
0241: protected BaseMarkupSerializer(OutputFormat format) {
0242: int i;
0243:
0244: _elementStates = new ElementState[10];
0245: for (i = 0; i < _elementStates.length; ++i)
0246: _elementStates[i] = new ElementState();
0247: _format = format;
0248: }
0249:
0250: public DocumentHandler asDocumentHandler() throws IOException {
0251: prepare();
0252: return this ;
0253: }
0254:
0255: public ContentHandler asContentHandler() throws IOException {
0256: prepare();
0257: return this ;
0258: }
0259:
0260: public DOMSerializer asDOMSerializer() throws IOException {
0261: prepare();
0262: return this ;
0263: }
0264:
0265: public void setOutputByteStream(OutputStream output) {
0266: if (output == null)
0267: throw new NullPointerException(
0268: "SER001 Argument 'output' is null.");
0269: _output = output;
0270: _writer = null;
0271: reset();
0272: }
0273:
0274: public void setOutputCharStream(Writer writer) {
0275: if (writer == null)
0276: throw new NullPointerException(
0277: "SER001 Argument 'writer' is null.");
0278: _writer = writer;
0279: _output = null;
0280: reset();
0281: }
0282:
0283: public void setOutputFormat(OutputFormat format) {
0284: if (format == null)
0285: throw new NullPointerException(
0286: "SER001 Argument 'format' is null.");
0287: _format = format;
0288: reset();
0289: }
0290:
0291: public boolean reset() {
0292: if (_elementStateCount > 1)
0293: throw new IllegalStateException(
0294: "Serializer reset in the middle of serialization");
0295: _prepared = false;
0296: return true;
0297: }
0298:
0299: protected void prepare() throws IOException {
0300: if (_prepared)
0301: return;
0302:
0303: if (_writer == null && _output == null)
0304: throw new IOException(
0305: "SER002 No writer supplied for serializer");
0306: // If the output stream has been set, use it to construct
0307: // the writer. It is possible that the serializer has been
0308: // reused with the same output stream and different encoding.
0309:
0310: _encodingInfo = _format.getEncodingInfo();
0311:
0312: if (_output != null) {
0313: _writer = _encodingInfo.getWriter(_output);
0314: }
0315:
0316: if (_format.getIndenting()) {
0317: _indenting = true;
0318: _printer = new IndentPrinter(_writer, _format);
0319: } else {
0320: _indenting = false;
0321: _printer = new Printer(_writer, _format);
0322: }
0323:
0324: ElementState state;
0325:
0326: _elementStateCount = 0;
0327: state = _elementStates[0];
0328: state.namespaceURI = null;
0329: state.localName = null;
0330: state.rawName = null;
0331: state.preserveSpace = _format.getPreserveSpace();
0332: state.empty = true;
0333: state.afterElement = false;
0334: state.afterComment = false;
0335: state.doCData = state.inCData = false;
0336: state.prefixes = null;
0337:
0338: _docTypePublicId = _format.getDoctypePublic();
0339: _docTypeSystemId = _format.getDoctypeSystem();
0340: _started = false;
0341: _prepared = true;
0342: }
0343:
0344: //----------------------------------//
0345: // DOM document serializing methods //
0346: //----------------------------------//
0347:
0348: /**
0349: * Serializes the DOM element using the previously specified
0350: * writer and output format. Throws an exception only if
0351: * an I/O exception occured while serializing.
0352: *
0353: * @param elem The element to serialize
0354: * @throws IOException An I/O exception occured while
0355: * serializing
0356: */
0357: public void serialize(Element elem) throws IOException {
0358: prepare();
0359: serializeNode(elem);
0360: _printer.flush();
0361: if (_printer.getException() != null)
0362: throw _printer.getException();
0363: }
0364:
0365: /**
0366: * Serializes the DOM document fragment using the previously specified
0367: * writer and output format. Throws an exception only if
0368: * an I/O exception occured while serializing.
0369: *
0370: * @param frag The document fragment to serialize
0371: * @throws IOException An I/O exception occured while
0372: * serializing
0373: */
0374: public void serialize(DocumentFragment frag) throws IOException {
0375: prepare();
0376: serializeNode(frag);
0377: _printer.flush();
0378: if (_printer.getException() != null)
0379: throw _printer.getException();
0380: }
0381:
0382: /**
0383: * Serializes the DOM document using the previously specified
0384: * writer and output format. Throws an exception only if
0385: * an I/O exception occured while serializing.
0386: *
0387: * @param doc The document to serialize
0388: * @throws IOException An I/O exception occured while
0389: * serializing
0390: */
0391: public void serialize(Document doc) throws IOException {
0392: prepare();
0393: serializeNode(doc);
0394: serializePreRoot();
0395: _printer.flush();
0396: if (_printer.getException() != null)
0397: throw _printer.getException();
0398: }
0399:
0400: //------------------------------------------//
0401: // SAX document handler serializing methods //
0402: //------------------------------------------//
0403:
0404: public void startDocument() throws SAXException {
0405: try {
0406: prepare();
0407: } catch (IOException except) {
0408: throw new SAXException(except.toString());
0409: }
0410: // Nothing to do here. All the magic happens in startDocument(String)
0411: }
0412:
0413: public void characters(char[] chars, int start, int length)
0414: throws SAXException {
0415: ElementState state;
0416:
0417: try {
0418: state = content();
0419:
0420: // Check if text should be print as CDATA section or unescaped
0421: // based on elements listed in the output format (the element
0422: // state) or whether we are inside a CDATA section or entity.
0423:
0424: if (state.inCData || state.doCData) {
0425: int saveIndent;
0426:
0427: // Print a CDATA section. The text is not escaped, but ']]>'
0428: // appearing in the code must be identified and dealt with.
0429: // The contents of a text node is considered space preserving.
0430: if (!state.inCData) {
0431: _printer.printText("<![CDATA[");
0432: state.inCData = true;
0433: }
0434: saveIndent = _printer.getNextIndent();
0435: _printer.setNextIndent(0);
0436: for (int index = 0; index < length; ++index) {
0437: if (index + 2 < length && chars[index] == ']'
0438: && chars[index + 1] == ']'
0439: && chars[index + 2] == '>') {
0440:
0441: printText(chars, start, index + 2, true, true);
0442: _printer.printText("]]><![CDATA[");
0443: start += index + 2;
0444: length -= index + 2;
0445: index = 0;
0446: }
0447: }
0448: if (length > 0)
0449: printText(chars, start, length, true, true);
0450: _printer.setNextIndent(saveIndent);
0451:
0452: } else {
0453:
0454: int saveIndent;
0455:
0456: if (state.preserveSpace) {
0457: // If preserving space then hold of indentation so no
0458: // excessive spaces are printed at line breaks, escape
0459: // the text content without replacing spaces and print
0460: // the text breaking only at line breaks.
0461: saveIndent = _printer.getNextIndent();
0462: _printer.setNextIndent(0);
0463: printText(chars, start, length, true,
0464: state.unescaped);
0465: _printer.setNextIndent(saveIndent);
0466: } else {
0467: printText(chars, start, length, false,
0468: state.unescaped);
0469: }
0470: }
0471: } catch (IOException except) {
0472: throw new SAXException(except);
0473: }
0474: }
0475:
0476: public void ignorableWhitespace(char[] chars, int start, int length)
0477: throws SAXException {
0478: int i;
0479:
0480: try {
0481: content();
0482:
0483: // Print ignorable whitespaces only when indenting, after
0484: // all they are indentation. Cancel the indentation to
0485: // not indent twice.
0486: if (_indenting) {
0487: _printer.setThisIndent(0);
0488: for (i = start; length-- > 0; ++i)
0489: _printer.printText(chars[i]);
0490: }
0491: } catch (IOException except) {
0492: throw new SAXException(except);
0493: }
0494: }
0495:
0496: public final void processingInstruction(String target, String code)
0497: throws SAXException {
0498: try {
0499: processingInstructionIO(target, code);
0500: } catch (IOException except) {
0501: throw new SAXException(except);
0502: }
0503: }
0504:
0505: public void processingInstructionIO(String target, String code)
0506: throws IOException {
0507: int index;
0508: StringBuffer buffer;
0509: ElementState state;
0510:
0511: state = content();
0512: buffer = new StringBuffer(40);
0513:
0514: // Create the processing instruction textual representation.
0515: // Make sure we don't have '?>' inside either target or code.
0516: index = target.indexOf("?>");
0517: if (index >= 0)
0518: buffer.append("<?").append(target.substring(0, index));
0519: else
0520: buffer.append("<?").append(target);
0521: if (code != null) {
0522: buffer.append(' ');
0523: index = code.indexOf("?>");
0524: if (index >= 0)
0525: buffer.append(code.substring(0, index));
0526: else
0527: buffer.append(code);
0528: }
0529: buffer.append("?>");
0530:
0531: // If before the root element (or after it), do not print
0532: // the PI directly but place it in the pre-root vector.
0533: if (isDocumentState()) {
0534: if (_preRoot == null)
0535: _preRoot = new Vector();
0536: _preRoot.addElement(buffer.toString());
0537: } else {
0538: _printer.indent();
0539: printText(buffer.toString(), true, true);
0540: _printer.unindent();
0541: if (_indenting)
0542: state.afterElement = true;
0543: }
0544: }
0545:
0546: public void comment(char[] chars, int start, int length)
0547: throws SAXException {
0548: try {
0549: comment(new String(chars, start, length));
0550: } catch (IOException except) {
0551: throw new SAXException(except);
0552: }
0553: }
0554:
0555: public void comment(String text) throws IOException {
0556: StringBuffer buffer;
0557: int index;
0558: ElementState state;
0559:
0560: if (_format.getOmitComments())
0561: return;
0562:
0563: state = content();
0564: buffer = new StringBuffer(40);
0565: // Create the processing comment textual representation.
0566: // Make sure we don't have '-->' inside the comment.
0567: index = text.indexOf("-->");
0568: if (index >= 0)
0569: buffer.append("<!--").append(text.substring(0, index))
0570: .append("-->");
0571: else
0572: buffer.append("<!--").append(text).append("-->");
0573:
0574: // If before the root element (or after it), do not print
0575: // the comment directly but place it in the pre-root vector.
0576: if (isDocumentState()) {
0577: if (_preRoot == null)
0578: _preRoot = new Vector();
0579: _preRoot.addElement(buffer.toString());
0580: } else {
0581: // Indent this element on a new line if the first
0582: // content of the parent element or immediately
0583: // following an element.
0584: if (_indenting && !state.preserveSpace)
0585: _printer.breakLine();
0586: _printer.indent();
0587: printText(buffer.toString(), true, true);
0588: _printer.unindent();
0589: if (_indenting)
0590: state.afterElement = true;
0591: }
0592: state.afterComment = true;
0593: state.afterElement = false;
0594: }
0595:
0596: public void startCDATA() {
0597: ElementState state;
0598:
0599: state = getElementState();
0600: state.doCData = true;
0601: }
0602:
0603: public void endCDATA() {
0604: ElementState state;
0605:
0606: state = getElementState();
0607: state.doCData = false;
0608: }
0609:
0610: public void startNonEscaping() {
0611: ElementState state;
0612:
0613: state = getElementState();
0614: state.unescaped = true;
0615: }
0616:
0617: public void endNonEscaping() {
0618: ElementState state;
0619:
0620: state = getElementState();
0621: state.unescaped = false;
0622: }
0623:
0624: public void startPreserving() {
0625: ElementState state;
0626:
0627: state = getElementState();
0628: state.preserveSpace = true;
0629: }
0630:
0631: public void endPreserving() {
0632: ElementState state;
0633:
0634: state = getElementState();
0635: state.preserveSpace = false;
0636: }
0637:
0638: /**
0639: * Called at the end of the document to wrap it up.
0640: * Will flush the output stream and throw an exception
0641: * if any I/O error occured while serializing.
0642: *
0643: * @throws SAXException An I/O exception occured during
0644: * serializing
0645: */
0646: public void endDocument() throws SAXException {
0647: try {
0648: // Print all the elements accumulated outside of
0649: // the root element.
0650: serializePreRoot();
0651: // Flush the output, this is necessary for buffered output.
0652: _printer.flush();
0653: } catch (IOException except) {
0654: throw new SAXException(except);
0655: }
0656: }
0657:
0658: public void startEntity(String name) {
0659: // ???
0660: }
0661:
0662: public void endEntity(String name) {
0663: // ???
0664: }
0665:
0666: public void setDocumentLocator(Locator locator) {
0667: // Nothing to do
0668: }
0669:
0670: //-----------------------------------------//
0671: // SAX content handler serializing methods //
0672: //-----------------------------------------//
0673:
0674: public void skippedEntity(String name) throws SAXException {
0675: try {
0676: endCDATA();
0677: content();
0678: _printer.printText('&');
0679: _printer.printText(name);
0680: _printer.printText(';');
0681: } catch (IOException except) {
0682: throw new SAXException(except);
0683: }
0684: }
0685:
0686: public void startPrefixMapping(String prefix, String uri)
0687: throws SAXException {
0688: if (_prefixes == null)
0689: _prefixes = new Hashtable();
0690: _prefixes.put(uri, prefix == null ? "" : prefix);
0691: }
0692:
0693: public void endPrefixMapping(String prefix) throws SAXException {
0694: }
0695:
0696: //------------------------------------------//
0697: // SAX DTD/Decl handler serializing methods //
0698: //------------------------------------------//
0699:
0700: public final void startDTD(String name, String publicId,
0701: String systemId) throws SAXException {
0702: try {
0703: _printer.enterDTD();
0704: _docTypePublicId = publicId;
0705: _docTypeSystemId = systemId;
0706: } catch (IOException except) {
0707: throw new SAXException(except);
0708: }
0709: }
0710:
0711: public void endDTD() {
0712: // Nothing to do here, all the magic occurs in startDocument(String).
0713: }
0714:
0715: public void elementDecl(String name, String model)
0716: throws SAXException {
0717: try {
0718: _printer.enterDTD();
0719: _printer.printText("<!ELEMENT ");
0720: _printer.printText(name);
0721: _printer.printText(' ');
0722: _printer.printText(model);
0723: _printer.printText('>');
0724: if (_indenting)
0725: _printer.breakLine();
0726: } catch (IOException except) {
0727: throw new SAXException(except);
0728: }
0729: }
0730:
0731: public void attributeDecl(String eName, String aName, String type,
0732: String valueDefault, String value) throws SAXException {
0733: try {
0734: _printer.enterDTD();
0735: _printer.printText("<!ATTLIST ");
0736: _printer.printText(eName);
0737: _printer.printText(' ');
0738: _printer.printText(aName);
0739: _printer.printText(' ');
0740: _printer.printText(type);
0741: if (valueDefault != null) {
0742: _printer.printText(' ');
0743: _printer.printText(valueDefault);
0744: }
0745: if (value != null) {
0746: _printer.printText(" \"");
0747: printEscaped(value);
0748: _printer.printText('"');
0749: }
0750: _printer.printText('>');
0751: if (_indenting)
0752: _printer.breakLine();
0753: } catch (IOException except) {
0754: throw new SAXException(except);
0755: }
0756: }
0757:
0758: public void internalEntityDecl(String name, String value)
0759: throws SAXException {
0760: try {
0761: _printer.enterDTD();
0762: _printer.printText("<!ENTITY ");
0763: _printer.printText(name);
0764: _printer.printText(" \"");
0765: printEscaped(value);
0766: _printer.printText("\">");
0767: if (_indenting)
0768: _printer.breakLine();
0769: } catch (IOException except) {
0770: throw new SAXException(except);
0771: }
0772: }
0773:
0774: public void externalEntityDecl(String name, String publicId,
0775: String systemId) throws SAXException {
0776: try {
0777: _printer.enterDTD();
0778: unparsedEntityDecl(name, publicId, systemId, null);
0779: } catch (IOException except) {
0780: throw new SAXException(except);
0781: }
0782: }
0783:
0784: public void unparsedEntityDecl(String name, String publicId,
0785: String systemId, String notationName) throws SAXException {
0786: try {
0787: _printer.enterDTD();
0788: if (publicId == null) {
0789: _printer.printText("<!ENTITY ");
0790: _printer.printText(name);
0791: _printer.printText(" SYSTEM ");
0792: printDoctypeURL(systemId);
0793: } else {
0794: _printer.printText("<!ENTITY ");
0795: _printer.printText(name);
0796: _printer.printText(" PUBLIC ");
0797: printDoctypeURL(publicId);
0798: _printer.printText(' ');
0799: printDoctypeURL(systemId);
0800: }
0801: if (notationName != null) {
0802: _printer.printText(" NDATA ");
0803: _printer.printText(notationName);
0804: }
0805: _printer.printText('>');
0806: if (_indenting)
0807: _printer.breakLine();
0808: } catch (IOException except) {
0809: throw new SAXException(except);
0810: }
0811: }
0812:
0813: public void notationDecl(String name, String publicId,
0814: String systemId) throws SAXException {
0815: try {
0816: _printer.enterDTD();
0817: if (publicId != null) {
0818: _printer.printText("<!NOTATION ");
0819: _printer.printText(name);
0820: _printer.printText(" PUBLIC ");
0821: printDoctypeURL(publicId);
0822: if (systemId != null) {
0823: _printer.printText(' ');
0824: printDoctypeURL(systemId);
0825: }
0826: } else {
0827: _printer.printText("<!NOTATION ");
0828: _printer.printText(name);
0829: _printer.printText(" SYSTEM ");
0830: printDoctypeURL(systemId);
0831: }
0832: _printer.printText('>');
0833: if (_indenting)
0834: _printer.breakLine();
0835: } catch (IOException except) {
0836: throw new SAXException(except);
0837: }
0838: }
0839:
0840: //------------------------------------------//
0841: // Generic node serializing methods methods //
0842: //------------------------------------------//
0843:
0844: /**
0845: * Serialize the DOM node. This method is shared across XML, HTML and XHTML
0846: * serializers and the differences are masked out in a separate {@link
0847: * #serializeElement}.
0848: *
0849: * @param node The node to serialize
0850: * @see #serializeElement
0851: * @throws IOException An I/O exception occured while
0852: * serializing
0853: */
0854: protected void serializeNode(Node node) throws IOException {
0855: // Based on the node type call the suitable SAX handler.
0856: // Only comments entities and documents which are not
0857: // handled by SAX are serialized directly.
0858: switch (node.getNodeType()) {
0859: case Node.TEXT_NODE: {
0860: String text;
0861:
0862: text = node.getNodeValue();
0863: if (text != null)
0864: if (!_indenting
0865: || getElementState().preserveSpace
0866: || (text.replace('\n', ' ').trim().length() != 0))
0867: characters(text);
0868: break;
0869: }
0870:
0871: case Node.CDATA_SECTION_NODE: {
0872: String text;
0873:
0874: text = node.getNodeValue();
0875: if (text != null) {
0876: startCDATA();
0877: characters(text);
0878: endCDATA();
0879: }
0880: break;
0881: }
0882:
0883: case Node.COMMENT_NODE: {
0884: String text;
0885:
0886: if (!_format.getOmitComments()) {
0887: text = node.getNodeValue();
0888: if (text != null)
0889: comment(text);
0890: }
0891: break;
0892: }
0893:
0894: case Node.ENTITY_REFERENCE_NODE: {
0895: Node child;
0896:
0897: endCDATA();
0898: content();
0899: child = node.getFirstChild();
0900: while (child != null) {
0901: serializeNode(child);
0902: child = child.getNextSibling();
0903: }
0904: break;
0905: }
0906:
0907: case Node.PROCESSING_INSTRUCTION_NODE:
0908: processingInstructionIO(node.getNodeName(), node
0909: .getNodeValue());
0910: break;
0911:
0912: case Node.ELEMENT_NODE:
0913: serializeElement((Element) node);
0914: break;
0915:
0916: case Node.DOCUMENT_NODE: {
0917: DocumentType docType;
0918: DOMImplementation domImpl;
0919: NamedNodeMap map;
0920: Entity entity;
0921: Notation notation;
0922: int i;
0923:
0924: // If there is a document type, use the SAX events to
0925: // serialize it.
0926: docType = ((Document) node).getDoctype();
0927: if (docType != null) {
0928: // DOM Level 2 (or higher)
0929: domImpl = ((Document) node).getImplementation();
0930: try {
0931: String internal;
0932:
0933: _printer.enterDTD();
0934: _docTypePublicId = docType.getPublicId();
0935: _docTypeSystemId = docType.getSystemId();
0936: internal = docType.getInternalSubset();
0937: if (internal != null && internal.length() > 0)
0938: _printer.printText(internal);
0939: endDTD();
0940: }
0941: // DOM Level 1 -- does implementation have methods?
0942: catch (NoSuchMethodError nsme) {
0943: Class docTypeClass = docType.getClass();
0944:
0945: String docTypePublicId = null;
0946: String docTypeSystemId = null;
0947: try {
0948: java.lang.reflect.Method getPublicId = docTypeClass
0949: .getMethod("getPublicId", null);
0950: if (getPublicId.getReturnType().equals(
0951: String.class)) {
0952: docTypePublicId = (String) getPublicId
0953: .invoke(docType, null);
0954: }
0955: } catch (Exception e) {
0956: // ignore
0957: }
0958: try {
0959: java.lang.reflect.Method getSystemId = docTypeClass
0960: .getMethod("getSystemId", null);
0961: if (getSystemId.getReturnType().equals(
0962: String.class)) {
0963: docTypeSystemId = (String) getSystemId
0964: .invoke(docType, null);
0965: }
0966: } catch (Exception e) {
0967: // ignore
0968: }
0969: _printer.enterDTD();
0970: _docTypePublicId = docTypePublicId;
0971: _docTypeSystemId = docTypeSystemId;
0972: endDTD();
0973: }
0974: }
0975: // !! Fall through
0976: }
0977: case Node.DOCUMENT_FRAGMENT_NODE: {
0978: Node child;
0979:
0980: // By definition this will happen if the node is a document,
0981: // document fragment, etc. Just serialize its contents. It will
0982: // work well for other nodes that we do not know how to serialize.
0983: child = node.getFirstChild();
0984: while (child != null) {
0985: serializeNode(child);
0986: child = child.getNextSibling();
0987: }
0988: break;
0989: }
0990:
0991: default:
0992: break;
0993: }
0994: }
0995:
0996: /**
0997: * Must be called by a method about to print any type of content.
0998: * If the element was just opened, the opening tag is closed and
0999: * will be matched to a closing tag. Returns the current element
1000: * state with <tt>empty</tt> and <tt>afterElement</tt> set to false.
1001: *
1002: * @return The current element state
1003: * @throws IOException An I/O exception occured while
1004: * serializing
1005: */
1006: protected ElementState content() throws IOException {
1007: ElementState state;
1008:
1009: state = getElementState();
1010: if (!isDocumentState()) {
1011: // Need to close CData section first
1012: if (state.inCData && !state.doCData) {
1013: _printer.printText("]]>");
1014: state.inCData = false;
1015: }
1016: // If this is the first content in the element,
1017: // change the state to not-empty and close the
1018: // opening element tag.
1019: if (state.empty) {
1020: _printer.printText('>');
1021: state.empty = false;
1022: }
1023: // Except for one content type, all of them
1024: // are not last element. That one content
1025: // type will take care of itself.
1026: state.afterElement = false;
1027: // Except for one content type, all of them
1028: // are not last comment. That one content
1029: // type will take care of itself.
1030: state.afterComment = false;
1031: }
1032: return state;
1033: }
1034:
1035: /**
1036: * Called to print the text contents in the prevailing element format.
1037: * Since this method is capable of printing text as CDATA, it is used
1038: * for that purpose as well. White space handling is determined by the
1039: * current element state. In addition, the output format can dictate
1040: * whether the text is printed as CDATA or unescaped.
1041: *
1042: * @param text The text to print
1043: * @param unescaped True is should print unescaped
1044: * @throws IOException An I/O exception occured while
1045: * serializing
1046: */
1047: protected void characters(String text) throws IOException {
1048: ElementState state;
1049:
1050: state = content();
1051: // Check if text should be print as CDATA section or unescaped
1052: // based on elements listed in the output format (the element
1053: // state) or whether we are inside a CDATA section or entity.
1054:
1055: if (state.inCData || state.doCData) {
1056: StringBuffer buffer;
1057: int index;
1058: int saveIndent;
1059:
1060: // Print a CDATA section. The text is not escaped, but ']]>'
1061: // appearing in the code must be identified and dealt with.
1062: // The contents of a text node is considered space preserving.
1063: buffer = new StringBuffer(text.length());
1064: if (!state.inCData) {
1065: buffer.append("<![CDATA[");
1066: state.inCData = true;
1067: }
1068: index = text.indexOf("]]>");
1069: while (index >= 0) {
1070: buffer.append(text.substring(0, index + 2)).append(
1071: "]]><![CDATA[");
1072: text = text.substring(index + 2);
1073: index = text.indexOf("]]>");
1074: }
1075: buffer.append(text);
1076: saveIndent = _printer.getNextIndent();
1077: _printer.setNextIndent(0);
1078: printText(buffer.toString(), true, true);
1079: _printer.setNextIndent(saveIndent);
1080:
1081: } else {
1082:
1083: int saveIndent;
1084:
1085: if (state.preserveSpace) {
1086: // If preserving space then hold of indentation so no
1087: // excessive spaces are printed at line breaks, escape
1088: // the text content without replacing spaces and print
1089: // the text breaking only at line breaks.
1090: saveIndent = _printer.getNextIndent();
1091: _printer.setNextIndent(0);
1092: printText(text, true, state.unescaped);
1093: _printer.setNextIndent(saveIndent);
1094: } else {
1095: printText(text, false, state.unescaped);
1096: }
1097: }
1098: }
1099:
1100: /**
1101: * Returns the suitable entity reference for this character value,
1102: * or null if no such entity exists. Calling this method with <tt>'&'</tt>
1103: * will return <tt>"&amp;"</tt>.
1104: *
1105: * @param ch Character value
1106: * @return Character entity name, or null
1107: */
1108: protected abstract String getEntityRef(int ch);
1109:
1110: /**
1111: * Called to serializee the DOM element. The element is serialized based on
1112: * the serializer's method (XML, HTML, XHTML).
1113: *
1114: * @param elem The element to serialize
1115: * @throws IOException An I/O exception occured while
1116: * serializing
1117: */
1118: protected abstract void serializeElement(Element elem)
1119: throws IOException;
1120:
1121: /**
1122: * Comments and PIs cannot be serialized before the root element,
1123: * because the root element serializes the document type, which
1124: * generally comes first. Instead such PIs and comments are
1125: * accumulated inside a vector and serialized by calling this
1126: * method. Will be called when the root element is serialized
1127: * and when the document finished serializing.
1128: *
1129: * @throws IOException An I/O exception occured while
1130: * serializing
1131: */
1132: protected void serializePreRoot() throws IOException {
1133: int i;
1134:
1135: if (_preRoot != null) {
1136: for (i = 0; i < _preRoot.size(); ++i) {
1137: printText((String) _preRoot.elementAt(i), true, true);
1138: if (_indenting)
1139: _printer.breakLine();
1140: }
1141: _preRoot.removeAllElements();
1142: }
1143: }
1144:
1145: //---------------------------------------------//
1146: // Text pretty printing and formatting methods //
1147: //---------------------------------------------//
1148:
1149: /**
1150: * Called to print additional text with whitespace handling.
1151: * If spaces are preserved, the text is printed as if by calling
1152: * {@link #printText(String)} with a call to {@link #breakLine}
1153: * for each new line. If spaces are not preserved, the text is
1154: * broken at space boundaries if longer than the line width;
1155: * Multiple spaces are printed as such, but spaces at beginning
1156: * of line are removed.
1157: *
1158: * @param text The text to print
1159: * @param preserveSpace Space preserving flag
1160: * @param unescaped Print unescaped
1161: */
1162: protected final void printText(char[] chars, int start, int length,
1163: boolean preserveSpace, boolean unescaped)
1164: throws IOException {
1165: int index;
1166: char ch;
1167:
1168: if (preserveSpace) {
1169: // Preserving spaces: the text must print exactly as it is,
1170: // without breaking when spaces appear in the text and without
1171: // consolidating spaces. If a line terminator is used, a line
1172: // break will occur.
1173: while (length-- > 0) {
1174: ch = chars[start];
1175: ++start;
1176: if (ch == '\n' || ch == '\r' || unescaped)
1177: _printer.printText(ch);
1178: else
1179: printEscaped(ch);
1180: }
1181: } else {
1182: // Not preserving spaces: print one part at a time, and
1183: // use spaces between parts to break them into different
1184: // lines. Spaces at beginning of line will be stripped
1185: // by printing mechanism. Line terminator is treated
1186: // no different than other text part.
1187: while (length-- > 0) {
1188: ch = chars[start];
1189: ++start;
1190: if (ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n'
1191: || ch == '\r')
1192: _printer.printSpace();
1193: else if (unescaped)
1194: _printer.printText(ch);
1195: else
1196: printEscaped(ch);
1197: }
1198: }
1199: }
1200:
1201: protected final void printText(String text, boolean preserveSpace,
1202: boolean unescaped) throws IOException {
1203: int index;
1204: char ch;
1205:
1206: if (preserveSpace) {
1207: // Preserving spaces: the text must print exactly as it is,
1208: // without breaking when spaces appear in the text and without
1209: // consolidating spaces. If a line terminator is used, a line
1210: // break will occur.
1211: for (index = 0; index < text.length(); ++index) {
1212: ch = text.charAt(index);
1213: if (ch == '\n' || ch == '\r' || unescaped)
1214: _printer.printText(ch);
1215: else
1216: printEscaped(ch);
1217: }
1218: } else {
1219: // Not preserving spaces: print one part at a time, and
1220: // use spaces between parts to break them into different
1221: // lines. Spaces at beginning of line will be stripped
1222: // by printing mechanism. Line terminator is treated
1223: // no different than other text part.
1224: for (index = 0; index < text.length(); ++index) {
1225: ch = text.charAt(index);
1226: if (ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n'
1227: || ch == '\r')
1228: _printer.printSpace();
1229: else if (unescaped)
1230: _printer.printText(ch);
1231: else
1232: printEscaped(ch);
1233: }
1234: }
1235: }
1236:
1237: /**
1238: * Print a document type public or system identifier URL.
1239: * Encapsulates the URL in double quotes, escapes non-printing
1240: * characters and print it equivalent to {@link #printText}.
1241: *
1242: * @param url The document type url to print
1243: */
1244: protected void printDoctypeURL(String url) throws IOException {
1245: int i;
1246:
1247: _printer.printText('"');
1248: for (i = 0; i < url.length(); ++i) {
1249: if (url.charAt(i) == '"' || url.charAt(i) < 0x20
1250: || url.charAt(i) > 0x7F) {
1251: _printer.printText('%');
1252: _printer.printText(Integer.toHexString(url.charAt(i)));
1253: } else
1254: _printer.printText(url.charAt(i));
1255: }
1256: _printer.printText('"');
1257: }
1258:
1259: protected void printEscaped(int ch) throws IOException {
1260: String charRef;
1261:
1262: // If there is a suitable entity reference for this
1263: // character, print it. The list of available entity
1264: // references is almost but not identical between
1265: // XML and HTML.
1266: charRef = getEntityRef(ch);
1267: if (charRef != null) {
1268: _printer.printText('&');
1269: _printer.printText(charRef);
1270: _printer.printText(';');
1271: } else if ((ch >= ' ' && _encodingInfo.isPrintable(ch) && ch != 0xF7)
1272: || ch == '\n' || ch == '\r' || ch == '\t') {
1273: // If the character is not printable, print as character reference.
1274: // Non printables are below ASCII space but not tab or line
1275: // terminator, ASCII delete, or above a certain Unicode threshold.
1276: if (ch < 0x10000) {
1277: _printer.printText((char) ch);
1278: } else {
1279: _printer
1280: .printText((char) (((ch - 0x10000) >> 10) + 0xd800));
1281: _printer
1282: .printText((char) (((ch - 0x10000) & 0x3ff) + 0xdc00));
1283: }
1284:
1285: } else {
1286: _printer.printText("&#x");
1287: _printer.printText(Integer.toHexString(ch));
1288: _printer.printText(';');
1289: }
1290: }
1291:
1292: /**
1293: * Escapes a string so it may be printed as text content or attribute
1294: * value. Non printable characters are escaped using character references.
1295: * Where the format specifies a deault entity reference, that reference
1296: * is used (e.g. <tt>&lt;</tt>).
1297: *
1298: * @param source The string to escape
1299: */
1300: protected void printEscaped(String source) throws IOException {
1301: for (int i = 0; i < source.length(); ++i) {
1302: int ch = source.charAt(i);
1303: if ((ch & 0xfc00) == 0xd800 && i + 1 < source.length()) {
1304: int lowch = source.charAt(i + 1);
1305: if ((lowch & 0xfc00) == 0xdc00) {
1306: ch = 0x10000 + ((ch - 0xd800) << 10) + lowch
1307: - 0xdc00;
1308: i++;
1309: }
1310: }
1311: printEscaped(ch);
1312: }
1313: }
1314:
1315: //--------------------------------//
1316: // Element state handling methods //
1317: //--------------------------------//
1318:
1319: /**
1320: * Return the state of the current element.
1321: *
1322: * @return Current element state
1323: */
1324: protected ElementState getElementState() {
1325: return _elementStates[_elementStateCount];
1326: }
1327:
1328: /**
1329: * Enter a new element state for the specified element.
1330: * Tag name and space preserving is specified, element
1331: * state is initially empty.
1332: *
1333: * @return Current element state, or null
1334: */
1335: protected ElementState enterElementState(String namespaceURI,
1336: String localName, String rawName, boolean preserveSpace) {
1337: ElementState state;
1338:
1339: if (_elementStateCount + 1 == _elementStates.length) {
1340: ElementState[] newStates;
1341:
1342: // Need to create a larger array of states. This does not happen
1343: // often, unless the document is really deep.
1344: newStates = new ElementState[_elementStates.length + 10];
1345: for (int i = 0; i < _elementStates.length; ++i)
1346: newStates[i] = _elementStates[i];
1347: for (int i = _elementStates.length; i < newStates.length; ++i)
1348: newStates[i] = new ElementState();
1349: _elementStates = newStates;
1350: }
1351:
1352: ++_elementStateCount;
1353: state = _elementStates[_elementStateCount];
1354: state.namespaceURI = namespaceURI;
1355: state.localName = localName;
1356: state.rawName = rawName;
1357: state.preserveSpace = preserveSpace;
1358: state.empty = true;
1359: state.afterElement = false;
1360: state.afterComment = false;
1361: state.doCData = state.inCData = false;
1362: state.unescaped = false;
1363: state.prefixes = _prefixes;
1364:
1365: _prefixes = null;
1366: return state;
1367: }
1368:
1369: /**
1370: * Leave the current element state and return to the
1371: * state of the parent element. If this was the root
1372: * element, return to the state of the document.
1373: *
1374: * @return Previous element state
1375: */
1376: protected ElementState leaveElementState() {
1377: if (_elementStateCount > 0) {
1378: /*Corrected by David Blondeau (blondeau@intalio.com)*/
1379: _prefixes = null;
1380: //_prefixes = _elementStates[ _elementStateCount ].prefixes;
1381: --_elementStateCount;
1382: return _elementStates[_elementStateCount];
1383: } else
1384: throw new IllegalStateException(
1385: "Internal error: element state is zero");
1386: }
1387:
1388: /**
1389: * Returns true if in the state of the document.
1390: * Returns true before entering any element and after
1391: * leaving the root element.
1392: *
1393: * @return True if in the state of the document
1394: */
1395: protected boolean isDocumentState() {
1396: return _elementStateCount == 0;
1397: }
1398:
1399: /**
1400: * Returns the namespace prefix for the specified URI.
1401: * If the URI has been mapped to a prefix, returns the
1402: * prefix, otherwise returns null.
1403: *
1404: * @param namespaceURI The namespace URI
1405: * @return The namespace prefix if known, or null
1406: */
1407: protected String getPrefix(String namespaceURI) {
1408: String prefix;
1409:
1410: if (_prefixes != null) {
1411: prefix = (String) _prefixes.get(namespaceURI);
1412: if (prefix != null)
1413: return prefix;
1414: }
1415: if (_elementStateCount == 0)
1416: return null;
1417: else {
1418: for (int i = _elementStateCount; i > 0; --i) {
1419: if (_elementStates[i].prefixes != null) {
1420: prefix = (String) _elementStates[i].prefixes
1421: .get(namespaceURI);
1422: if (prefix != null)
1423: return prefix;
1424: }
1425: }
1426: }
1427: return null;
1428: }
1429:
1430: }
|