0001: /*
0002: * Licensed to the Apache Software Foundation (ASF) under one or more
0003: * contributor license agreements. See the NOTICE file distributed with
0004: * this work for additional information regarding copyright ownership.
0005: * The ASF licenses this file to You under the Apache License, Version 2.0
0006: * (the "License"); you may not use this file except in compliance with
0007: * the License. You may obtain a copy of the License at
0008: *
0009: * http://www.apache.org/licenses/LICENSE-2.0
0010: *
0011: * Unless required by applicable law or agreed to in writing, software
0012: * distributed under the License is distributed on an "AS IS" BASIS,
0013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014: * See the License for the specific language governing permissions and
0015: * limitations under the License.
0016: */
0017:
0018: // Sep 14, 2000:
0019: // Fixed comments to preserve whitespaces and add a line break
0020: // when indenting. Reported by Gervase Markham <gerv@gerv.net>
0021: // Sep 14, 2000:
0022: // Fixed serializer to report IO exception directly, instead at
0023: // the end of document processing.
0024: // Reported by Patrick Higgins <phiggins@transzap.com>
0025: // Sep 13, 2000:
0026: // CR in character data will print as &#0D;
0027: // Aug 25, 2000:
0028: // Fixed processing instruction printing inside element content
0029: // to not escape content. Reported by Mikael Staldal
0030: // <d96-mst@d.kth.se>
0031: // Aug 25, 2000:
0032: // Added ability to omit comments.
0033: // Contributed by Anupam Bagchi <abagchi@jtcsv.com>
0034: // Aug 26, 2000:
0035: // Fixed bug in newline handling when preserving spaces.
0036: // Contributed by Mike Dusseault <mdusseault@home.com>
0037: // Aug 29, 2000:
0038: // Fixed state.unescaped not being set to false when
0039: // entering element state.
0040: // Reported by Lowell Vaughn <lvaughn@agillion.com>
0041: package org.apache.xml.serialize;
0042:
0043: import java.io.IOException;
0044: import java.io.OutputStream;
0045: import java.io.Writer;
0046: import java.util.Hashtable;
0047: import java.util.Vector;
0048:
0049: import org.apache.xerces.dom.DOMErrorImpl;
0050: import org.apache.xerces.dom.DOMLocatorImpl;
0051: import org.apache.xerces.dom.DOMMessageFormatter;
0052: import org.apache.xerces.util.XMLChar;
0053: import org.w3c.dom.DOMError;
0054: import org.w3c.dom.DOMErrorHandler;
0055: import org.w3c.dom.Document;
0056: import org.w3c.dom.DocumentFragment;
0057: import org.w3c.dom.DocumentType;
0058: import org.w3c.dom.Element;
0059: import org.w3c.dom.Node;
0060: import org.w3c.dom.ls.LSException;
0061: import org.w3c.dom.ls.LSSerializer;
0062: import org.w3c.dom.ls.LSSerializerFilter;
0063: import org.w3c.dom.traversal.NodeFilter;
0064: import org.xml.sax.ContentHandler;
0065: import org.xml.sax.DTDHandler;
0066: import org.xml.sax.DocumentHandler;
0067: import org.xml.sax.Locator;
0068: import org.xml.sax.SAXException;
0069: import org.xml.sax.ext.DeclHandler;
0070: import org.xml.sax.ext.LexicalHandler;
0071:
0072: /**
0073: * Base class for a serializer supporting both DOM and SAX pretty
0074: * serializing of XML/HTML/XHTML documents. Derived classes perform
0075: * the method-specific serializing, this class provides the common
0076: * serializing mechanisms.
0077: * <p>
0078: * The serializer must be initialized with the proper writer and
0079: * output format before it can be used by calling {@link #setOutputCharStream}
0080: * or {@link #setOutputByteStream} for the writer and {@link #setOutputFormat}
0081: * for the output format.
0082: * <p>
0083: * The serializer can be reused any number of times, but cannot
0084: * be used concurrently by two threads.
0085: * <p>
0086: * If an output stream is used, the encoding is taken from the
0087: * output format (defaults to <tt>UTF-8</tt>). If a writer is
0088: * used, make sure the writer uses the same encoding (if applies)
0089: * as specified in the output format.
0090: * <p>
0091: * The serializer supports both DOM and SAX. DOM serializing is done
0092: * by calling {@link #serialize(Document)} and SAX serializing is done by firing
0093: * SAX events and using the serializer as a document handler.
0094: * This also applies to derived class.
0095: * <p>
0096: * If an I/O exception occurs while serializing, the serializer
0097: * will not throw an exception directly, but only throw it
0098: * at the end of serializing (either DOM or SAX's {@link
0099: * org.xml.sax.DocumentHandler#endDocument}).
0100: * <p>
0101: * For elements that are not specified as whitespace preserving,
0102: * the serializer will potentially break long text lines at space
0103: * boundaries, indent lines, and serialize elements on separate
0104: * lines. Line terminators will be regarded as spaces, and
0105: * spaces at beginning of line will be stripped.
0106: * <p>
0107: * When indenting, the serializer is capable of detecting seemingly
0108: * element content, and serializing these elements indented on separate
0109: * lines. An element is serialized indented when it is the first or
0110: * last child of an element, or immediate following or preceding
0111: * another element.
0112: *
0113: * @deprecated This class was deprecated in Xerces 2.9.0. It is recommended
0114: * that new applications use the DOM Level 3 LSSerializer or JAXP's Transformation
0115: * API for XML (TrAX) for serializing XML. See the Xerces documentation for more
0116: * information.
0117: * @version $Revision: 476047 $ $Date: 2006-11-16 23:27:45 -0500 (Thu, 16 Nov 2006) $
0118: * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
0119: * @author <a href="mailto:rahul.srivastava@sun.com">Rahul Srivastava</a>
0120: * @author Elena Litani, IBM
0121: * @see Serializer
0122: * @see org.w3c.dom.ls.LSSerializer
0123: */
0124: public abstract class BaseMarkupSerializer implements ContentHandler,
0125: DocumentHandler, LexicalHandler, DTDHandler, DeclHandler,
0126: DOMSerializer, Serializer {
0127:
0128: // DOM L3 implementation
0129: protected short features = 0xFFFFFFFF;
0130: protected DOMErrorHandler fDOMErrorHandler;
0131: protected final DOMErrorImpl fDOMError = new DOMErrorImpl();
0132: protected LSSerializerFilter fDOMFilter;
0133:
0134: protected EncodingInfo _encodingInfo;
0135:
0136: /**
0137: * Holds array of all element states that have been entered.
0138: * The array is automatically resized. When leaving an element,
0139: * its state is not removed but reused when later returning
0140: * to the same nesting level.
0141: */
0142: private ElementState[] _elementStates;
0143:
0144: /**
0145: * The index of the next state to place in the array,
0146: * or one plus the index of the current state. When zero,
0147: * we are in no state.
0148: */
0149: private int _elementStateCount;
0150:
0151: /**
0152: * Vector holding comments and PIs that come before the root
0153: * element (even after it), see {@link #serializePreRoot}.
0154: */
0155: private Vector _preRoot;
0156:
0157: /**
0158: * If the document has been started (header serialized), this
0159: * flag is set to true so it's not started twice.
0160: */
0161: protected boolean _started;
0162:
0163: /**
0164: * True if the serializer has been prepared. This flag is set
0165: * to false when the serializer is reset prior to using it,
0166: * and to true after it has been prepared for usage.
0167: */
0168: private boolean _prepared;
0169:
0170: /**
0171: * Association between namespace URIs (keys) and prefixes (values).
0172: * Accumulated here prior to starting an element and placing this
0173: * list in the element state.
0174: */
0175: protected Hashtable _prefixes;
0176:
0177: /**
0178: * The public identifier of the document type, if known.
0179: */
0180: protected String _docTypePublicId;
0181:
0182: /**
0183: * The system identifier of the document type, if known.
0184: */
0185: protected String _docTypeSystemId;
0186:
0187: /**
0188: * The output format associated with this serializer. This will never
0189: * be a null reference. If no format was passed to the constructor,
0190: * the default one for this document type will be used. The format
0191: * object is never changed by the serializer.
0192: */
0193: protected OutputFormat _format;
0194:
0195: /**
0196: * The printer used for printing text parts.
0197: */
0198: protected Printer _printer;
0199:
0200: /**
0201: * True if indenting printer.
0202: */
0203: protected boolean _indenting;
0204:
0205: /** Temporary buffer to store character data */
0206: protected final StringBuffer fStrBuffer = new StringBuffer(40);
0207:
0208: /**
0209: * The underlying writer.
0210: */
0211: private Writer _writer;
0212:
0213: /**
0214: * The output stream.
0215: */
0216: private OutputStream _output;
0217:
0218: /** Current node that is being processed */
0219: protected Node fCurrentNode = null;
0220:
0221: //--------------------------------//
0222: // Constructor and initialization //
0223: //--------------------------------//
0224:
0225: /**
0226: * Protected constructor can only be used by derived class.
0227: * Must initialize the serializer before serializing any document,
0228: * by calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
0229: * first
0230: */
0231: protected BaseMarkupSerializer(OutputFormat format) {
0232: int i;
0233:
0234: _elementStates = new ElementState[10];
0235: for (i = 0; i < _elementStates.length; ++i)
0236: _elementStates[i] = new ElementState();
0237: _format = format;
0238: }
0239:
0240: public DocumentHandler asDocumentHandler() throws IOException {
0241: prepare();
0242: return this ;
0243: }
0244:
0245: public ContentHandler asContentHandler() throws IOException {
0246: prepare();
0247: return this ;
0248: }
0249:
0250: public DOMSerializer asDOMSerializer() throws IOException {
0251: prepare();
0252: return this ;
0253: }
0254:
0255: public void setOutputByteStream(OutputStream output) {
0256: if (output == null) {
0257: String msg = DOMMessageFormatter.formatMessage(
0258: DOMMessageFormatter.SERIALIZER_DOMAIN,
0259: "ArgumentIsNull", new Object[] { "output" });
0260: throw new NullPointerException(msg);
0261: }
0262: _output = output;
0263: _writer = null;
0264: reset();
0265: }
0266:
0267: public void setOutputCharStream(Writer writer) {
0268: if (writer == null) {
0269: String msg = DOMMessageFormatter.formatMessage(
0270: DOMMessageFormatter.SERIALIZER_DOMAIN,
0271: "ArgumentIsNull", new Object[] { "writer" });
0272: throw new NullPointerException(msg);
0273: }
0274: _writer = writer;
0275: _output = null;
0276: reset();
0277: }
0278:
0279: public void setOutputFormat(OutputFormat format) {
0280: if (format == null) {
0281: String msg = DOMMessageFormatter.formatMessage(
0282: DOMMessageFormatter.SERIALIZER_DOMAIN,
0283: "ArgumentIsNull", new Object[] { "format" });
0284: throw new NullPointerException(msg);
0285: }
0286: _format = format;
0287: reset();
0288: }
0289:
0290: public boolean reset() {
0291: if (_elementStateCount > 1) {
0292: String msg = DOMMessageFormatter.formatMessage(
0293: DOMMessageFormatter.SERIALIZER_DOMAIN,
0294: "ResetInMiddle", null);
0295: throw new IllegalStateException(msg);
0296: }
0297: _prepared = false;
0298: fCurrentNode = null;
0299: fStrBuffer.setLength(0);
0300: return true;
0301: }
0302:
0303: protected void prepare() throws IOException {
0304: if (_prepared)
0305: return;
0306:
0307: if (_writer == null && _output == null) {
0308: String msg = DOMMessageFormatter.formatMessage(
0309: DOMMessageFormatter.SERIALIZER_DOMAIN,
0310: "NoWriterSupplied", null);
0311: throw new IOException(msg);
0312: }
0313: // If the output stream has been set, use it to construct
0314: // the writer. It is possible that the serializer has been
0315: // reused with the same output stream and different encoding.
0316:
0317: _encodingInfo = _format.getEncodingInfo();
0318:
0319: if (_output != null) {
0320: _writer = _encodingInfo.getWriter(_output);
0321: }
0322:
0323: if (_format.getIndenting()) {
0324: _indenting = true;
0325: _printer = new IndentPrinter(_writer, _format);
0326: } else {
0327: _indenting = false;
0328: _printer = new Printer(_writer, _format);
0329: }
0330:
0331: ElementState state;
0332:
0333: _elementStateCount = 0;
0334: state = _elementStates[0];
0335: state.namespaceURI = null;
0336: state.localName = null;
0337: state.rawName = null;
0338: state.preserveSpace = _format.getPreserveSpace();
0339: state.empty = true;
0340: state.afterElement = false;
0341: state.afterComment = false;
0342: state.doCData = state.inCData = false;
0343: state.prefixes = null;
0344:
0345: _docTypePublicId = _format.getDoctypePublic();
0346: _docTypeSystemId = _format.getDoctypeSystem();
0347: _started = false;
0348: _prepared = true;
0349: }
0350:
0351: //----------------------------------//
0352: // DOM document serializing methods //
0353: //----------------------------------//
0354:
0355: /**
0356: * Serializes the DOM element using the previously specified
0357: * writer and output format. Throws an exception only if
0358: * an I/O exception occured while serializing.
0359: *
0360: * @param elem The element to serialize
0361: * @throws IOException An I/O exception occured while
0362: * serializing
0363: */
0364: public void serialize(Element elem) throws IOException {
0365: reset();
0366: prepare();
0367: serializeNode(elem);
0368: _printer.flush();
0369: if (_printer.getException() != null)
0370: throw _printer.getException();
0371: }
0372:
0373: /**
0374: * Serializes the DOM document fragmnt using the previously specified
0375: * writer and output format. Throws an exception only if
0376: * an I/O exception occured while serializing.
0377: *
0378: * @param frag The document fragment to serialize
0379: * @throws IOException An I/O exception occured while
0380: * serializing
0381: */
0382: public void serialize(DocumentFragment frag) throws IOException {
0383: reset();
0384: prepare();
0385: serializeNode(frag);
0386: _printer.flush();
0387: if (_printer.getException() != null)
0388: throw _printer.getException();
0389: }
0390:
0391: /**
0392: * Serializes the DOM document using the previously specified
0393: * writer and output format. Throws an exception only if
0394: * an I/O exception occured while serializing.
0395: *
0396: * @param doc The document to serialize
0397: * @throws IOException An I/O exception occured while
0398: * serializing
0399: */
0400: public void serialize(Document doc) throws IOException {
0401: reset();
0402: prepare();
0403: serializeNode(doc);
0404: serializePreRoot();
0405: _printer.flush();
0406: if (_printer.getException() != null)
0407: throw _printer.getException();
0408: }
0409:
0410: //------------------------------------------//
0411: // SAX document handler serializing methods //
0412: //------------------------------------------//
0413:
0414: public void startDocument() throws SAXException {
0415: try {
0416: prepare();
0417: } catch (IOException except) {
0418: throw new SAXException(except.toString());
0419: }
0420: // Nothing to do here. All the magic happens in startDocument(String)
0421: }
0422:
0423: public void characters(char[] chars, int start, int length)
0424: throws SAXException {
0425: ElementState state;
0426:
0427: try {
0428: state = content();
0429:
0430: // Check if text should be print as CDATA section or unescaped
0431: // based on elements listed in the output format (the element
0432: // state) or whether we are inside a CDATA section or entity.
0433:
0434: if (state.inCData || state.doCData) {
0435: int saveIndent;
0436:
0437: // Print a CDATA section. The text is not escaped, but ']]>'
0438: // appearing in the code must be identified and dealt with.
0439: // The contents of a text node is considered space preserving.
0440: if (!state.inCData) {
0441: _printer.printText("<![CDATA[");
0442: state.inCData = true;
0443: }
0444: saveIndent = _printer.getNextIndent();
0445: _printer.setNextIndent(0);
0446: char ch;
0447: final int end = start + length;
0448: for (int index = start; index < end; ++index) {
0449: ch = chars[index];
0450: if (ch == ']' && index + 2 < end
0451: && chars[index + 1] == ']'
0452: && chars[index + 2] == '>') {
0453: _printer.printText("]]]]><![CDATA[>");
0454: index += 2;
0455: continue;
0456: }
0457: if (!XMLChar.isValid(ch)) {
0458: // check if it is surrogate
0459: if (++index < end) {
0460: surrogates(ch, chars[index], true);
0461: } else {
0462: fatalError("The character '" + ch
0463: + "' is an invalid XML character");
0464: }
0465: continue;
0466: }
0467: if ((ch >= ' ' && _encodingInfo.isPrintable(ch) && ch != 0xF7)
0468: || ch == '\n' || ch == '\r' || ch == '\t') {
0469: _printer.printText(ch);
0470: } else {
0471: // The character is not printable -- split CDATA section
0472: _printer.printText("]]>&#x");
0473: _printer.printText(Integer.toHexString(ch));
0474: _printer.printText(";<![CDATA[");
0475: }
0476: }
0477: _printer.setNextIndent(saveIndent);
0478:
0479: } else {
0480:
0481: int saveIndent;
0482:
0483: if (state.preserveSpace) {
0484: // If preserving space then hold of indentation so no
0485: // excessive spaces are printed at line breaks, escape
0486: // the text content without replacing spaces and print
0487: // the text breaking only at line breaks.
0488: saveIndent = _printer.getNextIndent();
0489: _printer.setNextIndent(0);
0490: printText(chars, start, length, true,
0491: state.unescaped);
0492: _printer.setNextIndent(saveIndent);
0493: } else {
0494: printText(chars, start, length, false,
0495: state.unescaped);
0496: }
0497: }
0498: } catch (IOException except) {
0499: throw new SAXException(except);
0500: }
0501: }
0502:
0503: public void ignorableWhitespace(char[] chars, int start, int length)
0504: throws SAXException {
0505: int i;
0506:
0507: try {
0508: content();
0509:
0510: // Print ignorable whitespaces only when indenting, after
0511: // all they are indentation. Cancel the indentation to
0512: // not indent twice.
0513: if (_indenting) {
0514: _printer.setThisIndent(0);
0515: for (i = start; length-- > 0; ++i)
0516: _printer.printText(chars[i]);
0517: }
0518: } catch (IOException except) {
0519: throw new SAXException(except);
0520: }
0521: }
0522:
0523: public final void processingInstruction(String target, String code)
0524: throws SAXException {
0525: try {
0526: processingInstructionIO(target, code);
0527: } catch (IOException except) {
0528: throw new SAXException(except);
0529: }
0530: }
0531:
0532: public void processingInstructionIO(String target, String code)
0533: throws IOException {
0534: int index;
0535: ElementState state;
0536:
0537: state = content();
0538:
0539: // Create the processing instruction textual representation.
0540: // Make sure we don't have '?>' inside either target or code.
0541: index = target.indexOf("?>");
0542: if (index >= 0)
0543: fStrBuffer.append("<?").append(target.substring(0, index));
0544: else
0545: fStrBuffer.append("<?").append(target);
0546: if (code != null) {
0547: fStrBuffer.append(' ');
0548: index = code.indexOf("?>");
0549: if (index >= 0)
0550: fStrBuffer.append(code.substring(0, index));
0551: else
0552: fStrBuffer.append(code);
0553: }
0554: fStrBuffer.append("?>");
0555:
0556: // If before the root element (or after it), do not print
0557: // the PI directly but place it in the pre-root vector.
0558: if (isDocumentState()) {
0559: if (_preRoot == null)
0560: _preRoot = new Vector();
0561: _preRoot.addElement(fStrBuffer.toString());
0562: } else {
0563: _printer.indent();
0564: printText(fStrBuffer.toString(), true, true);
0565: _printer.unindent();
0566: if (_indenting)
0567: state.afterElement = true;
0568: }
0569:
0570: fStrBuffer.setLength(0);
0571: }
0572:
0573: public void comment(char[] chars, int start, int length)
0574: throws SAXException {
0575: try {
0576: comment(new String(chars, start, length));
0577: } catch (IOException except) {
0578: throw new SAXException(except);
0579: }
0580: }
0581:
0582: public void comment(String text) throws IOException {
0583: int index;
0584: ElementState state;
0585:
0586: if (_format.getOmitComments())
0587: return;
0588:
0589: state = content();
0590: // Create the processing comment textual representation.
0591: // Make sure we don't have '-->' inside the comment.
0592: index = text.indexOf("-->");
0593: if (index >= 0)
0594: fStrBuffer.append("<!--").append(text.substring(0, index))
0595: .append("-->");
0596: else
0597: fStrBuffer.append("<!--").append(text).append("-->");
0598:
0599: // If before the root element (or after it), do not print
0600: // the comment directly but place it in the pre-root vector.
0601: if (isDocumentState()) {
0602: if (_preRoot == null)
0603: _preRoot = new Vector();
0604: _preRoot.addElement(fStrBuffer.toString());
0605: } else {
0606: // Indent this element on a new line if the first
0607: // content of the parent element or immediately
0608: // following an element.
0609: if (_indenting && !state.preserveSpace)
0610: _printer.breakLine();
0611: _printer.indent();
0612: printText(fStrBuffer.toString(), true, true);
0613: _printer.unindent();
0614: if (_indenting)
0615: state.afterElement = true;
0616: }
0617:
0618: fStrBuffer.setLength(0);
0619: state.afterComment = true;
0620: state.afterElement = false;
0621: }
0622:
0623: public void startCDATA() {
0624: ElementState state;
0625:
0626: state = getElementState();
0627: state.doCData = true;
0628: }
0629:
0630: public void endCDATA() {
0631: ElementState state;
0632:
0633: state = getElementState();
0634: state.doCData = false;
0635: }
0636:
0637: public void startNonEscaping() {
0638: ElementState state;
0639:
0640: state = getElementState();
0641: state.unescaped = true;
0642: }
0643:
0644: public void endNonEscaping() {
0645: ElementState state;
0646:
0647: state = getElementState();
0648: state.unescaped = false;
0649: }
0650:
0651: public void startPreserving() {
0652: ElementState state;
0653:
0654: state = getElementState();
0655: state.preserveSpace = true;
0656: }
0657:
0658: public void endPreserving() {
0659: ElementState state;
0660:
0661: state = getElementState();
0662: state.preserveSpace = false;
0663: }
0664:
0665: /**
0666: * Called at the end of the document to wrap it up.
0667: * Will flush the output stream and throw an exception
0668: * if any I/O error occured while serializing.
0669: *
0670: * @throws SAXException An I/O exception occured during
0671: * serializing
0672: */
0673: public void endDocument() throws SAXException {
0674: try {
0675: // Print all the elements accumulated outside of
0676: // the root element.
0677: serializePreRoot();
0678: // Flush the output, this is necessary for fStrBuffered output.
0679: _printer.flush();
0680: } catch (IOException except) {
0681: throw new SAXException(except);
0682: }
0683: }
0684:
0685: public void startEntity(String name) {
0686: // ???
0687: }
0688:
0689: public void endEntity(String name) {
0690: // ???
0691: }
0692:
0693: public void setDocumentLocator(Locator locator) {
0694: // Nothing to do
0695: }
0696:
0697: //-----------------------------------------//
0698: // SAX content handler serializing methods //
0699: //-----------------------------------------//
0700:
0701: public void skippedEntity(String name) throws SAXException {
0702: try {
0703: endCDATA();
0704: content();
0705: _printer.printText('&');
0706: _printer.printText(name);
0707: _printer.printText(';');
0708: } catch (IOException except) {
0709: throw new SAXException(except);
0710: }
0711: }
0712:
0713: public void startPrefixMapping(String prefix, String uri)
0714: throws SAXException {
0715: if (_prefixes == null)
0716: _prefixes = new Hashtable();
0717: _prefixes.put(uri, prefix == null ? "" : prefix);
0718: }
0719:
0720: public void endPrefixMapping(String prefix) throws SAXException {
0721: }
0722:
0723: //------------------------------------------//
0724: // SAX DTD/Decl handler serializing methods //
0725: //------------------------------------------//
0726:
0727: public final void startDTD(String name, String publicId,
0728: String systemId) throws SAXException {
0729: try {
0730: _printer.enterDTD();
0731: _docTypePublicId = publicId;
0732: _docTypeSystemId = systemId;
0733: } catch (IOException except) {
0734: throw new SAXException(except);
0735: }
0736: }
0737:
0738: public void endDTD() {
0739: // Nothing to do here, all the magic occurs in startDocument(String).
0740: }
0741:
0742: public void elementDecl(String name, String model)
0743: throws SAXException {
0744: try {
0745: _printer.enterDTD();
0746: _printer.printText("<!ELEMENT ");
0747: _printer.printText(name);
0748: _printer.printText(' ');
0749: _printer.printText(model);
0750: _printer.printText('>');
0751: if (_indenting)
0752: _printer.breakLine();
0753: } catch (IOException except) {
0754: throw new SAXException(except);
0755: }
0756: }
0757:
0758: public void attributeDecl(String eName, String aName, String type,
0759: String valueDefault, String value) throws SAXException {
0760: try {
0761: _printer.enterDTD();
0762: _printer.printText("<!ATTLIST ");
0763: _printer.printText(eName);
0764: _printer.printText(' ');
0765: _printer.printText(aName);
0766: _printer.printText(' ');
0767: _printer.printText(type);
0768: if (valueDefault != null) {
0769: _printer.printText(' ');
0770: _printer.printText(valueDefault);
0771: }
0772: if (value != null) {
0773: _printer.printText(" \"");
0774: printEscaped(value);
0775: _printer.printText('"');
0776: }
0777: _printer.printText('>');
0778: if (_indenting)
0779: _printer.breakLine();
0780: } catch (IOException except) {
0781: throw new SAXException(except);
0782: }
0783: }
0784:
0785: public void internalEntityDecl(String name, String value)
0786: throws SAXException {
0787: try {
0788: _printer.enterDTD();
0789: _printer.printText("<!ENTITY ");
0790: _printer.printText(name);
0791: _printer.printText(" \"");
0792: printEscaped(value);
0793: _printer.printText("\">");
0794: if (_indenting)
0795: _printer.breakLine();
0796: } catch (IOException except) {
0797: throw new SAXException(except);
0798: }
0799: }
0800:
0801: public void externalEntityDecl(String name, String publicId,
0802: String systemId) throws SAXException {
0803: try {
0804: _printer.enterDTD();
0805: unparsedEntityDecl(name, publicId, systemId, null);
0806: } catch (IOException except) {
0807: throw new SAXException(except);
0808: }
0809: }
0810:
0811: public void unparsedEntityDecl(String name, String publicId,
0812: String systemId, String notationName) throws SAXException {
0813: try {
0814: _printer.enterDTD();
0815: if (publicId == null) {
0816: _printer.printText("<!ENTITY ");
0817: _printer.printText(name);
0818: _printer.printText(" SYSTEM ");
0819: printDoctypeURL(systemId);
0820: } else {
0821: _printer.printText("<!ENTITY ");
0822: _printer.printText(name);
0823: _printer.printText(" PUBLIC ");
0824: printDoctypeURL(publicId);
0825: _printer.printText(' ');
0826: printDoctypeURL(systemId);
0827: }
0828: if (notationName != null) {
0829: _printer.printText(" NDATA ");
0830: _printer.printText(notationName);
0831: }
0832: _printer.printText('>');
0833: if (_indenting)
0834: _printer.breakLine();
0835: } catch (IOException except) {
0836: throw new SAXException(except);
0837: }
0838: }
0839:
0840: public void notationDecl(String name, String publicId,
0841: String systemId) throws SAXException {
0842: try {
0843: _printer.enterDTD();
0844: if (publicId != null) {
0845: _printer.printText("<!NOTATION ");
0846: _printer.printText(name);
0847: _printer.printText(" PUBLIC ");
0848: printDoctypeURL(publicId);
0849: if (systemId != null) {
0850: _printer.printText(' ');
0851: printDoctypeURL(systemId);
0852: }
0853: } else {
0854: _printer.printText("<!NOTATION ");
0855: _printer.printText(name);
0856: _printer.printText(" SYSTEM ");
0857: printDoctypeURL(systemId);
0858: }
0859: _printer.printText('>');
0860: if (_indenting)
0861: _printer.breakLine();
0862: } catch (IOException except) {
0863: throw new SAXException(except);
0864: }
0865: }
0866:
0867: //----------------------------------//
0868: // Generic node serializing methods //
0869: //----------------------------------//
0870:
0871: /**
0872: * Serialize the DOM node. This method is shared across XML, HTML and XHTML
0873: * serializers and the differences are masked out in a separate {@link
0874: * #serializeElement}.
0875: *
0876: * @param node The node to serialize
0877: * @see #serializeElement
0878: * @throws IOException An I/O exception occured while
0879: * serializing
0880: */
0881: protected void serializeNode(Node node) throws IOException {
0882: fCurrentNode = node;
0883:
0884: // Based on the node type call the suitable SAX handler.
0885: // Only comments entities and documents which are not
0886: // handled by SAX are serialized directly.
0887: switch (node.getNodeType()) {
0888: case Node.TEXT_NODE: {
0889: String text;
0890:
0891: text = node.getNodeValue();
0892: if (text != null) {
0893: if (fDOMFilter != null
0894: && (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_TEXT) != 0) {
0895: short code = fDOMFilter.acceptNode(node);
0896: switch (code) {
0897: case NodeFilter.FILTER_REJECT:
0898: case NodeFilter.FILTER_SKIP: {
0899: break;
0900: }
0901: default: {
0902: characters(text);
0903: }
0904: }
0905: } else if (!_indenting
0906: || getElementState().preserveSpace
0907: || (text.replace('\n', ' ').trim().length() != 0))
0908: characters(text);
0909:
0910: }
0911: break;
0912: }
0913:
0914: case Node.CDATA_SECTION_NODE: {
0915: String text = node.getNodeValue();
0916: if ((features & DOMSerializerImpl.CDATA) != 0) {
0917: if (text != null) {
0918: if (fDOMFilter != null
0919: && (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_CDATA_SECTION) != 0) {
0920: short code = fDOMFilter.acceptNode(node);
0921: switch (code) {
0922: case NodeFilter.FILTER_REJECT:
0923: case NodeFilter.FILTER_SKIP: {
0924: // skip the CDATA node
0925: return;
0926: }
0927: default: {
0928: //fall through..
0929: }
0930: }
0931: }
0932: startCDATA();
0933: characters(text);
0934: endCDATA();
0935: }
0936: } else {
0937: // transform into a text node
0938: characters(text);
0939: }
0940: break;
0941: }
0942: case Node.COMMENT_NODE: {
0943: String text;
0944:
0945: if (!_format.getOmitComments()) {
0946: text = node.getNodeValue();
0947: if (text != null) {
0948:
0949: if (fDOMFilter != null
0950: && (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_COMMENT) != 0) {
0951: short code = fDOMFilter.acceptNode(node);
0952: switch (code) {
0953: case NodeFilter.FILTER_REJECT:
0954: case NodeFilter.FILTER_SKIP: {
0955: // skip the comment node
0956: return;
0957: }
0958: default: {
0959: // fall through
0960: }
0961: }
0962: }
0963: comment(text);
0964: }
0965: }
0966: break;
0967: }
0968:
0969: case Node.ENTITY_REFERENCE_NODE: {
0970: Node child;
0971:
0972: endCDATA();
0973: content();
0974:
0975: if (((features & DOMSerializerImpl.ENTITIES) != 0)
0976: || (node.getFirstChild() == null)) {
0977: if (fDOMFilter != null
0978: && (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_ENTITY_REFERENCE) != 0) {
0979: short code = fDOMFilter.acceptNode(node);
0980: switch (code) {
0981: case NodeFilter.FILTER_REJECT: {
0982: return; // remove the node
0983: }
0984: case NodeFilter.FILTER_SKIP: {
0985: child = node.getFirstChild();
0986: while (child != null) {
0987: serializeNode(child);
0988: child = child.getNextSibling();
0989: }
0990: return;
0991: }
0992:
0993: default: {
0994: // fall through
0995: }
0996: }
0997: }
0998: checkUnboundNamespacePrefixedNode(node);
0999:
1000: _printer.printText("&");
1001: _printer.printText(node.getNodeName());
1002: _printer.printText(";");
1003: } else {
1004: child = node.getFirstChild();
1005: while (child != null) {
1006: serializeNode(child);
1007: child = child.getNextSibling();
1008: }
1009: }
1010:
1011: break;
1012: }
1013:
1014: case Node.PROCESSING_INSTRUCTION_NODE: {
1015:
1016: if (fDOMFilter != null
1017: && (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_PROCESSING_INSTRUCTION) != 0) {
1018: short code = fDOMFilter.acceptNode(node);
1019: switch (code) {
1020: case NodeFilter.FILTER_REJECT:
1021: case NodeFilter.FILTER_SKIP: {
1022: return; // skip this node
1023: }
1024: default: { // fall through
1025: }
1026: }
1027: }
1028: processingInstructionIO(node.getNodeName(), node
1029: .getNodeValue());
1030: break;
1031: }
1032: case Node.ELEMENT_NODE: {
1033:
1034: if (fDOMFilter != null
1035: && (fDOMFilter.getWhatToShow() & NodeFilter.SHOW_ELEMENT) != 0) {
1036: short code = fDOMFilter.acceptNode(node);
1037: switch (code) {
1038: case NodeFilter.FILTER_REJECT: {
1039: return;
1040: }
1041: case NodeFilter.FILTER_SKIP: {
1042: Node child = node.getFirstChild();
1043: while (child != null) {
1044: serializeNode(child);
1045: child = child.getNextSibling();
1046: }
1047: return; // skip this node
1048: }
1049:
1050: default: { // fall through
1051: }
1052: }
1053: }
1054: serializeElement((Element) node);
1055: break;
1056: }
1057: case Node.DOCUMENT_NODE: {
1058: DocumentType docType;
1059:
1060: // If there is a document type, use the SAX events to
1061: // serialize it.
1062: docType = ((Document) node).getDoctype();
1063: if (docType != null) {
1064: // DOM Level 2 (or higher)
1065: try {
1066: String internal;
1067:
1068: _printer.enterDTD();
1069: _docTypePublicId = docType.getPublicId();
1070: _docTypeSystemId = docType.getSystemId();
1071: internal = docType.getInternalSubset();
1072: if (internal != null && internal.length() > 0)
1073: _printer.printText(internal);
1074: endDTD();
1075: }
1076: // DOM Level 1 -- does implementation have methods?
1077: catch (NoSuchMethodError nsme) {
1078: Class docTypeClass = docType.getClass();
1079:
1080: String docTypePublicId = null;
1081: String docTypeSystemId = null;
1082: try {
1083: java.lang.reflect.Method getPublicId = docTypeClass
1084: .getMethod("getPublicId",
1085: (Class[]) null);
1086: if (getPublicId.getReturnType().equals(
1087: String.class)) {
1088: docTypePublicId = (String) getPublicId
1089: .invoke(docType, (Object[]) null);
1090: }
1091: } catch (Exception e) {
1092: // ignore
1093: }
1094: try {
1095: java.lang.reflect.Method getSystemId = docTypeClass
1096: .getMethod("getSystemId",
1097: (Class[]) null);
1098: if (getSystemId.getReturnType().equals(
1099: String.class)) {
1100: docTypeSystemId = (String) getSystemId
1101: .invoke(docType, (Object[]) null);
1102: }
1103: } catch (Exception e) {
1104: // ignore
1105: }
1106: _printer.enterDTD();
1107: _docTypePublicId = docTypePublicId;
1108: _docTypeSystemId = docTypeSystemId;
1109: endDTD();
1110: }
1111: }
1112: // !! Fall through
1113: }
1114: case Node.DOCUMENT_FRAGMENT_NODE: {
1115: Node child;
1116:
1117: // By definition this will happen if the node is a document,
1118: // document fragment, etc. Just serialize its contents. It will
1119: // work well for other nodes that we do not know how to serialize.
1120: child = node.getFirstChild();
1121: while (child != null) {
1122: serializeNode(child);
1123: child = child.getNextSibling();
1124: }
1125: break;
1126: }
1127:
1128: default:
1129: break;
1130: }
1131: }
1132:
1133: /**
1134: * Must be called by a method about to print any type of content.
1135: * If the element was just opened, the opening tag is closed and
1136: * will be matched to a closing tag. Returns the current element
1137: * state with <tt>empty</tt> and <tt>afterElement</tt> set to false.
1138: *
1139: * @return The current element state
1140: * @throws IOException An I/O exception occurred while
1141: * serializing
1142: */
1143: protected ElementState content() throws IOException {
1144: ElementState state;
1145:
1146: state = getElementState();
1147: if (!isDocumentState()) {
1148: // Need to close CData section first
1149: if (state.inCData && !state.doCData) {
1150: _printer.printText("]]>");
1151: state.inCData = false;
1152: }
1153: // If this is the first content in the element,
1154: // change the state to not-empty and close the
1155: // opening element tag.
1156: if (state.empty) {
1157: _printer.printText('>');
1158: state.empty = false;
1159: }
1160: // Except for one content type, all of them
1161: // are not last element. That one content
1162: // type will take care of itself.
1163: state.afterElement = false;
1164: // Except for one content type, all of them
1165: // are not last comment. That one content
1166: // type will take care of itself.
1167: state.afterComment = false;
1168: }
1169: return state;
1170: }
1171:
1172: /**
1173: * Called to print the text contents in the prevailing element format.
1174: * Since this method is capable of printing text as CDATA, it is used
1175: * for that purpose as well. White space handling is determined by the
1176: * current element state. In addition, the output format can dictate
1177: * whether the text is printed as CDATA or unescaped.
1178: *
1179: * @param text The text to print
1180: * @throws IOException An I/O exception occured while
1181: * serializing
1182: */
1183: protected void characters(String text) throws IOException {
1184: ElementState state;
1185:
1186: state = content();
1187: // Check if text should be print as CDATA section or unescaped
1188: // based on elements listed in the output format (the element
1189: // state) or whether we are inside a CDATA section or entity.
1190:
1191: if (state.inCData || state.doCData) {
1192: // Print a CDATA section. The text is not escaped, but ']]>'
1193: // appearing in the code must be identified and dealt with.
1194: // The contents of a text node is considered space preserving.
1195: if (!state.inCData) {
1196: _printer.printText("<![CDATA[");
1197: state.inCData = true;
1198: }
1199: int saveIndent = _printer.getNextIndent();
1200: _printer.setNextIndent(0);
1201: printCDATAText(text);
1202: _printer.setNextIndent(saveIndent);
1203:
1204: } else {
1205:
1206: int saveIndent;
1207:
1208: if (state.preserveSpace) {
1209: // If preserving space then hold of indentation so no
1210: // excessive spaces are printed at line breaks, escape
1211: // the text content without replacing spaces and print
1212: // the text breaking only at line breaks.
1213: saveIndent = _printer.getNextIndent();
1214: _printer.setNextIndent(0);
1215: printText(text, true, state.unescaped);
1216: _printer.setNextIndent(saveIndent);
1217: } else {
1218: printText(text, false, state.unescaped);
1219: }
1220: }
1221: }
1222:
1223: /**
1224: * Returns the name of the entity reference that stands for this character
1225: * value, or null if no such entity exists. Only the name is returned:
1226: * printEscaped(int) adds the leading '&' and the trailing ';' itself.
1227: *
1228: * @param ch Character value
1229: * @return Character entity name (without '&' and ';'), or null
1230: */
1231: protected abstract String getEntityRef(int ch);
1232:
1233: /**
1234: * Called to serialize the DOM element. The element is serialized based on
1235: * the serializer's method (XML, HTML, XHTML).
1236: * serializeNode calls this for every element node it does not filter out.
1237: * @param elem The element to serialize
1238: * @throws IOException An I/O exception occurred while
1239: * serializing
1240: */
1241: protected abstract void serializeElement(Element elem)
1242: throws IOException;
1243:
1244: /**
1245: * Comments and PIs cannot be serialized before the root element,
1246: * because the root element serializes the document type, which
1247: * generally comes first. Instead such PIs and comments are
1248: * accumulated inside a vector and serialized by calling this
1249: * method. Will be called when the root element is serialized
1250: * and when the document finished serializing.
1251: *
1252: * @throws IOException An I/O exception occured while
1253: * serializing
1254: */
1255: protected void serializePreRoot() throws IOException {
1256: int i;
1257:
1258: if (_preRoot != null) {
1259: for (i = 0; i < _preRoot.size(); ++i) {
1260: printText((String) _preRoot.elementAt(i), true, true);
1261: if (_indenting)
1262: _printer.breakLine();
1263: }
1264: _preRoot.removeAllElements();
1265: }
1266: }
1267:
1268: //---------------------------------------------//
1269: // Text pretty printing and formatting methods //
1270: //---------------------------------------------//
1271:
1272: protected void printCDATAText(String text) throws IOException {
1273: int length = text.length();
1274: char ch;
1275:
1276: for (int index = 0; index < length; ++index) {
1277: ch = text.charAt(index);
1278: if (ch == ']' && index + 2 < length
1279: && text.charAt(index + 1) == ']'
1280: && text.charAt(index + 2) == '>') { // check for ']]>'
1281: if (fDOMErrorHandler != null) {
1282: // REVISIT: this means that if DOM Error handler is not registered we don't report any
1283: // fatal errors and might serialize not wellformed document
1284: if ((features & DOMSerializerImpl.SPLITCDATA) == 0) {
1285: String msg = DOMMessageFormatter.formatMessage(
1286: DOMMessageFormatter.SERIALIZER_DOMAIN,
1287: "EndingCDATA", null);
1288: if ((features & DOMSerializerImpl.WELLFORMED) != 0) {
1289: // issue fatal error
1290: modifyDOMError(msg,
1291: DOMError.SEVERITY_FATAL_ERROR,
1292: "wf-invalid-character",
1293: fCurrentNode);
1294: fDOMErrorHandler.handleError(fDOMError);
1295: throw new LSException(
1296: LSException.SERIALIZE_ERR, msg);
1297: }
1298: // issue error
1299: modifyDOMError(msg, DOMError.SEVERITY_ERROR,
1300: "cdata-section-not-splitted",
1301: fCurrentNode);
1302: if (!fDOMErrorHandler.handleError(fDOMError)) {
1303: throw new LSException(
1304: LSException.SERIALIZE_ERR, msg);
1305: }
1306: } else {
1307: // issue warning
1308: String msg = DOMMessageFormatter.formatMessage(
1309: DOMMessageFormatter.SERIALIZER_DOMAIN,
1310: "SplittingCDATA", null);
1311: modifyDOMError(msg, DOMError.SEVERITY_WARNING,
1312: null, fCurrentNode);
1313: fDOMErrorHandler.handleError(fDOMError);
1314: }
1315: }
1316: // split CDATA section
1317: _printer.printText("]]]]><![CDATA[>");
1318: index += 2;
1319: continue;
1320: }
1321:
1322: if (!XMLChar.isValid(ch)) {
1323: // check if it is surrogate
1324: if (++index < length) {
1325: surrogates(ch, text.charAt(index), true);
1326: } else {
1327: fatalError("The character '" + ch
1328: + "' is an invalid XML character");
1329: }
1330: continue;
1331: }
1332: if ((ch >= ' ' && _encodingInfo.isPrintable(ch) && ch != 0xF7)
1333: || ch == '\n' || ch == '\r' || ch == '\t') {
1334: _printer.printText(ch);
1335: } else {
1336:
1337: // The character is not printable -- split CDATA section
1338: _printer.printText("]]>&#x");
1339: _printer.printText(Integer.toHexString(ch));
1340: _printer.printText(";<![CDATA[");
1341: }
1342: }
1343: }
1344:
1345: protected void surrogates(int high, int low, boolean inContent)
1346: throws IOException {
1347: if (XMLChar.isHighSurrogate(high)) {
1348: if (!XMLChar.isLowSurrogate(low)) {
1349: //Invalid XML
1350: fatalError("The character '" + (char) low
1351: + "' is an invalid XML character");
1352: } else {
1353: int supplemental = XMLChar.supplemental((char) high,
1354: (char) low);
1355: if (!XMLChar.isValid(supplemental)) {
1356: //Invalid XML
1357: fatalError("The character '" + (char) supplemental
1358: + "' is an invalid XML character");
1359: } else {
1360: if (inContent && content().inCData) {
1361: _printer.printText("]]>&#x");
1362: _printer.printText(Integer
1363: .toHexString(supplemental));
1364: _printer.printText(";<![CDATA[");
1365: } else {
1366: printHex(supplemental);
1367: }
1368: }
1369: }
1370: } else {
1371: fatalError("The character '" + (char) high
1372: + "' is an invalid XML character");
1373: }
1374:
1375: }
1376:
1377: /**
1378: * Called to print additional text with whitespace handling.
1379: * If spaces are preserved, the text is printed as if by calling
1380: * {@link #printText(String,boolean,boolean)} with a call to {@link Printer#breakLine}
1381: * for each new line. If spaces are not preserved, the text is
1382: * broken at space boundaries if longer than the line width;
1383: * Multiple spaces are printed as such, but spaces at beginning
1384: * of line are removed.
1385: *
1386: * @param chars The text to print
1387: * @param start The start offset
1388: * @param length The number of characters
1389: * @param preserveSpace Space preserving flag
1390: * @param unescaped Print unescaped
1391: */
1392: protected void printText(char[] chars, int start, int length,
1393: boolean preserveSpace, boolean unescaped)
1394: throws IOException {
1395:
1396: if (preserveSpace) {
1397: // Preserving spaces: the text must print exactly as it is,
1398: // without breaking when spaces appear in the text and without
1399: // consolidating spaces. If a line terminator is used, a line
1400: // break will occur.
1401: while (length-- > 0) {
1402: char ch = chars[start];
1403: ++start;
1404: if (ch == '\n' || ch == '\r' || unescaped) {
1405: _printer.printText(ch);
1406: } else {
1407: printEscaped(ch);
1408: }
1409: }
1410: } else {
1411: // Not preserving spaces: print one part at a time, and
1412: // use spaces between parts to break them into different
1413: // lines. Spaces at beginning of line will be stripped
1414: // by printing mechanism. Line terminator is treated
1415: // no different than other text part.
1416: while (length-- > 0) {
1417: char ch = chars[start];
1418: ++start;
1419: if (ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n'
1420: || ch == '\r') {
1421: _printer.printSpace();
1422: } else if (unescaped) {
1423: _printer.printText(ch);
1424: } else {
1425: printEscaped(ch);
1426: }
1427: }
1428: }
1429: }
1430:
1431: protected void printText(String text, boolean preserveSpace,
1432: boolean unescaped) throws IOException {
1433: int index;
1434: char ch;
1435:
1436: if (preserveSpace) {
1437: // Preserving spaces: the text must print exactly as it is,
1438: // without breaking when spaces appear in the text and without
1439: // consolidating spaces. If a line terminator is used, a line
1440: // break will occur.
1441: for (index = 0; index < text.length(); ++index) {
1442: ch = text.charAt(index);
1443: if (ch == '\n' || ch == '\r' || unescaped)
1444: _printer.printText(ch);
1445: else
1446: printEscaped(ch);
1447: }
1448: } else {
1449: // Not preserving spaces: print one part at a time, and
1450: // use spaces between parts to break them into different
1451: // lines. Spaces at beginning of line will be stripped
1452: // by printing mechanism. Line terminator is treated
1453: // no different than other text part.
1454: for (index = 0; index < text.length(); ++index) {
1455: ch = text.charAt(index);
1456: if (ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n'
1457: || ch == '\r') {
1458: _printer.printSpace();
1459: } else if (unescaped) {
1460: _printer.printText(ch);
1461: } else {
1462: printEscaped(ch);
1463: }
1464: }
1465: }
1466: }
1467:
1468: /**
1469: * Print a document type public or system identifier URL.
1470: * Encapsulates the URL in double quotes, escapes non-printing
1471: * characters and print it equivalent to {@link #printText}.
1472: *
1473: * @param url The document type url to print
1474: */
1475: protected void printDoctypeURL(String url) throws IOException {
1476: int i;
1477:
1478: _printer.printText('"');
1479: for (i = 0; i < url.length(); ++i) {
1480: if (url.charAt(i) == '"' || url.charAt(i) < 0x20
1481: || url.charAt(i) > 0x7F) {
1482: _printer.printText('%');
1483: _printer.printText(Integer.toHexString(url.charAt(i)));
1484: } else
1485: _printer.printText(url.charAt(i));
1486: }
1487: _printer.printText('"');
1488: }
1489:
1490: protected void printEscaped(int ch) throws IOException {
1491: String charRef;
1492: // If there is a suitable entity reference for this
1493: // character, print it. The list of available entity
1494: // references is almost but not identical between
1495: // XML and HTML.
1496: charRef = getEntityRef(ch);
1497: if (charRef != null) {
1498: _printer.printText('&');
1499: _printer.printText(charRef);
1500: _printer.printText(';');
1501: } else if ((ch >= ' ' && _encodingInfo.isPrintable((char) ch) && ch != 0xF7)
1502: || ch == '\n' || ch == '\r' || ch == '\t') {
1503: // Non printables are below ASCII space but not tab or line
1504: // terminator, ASCII delete, or above a certain Unicode threshold.
1505: if (ch < 0x10000) {
1506: _printer.printText((char) ch);
1507: } else {
1508: _printer
1509: .printText((char) (((ch - 0x10000) >> 10) + 0xd800));
1510: _printer
1511: .printText((char) (((ch - 0x10000) & 0x3ff) + 0xdc00));
1512: }
1513: } else {
1514: printHex(ch);
1515: }
1516: }
1517:
1518: /**
1519: * Escapes chars
1520: */
1521: final void printHex(int ch) throws IOException {
1522: _printer.printText("&#x");
1523: _printer.printText(Integer.toHexString(ch));
1524: _printer.printText(';');
1525:
1526: }
1527:
1528: /**
1529: * Escapes a string so it may be printed as text content or attribute
1530: * value. Non printable characters are escaped using character references.
1531: * Where the format specifies a deault entity reference, that reference
1532: * is used (e.g. <tt>&lt;</tt>).
1533: *
1534: * @param source The string to escape
1535: */
1536: protected void printEscaped(String source) throws IOException {
1537: for (int i = 0; i < source.length(); ++i) {
1538: int ch = source.charAt(i);
1539: if ((ch & 0xfc00) == 0xd800 && i + 1 < source.length()) {
1540: int lowch = source.charAt(i + 1);
1541: if ((lowch & 0xfc00) == 0xdc00) {
1542: ch = 0x10000 + ((ch - 0xd800) << 10) + lowch
1543: - 0xdc00;
1544: i++;
1545: }
1546: }
1547: printEscaped(ch);
1548: }
1549: }
1550:
1551: //--------------------------------//
1552: // Element state handling methods //
1553: //--------------------------------//
1554:
1555: /**
1556: * Return the state of the current element.
1557: *
1558: * @return Current element state
1559: */
1560: protected ElementState getElementState() {
1561: return _elementStates[_elementStateCount];
1562: }
1563:
1564: /**
1565: * Enter a new element state for the specified element.
1566: * Tag name and space preserving is specified, element
1567: * state is initially empty.
1568: *
1569: * @return Current element state, or null
1570: */
1571: protected ElementState enterElementState(String namespaceURI,
1572: String localName, String rawName, boolean preserveSpace) {
1573: ElementState state;
1574:
1575: if (_elementStateCount + 1 == _elementStates.length) {
1576: ElementState[] newStates;
1577:
1578: // Need to create a larger array of states. This does not happen
1579: // often, unless the document is really deep.
1580: newStates = new ElementState[_elementStates.length + 10];
1581: for (int i = 0; i < _elementStates.length; ++i)
1582: newStates[i] = _elementStates[i];
1583: for (int i = _elementStates.length; i < newStates.length; ++i)
1584: newStates[i] = new ElementState();
1585: _elementStates = newStates;
1586: }
1587:
1588: ++_elementStateCount;
1589: state = _elementStates[_elementStateCount];
1590: state.namespaceURI = namespaceURI;
1591: state.localName = localName;
1592: state.rawName = rawName;
1593: state.preserveSpace = preserveSpace;
1594: state.empty = true;
1595: state.afterElement = false;
1596: state.afterComment = false;
1597: state.doCData = state.inCData = false;
1598: state.unescaped = false;
1599: state.prefixes = _prefixes;
1600:
1601: _prefixes = null;
1602: return state;
1603: }
1604:
1605: /**
1606: * Leave the current element state and return to the
1607: * state of the parent element. If this was the root
1608: * element, return to the state of the document.
1609: *
1610: * @return Previous element state
1611: */
1612: protected ElementState leaveElementState() {
1613: if (_elementStateCount > 0) {
1614: /*Corrected by David Blondeau (blondeau@intalio.com)*/
1615: _prefixes = null;
1616: //_prefixes = _elementStates[ _elementStateCount ].prefixes;
1617: --_elementStateCount;
1618: return _elementStates[_elementStateCount];
1619: }
1620: String msg = DOMMessageFormatter
1621: .formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN,
1622: "Internal", null);
1623: throw new IllegalStateException(msg);
1624: }
1625:
1626: /**
1627: * Returns true if in the state of the document.
1628: * Returns true before entering any element and after
1629: * leaving the root element.
1630: *
1631: * @return True if in the state of the document
1632: */
1633: protected boolean isDocumentState() {
1634: return _elementStateCount == 0;
1635: }
1636:
1637: /** Clears document state: resets the element state depth to zero, the value isDocumentState() tests for. **/
1638: final void clearDocumentState() {
1639: _elementStateCount = 0;
1640: }
1641:
1642: /**
1643: * Returns the namespace prefix for the specified URI.
1644: * If the URI has been mapped to a prefix, returns the
1645: * prefix, otherwise returns null.
1646: *
1647: * @param namespaceURI The namespace URI
1648: * @return The namespace prefix if known, or null
1649: */
1650: protected String getPrefix(String namespaceURI) {
1651: String prefix;
1652:
1653: if (_prefixes != null) {
1654: prefix = (String) _prefixes.get(namespaceURI);
1655: if (prefix != null)
1656: return prefix;
1657: }
1658: if (_elementStateCount == 0) {
1659: return null;
1660: }
1661: for (int i = _elementStateCount; i > 0; --i) {
1662: if (_elementStates[i].prefixes != null) {
1663: prefix = (String) _elementStates[i].prefixes
1664: .get(namespaceURI);
1665: if (prefix != null)
1666: return prefix;
1667: }
1668: }
1669: return null;
1670: }
1671:
1672: /**
1673: * The method modifies global DOM error object
1674: *
1675: * @param message
1676: * @param severity
1677: * @param type
1678: * @return a DOMError
1679: */
1680: protected DOMError modifyDOMError(String message, short severity,
1681: String type, Node node) {
1682: fDOMError.reset();
1683: fDOMError.fMessage = message;
1684: fDOMError.fType = type;
1685: fDOMError.fSeverity = severity;
1686: fDOMError.fLocator = new DOMLocatorImpl(-1, -1, -1, node, null);
1687: return fDOMError;
1688:
1689: }
1690:
1691: protected void fatalError(String message) throws IOException {
1692: if (fDOMErrorHandler != null) {
1693: modifyDOMError(message, DOMError.SEVERITY_FATAL_ERROR,
1694: null, fCurrentNode);
1695: fDOMErrorHandler.handleError(fDOMError);
1696: } else {
1697: throw new IOException(message);
1698: }
1699: }
1700:
1701: /**
1702: * DOM level 3:
1703: * Check a node to determine if it contains unbound namespace prefixes.
1704: * The implementation in this class has an empty body and checks nothing.
1705: * @param node The node to check for unbound namespace prefixes
1706: */
1707: protected void checkUnboundNamespacePrefixedNode(Node node)
1708: throws IOException {
1709: // Intentionally empty here; presumably overridden where the check is needed.
1710: }
1711: }
|