0001: package org.jivesoftware.util;
0002:
0003: import org.dom4j.*;
0004: import org.dom4j.io.OutputFormat;
0005: import org.dom4j.tree.NamespaceStack;
0006: import org.xml.sax.*;
0007: import org.xml.sax.ext.LexicalHandler;
0008: import org.xml.sax.helpers.XMLFilterImpl;
0009:
0010: import java.io.*;
0011: import java.util.*;
0012:
/**
 * Replacement for dom4j's original XMLWriter.java (version 1.77). The original still
 * uses StringBuffer, which is slower than the StringBuilder used here.
 */
0017: public class XMLWriter extends XMLFilterImpl implements LexicalHandler {
0018:
0019: private static final String PAD_TEXT = " ";
0020:
0021: protected static final String[] LEXICAL_HANDLER_NAMES = {
0022: "http://xml.org/sax/properties/lexical-handler",
0023: "http://xml.org/sax/handlers/LexicalHandler" };
0024:
0025: protected static final OutputFormat DEFAULT_FORMAT = new OutputFormat();
0026:
/** Should entityRefs be resolved when writing? */
0028: private boolean resolveEntityRefs = true;
0029:
0030: /** Stores the last type of node written so algorithms can refer to the
0031: * previous node type */
0032: protected int lastOutputNodeType;
0033:
0034: /** Stores the xml:space attribute value of preserve for whitespace flag */
0035: protected boolean preserve = false;
0036:
0037: /** The Writer used to output to */
0038: protected Writer writer;
0039:
/** The stack of namespaces written so far */
0041: private NamespaceStack namespaceStack = new NamespaceStack();
0042:
0043: /** The format used by this writer */
0044: private OutputFormat format;
0045:
0046: /** whether we should escape text */
0047: private boolean escapeText = true;
0048: /** The initial number of indentations (so you can print a whole
0049: document indented, if you like) **/
0050: private int indentLevel = 0;
0051:
0052: /** buffer used when escaping strings */
0053: private StringBuilder buffer = new StringBuilder();
0054:
0055: /** whether we have added characters before from the same chunk of characters */
0056: private boolean charactersAdded = false;
0057: private char lastChar;
0058:
0059: /** Whether a flush should occur after writing a document */
0060: private boolean autoFlush;
0061:
0062: /** Lexical handler we should delegate to */
0063: private LexicalHandler lexicalHandler;
0064:
0065: /** Whether comments should appear inside DTD declarations - defaults to false */
0066: private boolean showCommentsInDTDs;
0067:
/** Is the writer currently inside a DTD definition? */
0069: private boolean inDTD;
0070:
0071: /** The namespaces used for the current element when consuming SAX events */
0072: private Map namespacesMap;
0073:
0074: /**
0075: * what is the maximum allowed character code
0076: * such as 127 in US-ASCII (7 bit) or 255 in ISO-* (8 bit)
0077: * or -1 to not escape any characters (other than the special XML characters like < > &)
0078: */
0079: private int maximumAllowedCharacter;
0080:
/**
 * Creates a writer that sends its output to the given {@link Writer} and
 * formats it with {@link #DEFAULT_FORMAT}.
 *
 * @param writer destination for the generated XML
 */
public XMLWriter(Writer writer) {
    this(writer, DEFAULT_FORMAT);
}
0084:
/**
 * Creates a writer that sends its output to the given {@link Writer} using
 * the supplied format. Auto-flush is not enabled by this constructor.
 *
 * @param writer destination for the generated XML
 * @param format output format to apply
 */
public XMLWriter(Writer writer, OutputFormat format) {
    // Seed the namespace stack so lookups always find the "no namespace" entry.
    namespaceStack.push(Namespace.NO_NAMESPACE);
    this.format = format;
    this.writer = writer;
}
0090:
0091: public XMLWriter() throws UnsupportedEncodingException {
0092: this .format = DEFAULT_FORMAT;
0093: this .writer = new BufferedWriter(new OutputStreamWriter(
0094: System.out, "UTF-8"));
0095: this .autoFlush = true;
0096: namespaceStack.push(Namespace.NO_NAMESPACE);
0097: }
0098:
/**
 * Creates a writer that sends its output to the given stream using
 * {@link #DEFAULT_FORMAT} (and that format's encoding), with auto-flush
 * enabled.
 *
 * @param out stream receiving the encoded XML
 * @throws UnsupportedEncodingException if the format's encoding is not supported
 */
public XMLWriter(OutputStream out) throws UnsupportedEncodingException {
    this(out, DEFAULT_FORMAT);
}
0106:
0107: public XMLWriter(OutputStream out, OutputFormat format)
0108: throws UnsupportedEncodingException {
0109: this .format = format;
0110: this .writer = createWriter(out, format.getEncoding());
0111: this .autoFlush = true;
0112: namespaceStack.push(Namespace.NO_NAMESPACE);
0113: }
0114:
/**
 * Creates a writer that prints to {@code System.out} using the supplied
 * format and that format's encoding, with auto-flush enabled.
 *
 * @param format output format to apply
 * @throws UnsupportedEncodingException if the format's encoding is not supported
 */
public XMLWriter(OutputFormat format) throws UnsupportedEncodingException {
    this(System.out, format);
}
0122:
0123: public void setWriter(Writer writer) {
0124: this .writer = writer;
0125: this .autoFlush = false;
0126: }
0127:
0128: public void setOutputStream(OutputStream out)
0129: throws UnsupportedEncodingException {
0130: this .writer = createWriter(out, format.getEncoding());
0131: this .autoFlush = true;
0132: }
0133:
0134: /**
0135: * @return true if text thats output should be escaped.
0136: * This is enabled by default. It could be disabled if
0137: * the output format is textual, like in XSLT where we can have
0138: * xml, html or text output.
0139: */
0140: public boolean isEscapeText() {
0141: return escapeText;
0142: }
0143:
0144: /**
0145: * Sets whether text output should be escaped or not.
0146: * This is enabled by default. It could be disabled if
0147: * the output format is textual, like in XSLT where we can have
0148: * xml, html or text output.
0149: */
0150: public void setEscapeText(boolean escapeText) {
0151: this .escapeText = escapeText;
0152: }
0153:
0154: /** Set the initial indentation level. This can be used to output
0155: * a document (or, more likely, an element) starting at a given
0156: * indent level, so it's not always flush against the left margin.
0157: * Default: 0
0158: *
0159: * @param indentLevel the number of indents to start with
0160: */
0161: public void setIndentLevel(int indentLevel) {
0162: this .indentLevel = indentLevel;
0163: }
0164:
0165: /**
0166: * Returns the maximum allowed character code that should be allowed
0167: * unescaped which defaults to 127 in US-ASCII (7 bit) or
0168: * 255 in ISO-* (8 bit).
0169: */
0170: public int getMaximumAllowedCharacter() {
0171: if (maximumAllowedCharacter == 0) {
0172: maximumAllowedCharacter = defaultMaximumAllowedCharacter();
0173: }
0174: return maximumAllowedCharacter;
0175: }
0176:
0177: /**
0178: * Sets the maximum allowed character code that should be allowed
0179: * unescaped
0180: * such as 127 in US-ASCII (7 bit) or 255 in ISO-* (8 bit)
0181: * or -1 to not escape any characters (other than the special XML characters like < > &)
0182: *
0183: * If this is not explicitly set then it is defaulted from the encoding.
0184: *
0185: * @param maximumAllowedCharacter The maximumAllowedCharacter to set
0186: */
0187: public void setMaximumAllowedCharacter(int maximumAllowedCharacter) {
0188: this .maximumAllowedCharacter = maximumAllowedCharacter;
0189: }
0190:
0191: /** Flushes the underlying Writer */
0192: public void flush() throws IOException {
0193: writer.flush();
0194: }
0195:
0196: /** Closes the underlying Writer */
0197: public void close() throws IOException {
0198: writer.close();
0199: }
0200:
0201: /** Writes the new line text to the underlying Writer */
0202: public void println() throws IOException {
0203: writer.write(format.getLineSeparator());
0204: }
0205:
0206: /** Writes the given {@link org.dom4j.Attribute}.
0207: *
0208: * @param attribute <code>Attribute</code> to output.
0209: */
0210: public void write(Attribute attribute) throws IOException {
0211: writeAttribute(attribute);
0212:
0213: if (autoFlush) {
0214: flush();
0215: }
0216: }
0217:
0218: /** <p>This will print the <code>Document</code> to the current Writer.</p>
0219: *
0220: * <p> Warning: using your own Writer may cause the writer's
0221: * preferred character encoding to be ignored. If you use
0222: * encodings other than UTF8, we recommend using the method that
0223: * takes an OutputStream instead. </p>
0224: *
0225: * <p>Note: as with all Writers, you may need to flush() yours
0226: * after this method returns.</p>
0227: *
0228: * @param doc <code>Document</code> to format.
0229: * @throws IOException - if there's any problem writing.
0230: **/
0231: public void write(Document doc) throws IOException {
0232: writeDeclaration();
0233:
0234: if (doc.getDocType() != null) {
0235: indent();
0236: writeDocType(doc.getDocType());
0237: }
0238:
0239: for (int i = 0, size = doc.nodeCount(); i < size; i++) {
0240: Node node = doc.node(i);
0241: writeNode(node);
0242: }
0243: writePrintln();
0244:
0245: if (autoFlush) {
0246: flush();
0247: }
0248: }
0249:
0250: /** <p>Writes the <code>{@link org.dom4j.Element}</code>, including
0251: * its <code>{@link Attribute}</code>s, and its value, and all
0252: * its content (child nodes) to the current Writer.</p>
0253: *
0254: * @param element <code>Element</code> to output.
0255: */
0256: public void write(Element element) throws IOException {
0257: writeElement(element);
0258:
0259: if (autoFlush) {
0260: flush();
0261: }
0262: }
0263:
0264: /** Writes the given {@link CDATA}.
0265: *
0266: * @param cdata <code>CDATA</code> to output.
0267: */
0268: public void write(CDATA cdata) throws IOException {
0269: writeCDATA(cdata.getText());
0270:
0271: if (autoFlush) {
0272: flush();
0273: }
0274: }
0275:
0276: /** Writes the given {@link Comment}.
0277: *
0278: * @param comment <code>Comment</code> to output.
0279: */
0280: public void write(Comment comment) throws IOException {
0281: writeComment(comment.getText());
0282:
0283: if (autoFlush) {
0284: flush();
0285: }
0286: }
0287:
0288: /** Writes the given {@link DocumentType}.
0289: *
0290: * @param docType <code>DocumentType</code> to output.
0291: */
0292: public void write(DocumentType docType) throws IOException {
0293: writeDocType(docType);
0294:
0295: if (autoFlush) {
0296: flush();
0297: }
0298: }
0299:
0300: /** Writes the given {@link Entity}.
0301: *
0302: * @param entity <code>Entity</code> to output.
0303: */
0304: public void write(Entity entity) throws IOException {
0305: writeEntity(entity);
0306:
0307: if (autoFlush) {
0308: flush();
0309: }
0310: }
0311:
0312: /** Writes the given {@link Namespace}.
0313: *
0314: * @param namespace <code>Namespace</code> to output.
0315: */
0316: public void write(Namespace namespace) throws IOException {
0317: writeNamespace(namespace);
0318:
0319: if (autoFlush) {
0320: flush();
0321: }
0322: }
0323:
0324: /** Writes the given {@link ProcessingInstruction}.
0325: *
0326: * @param processingInstruction <code>ProcessingInstruction</code> to output.
0327: */
0328: public void write(ProcessingInstruction processingInstruction)
0329: throws IOException {
0330: writeProcessingInstruction(processingInstruction);
0331:
0332: if (autoFlush) {
0333: flush();
0334: }
0335: }
0336:
0337: /** <p>Print out a {@link String}, Perfoms
0338: * the necessary entity escaping and whitespace stripping.</p>
0339: *
0340: * @param text is the text to output
0341: */
0342: public void write(String text) throws IOException {
0343: writeString(text);
0344:
0345: if (autoFlush) {
0346: flush();
0347: }
0348: }
0349:
0350: /** Writes the given {@link Text}.
0351: *
0352: * @param text <code>Text</code> to output.
0353: */
0354: public void write(Text text) throws IOException {
0355: writeString(text.getText());
0356:
0357: if (autoFlush) {
0358: flush();
0359: }
0360: }
0361:
0362: /** Writes the given {@link Node}.
0363: *
0364: * @param node <code>Node</code> to output.
0365: */
0366: public void write(Node node) throws IOException {
0367: writeNode(node);
0368:
0369: if (autoFlush) {
0370: flush();
0371: }
0372: }
0373:
0374: /** Writes the given object which should be a String, a Node or a List
0375: * of Nodes.
0376: *
0377: * @param object is the object to output.
0378: */
0379: public void write(Object object) throws IOException {
0380: if (object instanceof Node) {
0381: write((Node) object);
0382: } else if (object instanceof String) {
0383: write((String) object);
0384: } else if (object instanceof List) {
0385: List list = (List) object;
0386: for (int i = 0, size = list.size(); i < size; i++) {
0387: write(list.get(i));
0388: }
0389: } else if (object != null) {
0390: throw new IOException("Invalid object: " + object);
0391: }
0392: }
0393:
0394: /** <p>Writes the opening tag of an {@link Element},
0395: * including its {@link Attribute}s
0396: * but without its content.</p>
0397: *
0398: * @param element <code>Element</code> to output.
0399: */
0400: public void writeOpen(Element element) throws IOException {
0401: writer.write("<");
0402: writer.write(element.getQualifiedName());
0403: writeAttributes(element);
0404: writer.write(">");
0405: }
0406:
0407: /** <p>Writes the closing tag of an {@link Element}</p>
0408: *
0409: * @param element <code>Element</code> to output.
0410: */
0411: public void writeClose(Element element) throws IOException {
0412: writeClose(element.getQualifiedName());
0413: }
0414:
0415: // XMLFilterImpl methods
0416: //-------------------------------------------------------------------------
0417: public void parse(InputSource source) throws IOException,
0418: SAXException {
0419: installLexicalHandler();
0420: super .parse(source);
0421: }
0422:
0423: public void setProperty(String name, Object value)
0424: throws SAXNotRecognizedException, SAXNotSupportedException {
0425: for (int i = 0; i < LEXICAL_HANDLER_NAMES.length; i++) {
0426: if (LEXICAL_HANDLER_NAMES[i].equals(name)) {
0427: setLexicalHandler((LexicalHandler) value);
0428: return;
0429: }
0430: }
0431: super .setProperty(name, value);
0432: }
0433:
0434: public Object getProperty(String name)
0435: throws SAXNotRecognizedException, SAXNotSupportedException {
0436: for (int i = 0; i < LEXICAL_HANDLER_NAMES.length; i++) {
0437: if (LEXICAL_HANDLER_NAMES[i].equals(name)) {
0438: return getLexicalHandler();
0439: }
0440: }
0441: return super .getProperty(name);
0442: }
0443:
0444: public void setLexicalHandler(LexicalHandler handler) {
0445: if (handler == null) {
0446: throw new NullPointerException("Null lexical handler");
0447: } else {
0448: this .lexicalHandler = handler;
0449: }
0450: }
0451:
0452: public LexicalHandler getLexicalHandler() {
0453: return lexicalHandler;
0454: }
0455:
0456: // ContentHandler interface
0457: //-------------------------------------------------------------------------
0458: public void setDocumentLocator(Locator locator) {
0459: super .setDocumentLocator(locator);
0460: }
0461:
0462: public void startDocument() throws SAXException {
0463: try {
0464: writeDeclaration();
0465: super .startDocument();
0466: } catch (IOException e) {
0467: handleException(e);
0468: }
0469: }
0470:
0471: public void endDocument() throws SAXException {
0472: super .endDocument();
0473:
0474: if (autoFlush) {
0475: try {
0476: flush();
0477: } catch (IOException e) {
0478: }
0479: }
0480: }
0481:
0482: public void startPrefixMapping(String prefix, String uri)
0483: throws SAXException {
0484: if (namespacesMap == null) {
0485: namespacesMap = new HashMap();
0486: }
0487: namespacesMap.put(prefix, uri);
0488: super .startPrefixMapping(prefix, uri);
0489: }
0490:
0491: public void endPrefixMapping(String prefix) throws SAXException {
0492: super .endPrefixMapping(prefix);
0493: }
0494:
0495: public void startElement(String namespaceURI, String localName,
0496: String qName, Attributes attributes) throws SAXException {
0497: try {
0498: charactersAdded = false;
0499:
0500: writePrintln();
0501: indent();
0502: writer.write("<");
0503: writer.write(qName);
0504: writeNamespaces();
0505: writeAttributes(attributes);
0506: writer.write(">");
0507: ++indentLevel;
0508: lastOutputNodeType = Node.ELEMENT_NODE;
0509:
0510: super .startElement(namespaceURI, localName, qName,
0511: attributes);
0512: } catch (IOException e) {
0513: handleException(e);
0514: }
0515: }
0516:
0517: public void endElement(String namespaceURI, String localName,
0518: String qName) throws SAXException {
0519: try {
0520: charactersAdded = false;
0521: --indentLevel;
0522: if (lastOutputNodeType == Node.ELEMENT_NODE) {
0523: writePrintln();
0524: indent();
0525: }
0526:
0527: // XXXX: need to determine this using a stack and checking for
0528: // content / children
0529: boolean hadContent = true;
0530: if (hadContent) {
0531: writeClose(qName);
0532: } else {
0533: writeEmptyElementClose(qName);
0534: }
0535: lastOutputNodeType = Node.ELEMENT_NODE;
0536:
0537: super .endElement(namespaceURI, localName, qName);
0538: } catch (IOException e) {
0539: handleException(e);
0540: }
0541: }
0542:
0543: public void characters(char[] ch, int start, int length)
0544: throws SAXException {
0545: if (ch == null || ch.length == 0 || length <= 0) {
0546: return;
0547: }
0548:
0549: try {
0550: /*
0551: * we can't use the writeString method here because it's possible
0552: * we don't receive all characters at once and calling writeString
0553: * would cause unwanted spaces to be added in between these chunks
0554: * of character arrays.
0555: */
0556: String string = new String(ch, start, length);
0557:
0558: if (escapeText) {
0559: string = escapeElementEntities(string);
0560: }
0561:
0562: if (format.isTrimText()) {
0563: if ((lastOutputNodeType == Node.TEXT_NODE)
0564: && !charactersAdded) {
0565: writer.write(" ");
0566: } else if (charactersAdded
0567: && Character.isWhitespace(lastChar)) {
0568: writer.write(lastChar);
0569: }
0570:
0571: String delim = "";
0572: StringTokenizer tokens = new StringTokenizer(string);
0573: while (tokens.hasMoreTokens()) {
0574: writer.write(delim);
0575: writer.write(tokens.nextToken());
0576: delim = " ";
0577: }
0578: } else {
0579: writer.write(string);
0580: }
0581:
0582: charactersAdded = true;
0583: lastChar = ch[start + length - 1];
0584: lastOutputNodeType = Node.TEXT_NODE;
0585:
0586: super .characters(ch, start, length);
0587: } catch (IOException e) {
0588: handleException(e);
0589: }
0590: }
0591:
0592: public void ignorableWhitespace(char[] ch, int start, int length)
0593: throws SAXException {
0594: super .ignorableWhitespace(ch, start, length);
0595: }
0596:
0597: public void processingInstruction(String target, String data)
0598: throws SAXException {
0599: try {
0600: indent();
0601: writer.write("<?");
0602: writer.write(target);
0603: writer.write(" ");
0604: writer.write(data);
0605: writer.write("?>");
0606: writePrintln();
0607: lastOutputNodeType = Node.PROCESSING_INSTRUCTION_NODE;
0608:
0609: super .processingInstruction(target, data);
0610: } catch (IOException e) {
0611: handleException(e);
0612: }
0613: }
0614:
0615: // DTDHandler interface
0616: //-------------------------------------------------------------------------
0617: public void notationDecl(String name, String publicID,
0618: String systemID) throws SAXException {
0619: super .notationDecl(name, publicID, systemID);
0620: }
0621:
0622: public void unparsedEntityDecl(String name, String publicID,
0623: String systemID, String notationName) throws SAXException {
0624: super
0625: .unparsedEntityDecl(name, publicID, systemID,
0626: notationName);
0627: }
0628:
0629: // LexicalHandler interface
0630: //-------------------------------------------------------------------------
0631: public void startDTD(String name, String publicID, String systemID)
0632: throws SAXException {
0633: inDTD = true;
0634: try {
0635: writeDocType(name, publicID, systemID);
0636: } catch (IOException e) {
0637: handleException(e);
0638: }
0639:
0640: if (lexicalHandler != null) {
0641: lexicalHandler.startDTD(name, publicID, systemID);
0642: }
0643: }
0644:
0645: public void endDTD() throws SAXException {
0646: inDTD = false;
0647: if (lexicalHandler != null) {
0648: lexicalHandler.endDTD();
0649: }
0650: }
0651:
0652: public void startCDATA() throws SAXException {
0653: try {
0654: writer.write("<![CDATA[");
0655: } catch (IOException e) {
0656: handleException(e);
0657: }
0658:
0659: if (lexicalHandler != null) {
0660: lexicalHandler.startCDATA();
0661: }
0662: }
0663:
0664: public void endCDATA() throws SAXException {
0665: try {
0666: writer.write("]]>");
0667: } catch (IOException e) {
0668: handleException(e);
0669: }
0670:
0671: if (lexicalHandler != null) {
0672: lexicalHandler.endCDATA();
0673: }
0674: }
0675:
0676: public void startEntity(String name) throws SAXException {
0677: try {
0678: writeEntityRef(name);
0679: } catch (IOException e) {
0680: handleException(e);
0681: }
0682:
0683: if (lexicalHandler != null) {
0684: lexicalHandler.startEntity(name);
0685: }
0686: }
0687:
0688: public void endEntity(String name) throws SAXException {
0689: if (lexicalHandler != null) {
0690: lexicalHandler.endEntity(name);
0691: }
0692: }
0693:
0694: public void comment(char[] ch, int start, int length)
0695: throws SAXException {
0696: if (showCommentsInDTDs || !inDTD) {
0697: try {
0698: charactersAdded = false;
0699: writeComment(new String(ch, start, length));
0700: } catch (IOException e) {
0701: handleException(e);
0702: }
0703: }
0704:
0705: if (lexicalHandler != null) {
0706: lexicalHandler.comment(ch, start, length);
0707: }
0708: }
0709:
0710: // Implementation methods
0711: //-------------------------------------------------------------------------
0712: protected void writeElement(Element element) throws IOException {
0713: int size = element.nodeCount();
0714: String qualifiedName = element.getQualifiedName();
0715:
0716: writePrintln();
0717: indent();
0718:
0719: writer.write("<");
0720: writer.write(qualifiedName);
0721:
0722: int previouslyDeclaredNamespaces = namespaceStack.size();
0723: Namespace ns = element.getNamespace();
0724: if (isNamespaceDeclaration(ns)) {
0725: namespaceStack.push(ns);
0726: writeNamespace(ns);
0727: }
0728:
0729: // Print out additional namespace declarations
0730: boolean textOnly = true;
0731: for (int i = 0; i < size; i++) {
0732: Node node = element.node(i);
0733: if (node instanceof Namespace) {
0734: Namespace additional = (Namespace) node;
0735: if (isNamespaceDeclaration(additional)) {
0736: namespaceStack.push(additional);
0737: writeNamespace(additional);
0738: }
0739: } else if (node instanceof Element) {
0740: textOnly = false;
0741: } else if (node instanceof Comment) {
0742: textOnly = false;
0743: }
0744: }
0745:
0746: writeAttributes(element);
0747:
0748: lastOutputNodeType = Node.ELEMENT_NODE;
0749:
0750: if (size <= 0) {
0751: writeEmptyElementClose(qualifiedName);
0752: } else {
0753: writer.write(">");
0754: if (textOnly) {
0755: // we have at least one text node so lets assume
0756: // that its non-empty
0757: writeElementContent(element);
0758: } else {
0759: // we know it's not null or empty from above
0760: ++indentLevel;
0761:
0762: writeElementContent(element);
0763:
0764: --indentLevel;
0765:
0766: writePrintln();
0767: indent();
0768: }
0769: writer.write("</");
0770: writer.write(qualifiedName);
0771: writer.write(">");
0772: }
0773:
0774: // remove declared namespaceStack from stack
0775: while (namespaceStack.size() > previouslyDeclaredNamespaces) {
0776: namespaceStack.pop();
0777: }
0778:
0779: lastOutputNodeType = Node.ELEMENT_NODE;
0780: }
0781:
0782: /**
0783: * Determines if element is a special case of XML elements
0784: * where it contains an xml:space attribute of "preserve".
0785: * If it does, then retain whitespace.
0786: */
0787: protected final boolean isElementSpacePreserved(Element element) {
0788: final Attribute attr = (Attribute) element.attribute("space");
0789: boolean preserveFound = preserve; //default to global state
0790: if (attr != null) {
0791: if ("xml".equals(attr.getNamespacePrefix())
0792: && "preserve".equals(attr.getText())) {
0793: preserveFound = true;
0794: } else {
0795: preserveFound = false;
0796: }
0797: }
0798: return preserveFound;
0799: }
0800:
0801: /** Outputs the content of the given element. If whitespace trimming is
0802: * enabled then all adjacent text nodes are appended together before
0803: * the whitespace trimming occurs to avoid problems with multiple
0804: * text nodes being created due to text content that spans parser buffers
0805: * in a SAX parser.
0806: */
0807: protected void writeElementContent(Element element)
0808: throws IOException {
0809: boolean trim = format.isTrimText();
0810: boolean oldPreserve = preserve;
0811: if (trim) { //verify we have to before more expensive test
0812: preserve = isElementSpacePreserved(element);
0813: trim = !preserve;
0814: }
0815: if (trim) {
0816: // concatenate adjacent text nodes together
0817: // so that whitespace trimming works properly
0818: Text lastTextNode = null;
0819: StringBuilder buffer = null;
0820: boolean textOnly = true;
0821: for (int i = 0, size = element.nodeCount(); i < size; i++) {
0822: Node node = element.node(i);
0823: if (node instanceof Text) {
0824: if (lastTextNode == null) {
0825: lastTextNode = (Text) node;
0826: } else {
0827: if (buffer == null) {
0828: buffer = new StringBuilder(lastTextNode
0829: .getText());
0830: }
0831: buffer.append(((Text) node).getText());
0832: }
0833: } else {
0834: if (!textOnly && format.isPadText()) {
0835: writer.write(PAD_TEXT);
0836: }
0837:
0838: textOnly = false;
0839:
0840: if (lastTextNode != null) {
0841: if (buffer != null) {
0842: writeString(buffer.toString());
0843: buffer = null;
0844: } else {
0845: writeString(lastTextNode.getText());
0846: }
0847: lastTextNode = null;
0848:
0849: if (format.isPadText()) {
0850: writer.write(PAD_TEXT);
0851: }
0852: }
0853: writeNode(node);
0854: }
0855: }
0856: if (lastTextNode != null) {
0857: if (!textOnly && format.isPadText()) {
0858: writer.write(PAD_TEXT);
0859: }
0860: if (buffer != null) {
0861: writeString(buffer.toString());
0862: buffer = null;
0863: } else {
0864: writeString(lastTextNode.getText());
0865: }
0866: lastTextNode = null;
0867: }
0868: } else {
0869: Node lastTextNode = null;
0870: for (int i = 0, size = element.nodeCount(); i < size; i++) {
0871: Node node = element.node(i);
0872: if (node instanceof Text) {
0873: writeNode(node);
0874: lastTextNode = node;
0875: } else {
0876: if ((lastTextNode != null) && format.isPadText()) {
0877: writer.write(PAD_TEXT);
0878: }
0879: writeNode(node);
0880: if ((lastTextNode != null) && format.isPadText()) {
0881: writer.write(PAD_TEXT);
0882: }
0883: lastTextNode = null;
0884: }
0885: }
0886: }
0887: preserve = oldPreserve;
0888: }
0889:
0890: protected void writeCDATA(String text) throws IOException {
0891: writer.write("<![CDATA[");
0892: if (text != null) {
0893: writer.write(text);
0894: }
0895: writer.write("]]>");
0896:
0897: lastOutputNodeType = Node.CDATA_SECTION_NODE;
0898: }
0899:
0900: protected void writeDocType(DocumentType docType)
0901: throws IOException {
0902: if (docType != null) {
0903: docType.write(writer);
0904: //writeDocType( docType.getElementName(), docType.getPublicID(), docType.getSystemID() );
0905: writePrintln();
0906: }
0907: }
0908:
0909: protected void writeNamespace(Namespace namespace)
0910: throws IOException {
0911: if (namespace != null) {
0912: writeNamespace(namespace.getPrefix(), namespace.getURI());
0913: }
0914: }
0915:
0916: /**
0917: * Writes the SAX namepsaces
0918: */
0919: protected void writeNamespaces() throws IOException {
0920: if (namespacesMap != null) {
0921: for (Iterator iter = namespacesMap.entrySet().iterator(); iter
0922: .hasNext();) {
0923: Map.Entry entry = (Map.Entry) iter.next();
0924: String prefix = (String) entry.getKey();
0925: String uri = (String) entry.getValue();
0926: writeNamespace(prefix, uri);
0927: }
0928: namespacesMap = null;
0929: }
0930: }
0931:
0932: /**
0933: * Writes the SAX namepsaces
0934: */
0935: protected void writeNamespace(String prefix, String uri)
0936: throws IOException {
0937: if (prefix != null && prefix.length() > 0) {
0938: writer.write(" xmlns:");
0939: writer.write(prefix);
0940: writer.write("=\"");
0941: } else {
0942: writer.write(" xmlns=\"");
0943: }
0944: writer.write(uri);
0945: writer.write("\"");
0946: }
0947:
0948: protected void writeProcessingInstruction(
0949: ProcessingInstruction processingInstruction)
0950: throws IOException {
0951: //indent();
0952: writer.write("<?");
0953: writer.write(processingInstruction.getName());
0954: writer.write(" ");
0955: writer.write(processingInstruction.getText());
0956: writer.write("?>");
0957: writePrintln();
0958:
0959: lastOutputNodeType = Node.PROCESSING_INSTRUCTION_NODE;
0960: }
0961:
0962: protected void writeString(String text) throws IOException {
0963: if (text != null && text.length() > 0) {
0964: if (escapeText) {
0965: text = escapeElementEntities(text);
0966: }
0967:
0968: // if (format.isPadText()) {
0969: // if (lastOutputNodeType == Node.ELEMENT_NODE) {
0970: // writer.write(PAD_TEXT);
0971: // }
0972: // }
0973:
0974: if (format.isTrimText()) {
0975: boolean first = true;
0976: StringTokenizer tokenizer = new StringTokenizer(text);
0977: while (tokenizer.hasMoreTokens()) {
0978: String token = tokenizer.nextToken();
0979: if (first) {
0980: first = false;
0981: if (lastOutputNodeType == Node.TEXT_NODE) {
0982: writer.write(" ");
0983: }
0984: } else {
0985: writer.write(" ");
0986: }
0987: writer.write(token);
0988: lastOutputNodeType = Node.TEXT_NODE;
0989: }
0990: } else {
0991: lastOutputNodeType = Node.TEXT_NODE;
0992: writer.write(text);
0993: }
0994: }
0995: }
0996:
0997: /**
0998: * This method is used to write out Nodes that contain text
0999: * and still allow for xml:space to be handled properly.
1000: *
1001: */
1002: protected void writeNodeText(Node node) throws IOException {
1003: String text = node.getText();
1004: if (text != null && text.length() > 0) {
1005: if (escapeText) {
1006: text = escapeElementEntities(text);
1007: }
1008:
1009: lastOutputNodeType = Node.TEXT_NODE;
1010: writer.write(text);
1011: }
1012: }
1013:
1014: protected void writeNode(Node node) throws IOException {
1015: int nodeType = node.getNodeType();
1016: switch (nodeType) {
1017: case Node.ELEMENT_NODE:
1018: writeElement((Element) node);
1019: break;
1020: case Node.ATTRIBUTE_NODE:
1021: writeAttribute((Attribute) node);
1022: break;
1023: case Node.TEXT_NODE:
1024: writeNodeText(node);
1025: //write((Text) node);
1026: break;
1027: case Node.CDATA_SECTION_NODE:
1028: writeCDATA(node.getText());
1029: break;
1030: case Node.ENTITY_REFERENCE_NODE:
1031: writeEntity((Entity) node);
1032: break;
1033: case Node.PROCESSING_INSTRUCTION_NODE:
1034: writeProcessingInstruction((ProcessingInstruction) node);
1035: break;
1036: case Node.COMMENT_NODE:
1037: writeComment(node.getText());
1038: break;
1039: case Node.DOCUMENT_NODE:
1040: write((Document) node);
1041: break;
1042: case Node.DOCUMENT_TYPE_NODE:
1043: writeDocType((DocumentType) node);
1044: break;
1045: case Node.NAMESPACE_NODE:
1046: // Will be output with attributes
1047: //write((Namespace) node);
1048: break;
1049: default:
1050: throw new IOException("Invalid node type: " + node);
1051: }
1052: }
1053:
1054: protected void installLexicalHandler() {
1055: XMLReader parent = getParent();
1056: if (parent == null) {
1057: throw new NullPointerException("No parent for filter");
1058: }
1059: // try to register for lexical events
1060: for (int i = 0; i < LEXICAL_HANDLER_NAMES.length; i++) {
1061: try {
1062: parent.setProperty(LEXICAL_HANDLER_NAMES[i], this );
1063: break;
1064: } catch (SAXNotRecognizedException ex) {
1065: // ignore
1066: } catch (SAXNotSupportedException ex) {
1067: // ignore
1068: }
1069: }
1070: }
1071:
1072: protected void writeDocType(String name, String publicID,
1073: String systemID) throws IOException {
1074: boolean hasPublic = false;
1075:
1076: writer.write("<!DOCTYPE ");
1077: writer.write(name);
1078: if ((publicID != null) && (!publicID.equals(""))) {
1079: writer.write(" PUBLIC \"");
1080: writer.write(publicID);
1081: writer.write("\"");
1082: hasPublic = true;
1083: }
1084: if ((systemID != null) && (!systemID.equals(""))) {
1085: if (!hasPublic) {
1086: writer.write(" SYSTEM");
1087: }
1088: writer.write(" \"");
1089: writer.write(systemID);
1090: writer.write("\"");
1091: }
1092: writer.write(">");
1093: writePrintln();
1094: }
1095:
1096: protected void writeEntity(Entity entity) throws IOException {
1097: if (!resolveEntityRefs()) {
1098: writeEntityRef(entity.getName());
1099: } else {
1100: writer.write(entity.getText());
1101: }
1102: }
1103:
1104: protected void writeEntityRef(String name) throws IOException {
1105: writer.write("&");
1106: writer.write(name);
1107: writer.write(";");
1108:
1109: lastOutputNodeType = Node.ENTITY_REFERENCE_NODE;
1110: }
1111:
1112: protected void writeComment(String text) throws IOException {
1113: if (format.isNewlines()) {
1114: println();
1115: indent();
1116: }
1117: writer.write("<!--");
1118: writer.write(text);
1119: writer.write("-->");
1120:
1121: lastOutputNodeType = Node.COMMENT_NODE;
1122: }
1123:
1124: /** Writes the attributes of the given element
1125: *
1126: */
1127: protected void writeAttributes(Element element) throws IOException {
1128:
1129: // I do not yet handle the case where the same prefix maps to
1130: // two different URIs. For attributes on the same element
1131: // this is illegal; but as yet we don't throw an exception
1132: // if someone tries to do this
1133: for (int i = 0, size = element.attributeCount(); i < size; i++) {
1134: Attribute attribute = element.attribute(i);
1135: Namespace ns = attribute.getNamespace();
1136: if (ns != null && ns != Namespace.NO_NAMESPACE
1137: && ns != Namespace.XML_NAMESPACE) {
1138: String prefix = ns.getPrefix();
1139: String uri = namespaceStack.getURI(prefix);
1140: if (!ns.getURI().equals(uri)) { // output a new namespace declaration
1141: writeNamespace(ns);
1142: namespaceStack.push(ns);
1143: }
1144: }
1145:
1146: // If the attribute is a namespace declaration, check if we have already
1147: // written that declaration elsewhere (if that's the case, it must be
1148: // in the namespace stack
1149: String attName = attribute.getName();
1150: if (attName.startsWith("xmlns:")) {
1151: String prefix = attName.substring(6);
1152: if (namespaceStack.getNamespaceForPrefix(prefix) == null) {
1153: String uri = attribute.getValue();
1154: namespaceStack.push(prefix, uri);
1155: writeNamespace(prefix, uri);
1156: }
1157: } else if (attName.equals("xmlns")) {
1158: if (namespaceStack.getDefaultNamespace() == null) {
1159: String uri = attribute.getValue();
1160: namespaceStack.push(null, uri);
1161: writeNamespace(null, uri);
1162: }
1163: } else {
1164: char quote = format.getAttributeQuoteCharacter();
1165: writer.write(" ");
1166: writer.write(attribute.getQualifiedName());
1167: writer.write("=");
1168: writer.write(quote);
1169: writeEscapeAttributeEntities(attribute.getValue());
1170: writer.write(quote);
1171: }
1172: }
1173: }
1174:
1175: protected void writeAttribute(Attribute attribute)
1176: throws IOException {
1177: writer.write(" ");
1178: writer.write(attribute.getQualifiedName());
1179: writer.write("=");
1180:
1181: char quote = format.getAttributeQuoteCharacter();
1182: writer.write(quote);
1183:
1184: writeEscapeAttributeEntities(attribute.getValue());
1185:
1186: writer.write(quote);
1187: lastOutputNodeType = Node.ATTRIBUTE_NODE;
1188: }
1189:
1190: protected void writeAttributes(Attributes attributes)
1191: throws IOException {
1192: for (int i = 0, size = attributes.getLength(); i < size; i++) {
1193: writeAttribute(attributes, i);
1194: }
1195: }
1196:
1197: protected void writeAttribute(Attributes attributes, int index)
1198: throws IOException {
1199: char quote = format.getAttributeQuoteCharacter();
1200: writer.write(" ");
1201: writer.write(attributes.getQName(index));
1202: writer.write("=");
1203: writer.write(quote);
1204: writeEscapeAttributeEntities(attributes.getValue(index));
1205: writer.write(quote);
1206: }
1207:
1208: protected void indent() throws IOException {
1209: String indent = format.getIndent();
1210: if (indent != null && indent.length() > 0) {
1211: for (int i = 0; i < indentLevel; i++) {
1212: writer.write(indent);
1213: }
1214: }
1215: }
1216:
1217: /**
1218: * <p>
1219: * This will print a new line only if the newlines flag was set to true
1220: * </p>
1221: */
1222: protected void writePrintln() throws IOException {
1223: if (format.isNewlines()) {
1224: writer.write(format.getLineSeparator());
1225: }
1226: }
1227:
1228: /**
1229: * Get an OutputStreamWriter, use preferred encoding.
1230: */
1231: protected Writer createWriter(OutputStream outStream,
1232: String encoding) throws UnsupportedEncodingException {
1233: return new BufferedWriter(new OutputStreamWriter(outStream,
1234: encoding));
1235: }
1236:
1237: /**
1238: * <p>
1239: * This will write the declaration to the given Writer.
1240: * Assumes XML version 1.0 since we don't directly know.
1241: * </p>
1242: */
1243: protected void writeDeclaration() throws IOException {
1244: String encoding = format.getEncoding();
1245:
1246: // Only print of declaration is not suppressed
1247: if (!format.isSuppressDeclaration()) {
1248: // Assume 1.0 version
1249: if (encoding.equals("UTF8")) {
1250: writer.write("<?xml version=\"1.0\"");
1251: if (!format.isOmitEncoding()) {
1252: writer.write(" encoding=\"UTF-8\"");
1253: }
1254: writer.write("?>");
1255: } else {
1256: writer.write("<?xml version=\"1.0\"");
1257: if (!format.isOmitEncoding()) {
1258: writer.write(" encoding=\"" + encoding + "\"");
1259: }
1260: writer.write("?>");
1261: }
1262: if (format.isNewLineAfterDeclaration()) {
1263: println();
1264: }
1265: }
1266: }
1267:
1268: protected void writeClose(String qualifiedName) throws IOException {
1269: writer.write("</");
1270: writer.write(qualifiedName);
1271: writer.write(">");
1272: }
1273:
1274: protected void writeEmptyElementClose(String qualifiedName)
1275: throws IOException {
1276: // Simply close up
1277: if (!format.isExpandEmptyElements()) {
1278: writer.write("/>");
1279: } else {
1280: writer.write("></");
1281: writer.write(qualifiedName);
1282: writer.write(">");
1283: }
1284: }
1285:
1286: protected boolean isExpandEmptyElements() {
1287: return format.isExpandEmptyElements();
1288: }
1289:
1290: /** This will take the pre-defined entities in XML 1.0 and
1291: * convert their character representation to the appropriate
1292: * entity reference, suitable for XML attributes.
1293: */
1294: protected String escapeElementEntities(String text) {
1295: char[] block = null;
1296: int i, last = 0, size = text.length();
1297: for (i = 0; i < size; i++) {
1298: String entity = null;
1299: char c = text.charAt(i);
1300: switch (c) {
1301: case '<':
1302: entity = "<";
1303: break;
1304: case '>':
1305: entity = ">";
1306: break;
1307: case '&':
1308: entity = "&";
1309: break;
1310: case '\t':
1311: case '\n':
1312: case '\r':
1313: // don't encode standard whitespace characters
1314: if (preserve) {
1315: entity = String.valueOf(c);
1316: }
1317: break;
1318: default:
1319: if (c < 32 || shouldEncodeChar(c)) {
1320: entity = "&#" + (int) c + ";";
1321: }
1322: break;
1323: }
1324: if (entity != null) {
1325: if (block == null) {
1326: block = text.toCharArray();
1327: }
1328: buffer.append(block, last, i - last);
1329: buffer.append(entity);
1330: last = i + 1;
1331: }
1332: }
1333: if (last == 0) {
1334: return text;
1335: }
1336: if (last < size) {
1337: if (block == null) {
1338: block = text.toCharArray();
1339: }
1340: buffer.append(block, last, i - last);
1341: }
1342: String answer = buffer.toString();
1343: buffer.setLength(0);
1344: return answer;
1345: }
1346:
1347: protected void writeEscapeAttributeEntities(String text)
1348: throws IOException {
1349: if (text != null) {
1350: String escapedText = escapeAttributeEntities(text);
1351: writer.write(escapedText);
1352: }
1353: }
1354:
1355: /** This will take the pre-defined entities in XML 1.0 and
1356: * convert their character representation to the appropriate
1357: * entity reference, suitable for XML attributes.
1358: */
1359: protected String escapeAttributeEntities(String text) {
1360: char quote = format.getAttributeQuoteCharacter();
1361:
1362: char[] block = null;
1363: int i, last = 0, size = text.length();
1364: for (i = 0; i < size; i++) {
1365: String entity = null;
1366: char c = text.charAt(i);
1367: switch (c) {
1368: case '<':
1369: entity = "<";
1370: break;
1371: case '>':
1372: entity = ">";
1373: break;
1374: case '\'':
1375: if (quote == '\'') {
1376: entity = "'";
1377: }
1378: break;
1379: case '\"':
1380: if (quote == '\"') {
1381: entity = """;
1382: }
1383: break;
1384: case '&':
1385: entity = "&";
1386: break;
1387: case '\t':
1388: case '\n':
1389: case '\r':
1390: // don't encode standard whitespace characters
1391: break;
1392: default:
1393: if (c < 32 || shouldEncodeChar(c)) {
1394: entity = "&#" + (int) c + ";";
1395: }
1396: break;
1397: }
1398: if (entity != null) {
1399: if (block == null) {
1400: block = text.toCharArray();
1401: }
1402: buffer.append(block, last, i - last);
1403: buffer.append(entity);
1404: last = i + 1;
1405: }
1406: }
1407: if (last == 0) {
1408: return text;
1409: }
1410: if (last < size) {
1411: if (block == null) {
1412: block = text.toCharArray();
1413: }
1414: buffer.append(block, last, i - last);
1415: }
1416: String answer = buffer.toString();
1417: buffer.setLength(0);
1418: return answer;
1419: }
1420:
1421: /**
1422: * Should the given character be escaped. This depends on the
1423: * encoding of the document.
1424: *
1425: * @return boolean
1426: */
1427: protected boolean shouldEncodeChar(char c) {
1428: int max = getMaximumAllowedCharacter();
1429: return max > 0 && c > max;
1430: }
1431:
1432: /**
1433: * Returns the maximum allowed character code that should be allowed
1434: * unescaped which defaults to 127 in US-ASCII (7 bit) or
1435: * 255 in ISO-* (8 bit).
1436: */
1437: protected int defaultMaximumAllowedCharacter() {
1438: String encoding = format.getEncoding();
1439: if (encoding != null) {
1440: if (encoding.equals("US-ASCII")) {
1441: return 127;
1442: }
1443: }
1444: // no encoding for things like ISO-*, UTF-8 or UTF-16
1445: return -1;
1446: }
1447:
1448: protected boolean isNamespaceDeclaration(Namespace ns) {
1449: if (ns != null && ns != Namespace.XML_NAMESPACE) {
1450: String uri = ns.getURI();
1451: if (uri != null) {
1452: if (!namespaceStack.contains(ns)) {
1453: return true;
1454:
1455: }
1456: }
1457: }
1458: return false;
1459:
1460: }
1461:
1462: protected void handleException(IOException e) throws SAXException {
1463: throw new SAXException(e);
1464: }
1465:
1466: //Laramie Crocker 4/8/2002 10:38AM
1467: /** Lets subclasses get at the current format object, so they can call setTrimText, setNewLines, etc.
1468: * Put in to support the HTMLWriter, in the way
1469: * that it pushes the current newline/trim state onto a stack and overrides
1470: * the state within preformatted tags.
1471: */
1472: protected OutputFormat getOutputFormat() {
1473: return format;
1474: }
1475:
1476: public boolean resolveEntityRefs() {
1477: return resolveEntityRefs;
1478: }
1479:
1480: public void setResolveEntityRefs(boolean resolve) {
1481: this.resolveEntityRefs = resolve;
1482: }
1483: }
|