0001: /* Copyright 2002-2005 Elliotte Rusty Harold
0002:
0003: This library is free software; you can redistribute it and/or modify
0004: it under the terms of version 2.1 of the GNU Lesser General Public
0005: License as published by the Free Software Foundation.
0006:
0007: This library is distributed in the hope that it will be useful,
0008: but WITHOUT ANY WARRANTY; without even the implied warranty of
0009: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
0010: GNU Lesser General Public License for more details.
0011:
0012: You should have received a copy of the GNU Lesser General Public
0013: License along with this library; if not, write to the
0014: Free Software Foundation, Inc., 59 Temple Place, Suite 330,
0015: Boston, MA 02111-1307 USA
0016:
0017: You can contact Elliotte Rusty Harold by sending e-mail to
0018: elharo@metalab.unc.edu. Please include the word "XOM" in the
0019: subject line. The XOM home page is located at http://www.xom.nu/
0020: */
0021:
0022: package nu.xom.canonical;
0023:
0024: import java.io.IOException;
0025: import java.io.OutputStream;
0026: import java.util.ArrayList;
0027: import java.util.Arrays;
0028: import java.util.Comparator;
0029: import java.util.Iterator;
0030: import java.util.List;
0031: import java.util.Map;
0032: import java.util.SortedMap;
0033: import java.util.StringTokenizer;
0034: import java.util.TreeMap;
0035: import java.util.Map.Entry;
0036:
0037: import org.xml.sax.helpers.NamespaceSupport;
0038:
0039: import nu.xom.Attribute;
0040: import nu.xom.Comment;
0041: import nu.xom.DocType;
0042: import nu.xom.Document;
0043: import nu.xom.Element;
0044: import nu.xom.Namespace;
0045: import nu.xom.Node;
0046: import nu.xom.Nodes;
0047: import nu.xom.ParentNode;
0048: import nu.xom.ProcessingInstruction;
0049: import nu.xom.Serializer;
0050: import nu.xom.Text;
0051: import nu.xom.XPathContext;
0052:
0053: /**
0054: * <p>
0055: * Writes XML in the format specified by <a target="_top"
0056: * href="http://www.w3.org/TR/2001/REC-xml-c14n-20010315">Canonical
0057: * XML Version 1.0</a> or <a target="_top"
0058: * href="http://www.w3.org/TR/2002/REC-xml-exc-c14n-20020718/">Exclusive
0059: * XML Canonicalization Version 1.0</a>.
0060: * </p>
0061: *
0062: * @author Elliotte Rusty Harold
0063: * @version 1.1b4
0064: *
0065: */
0066: public class Canonicalizer {
0067:
0068: private boolean withComments;
0069: private boolean exclusive = false;
0070: private CanonicalXMLSerializer serializer;
0071: private List inclusiveNamespacePrefixes = new ArrayList();
0072:
0073: private static Comparator comparator = new AttributeComparator();
0074:
0075: public final static String CANONICAL_XML = "http://www.w3.org/TR/2001/REC-xml-c14n-20010315";
0076: public final static String CANONICAL_XML_WITH_COMMENTS = "http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments";
0077: public final static String EXCLUSIVE_XML_CANONICALIZATION = "http://www.w3.org/2001/10/xml-exc-c14n#";
0078: public final static String EXCLUSIVE_XML_CANONICALIZATION_WITH_COMMENTS = "http://www.w3.org/2001/10/xml-exc-c14n#WithComments";
0079:
0080: private static class AttributeComparator implements Comparator {
0081:
0082: public int compare(Object o1, Object o2) {
0083: Attribute a1 = (Attribute) o1;
0084: Attribute a2 = (Attribute) o2;
0085:
0086: String namespace1 = a1.getNamespaceURI();
0087: String namespace2 = a2.getNamespaceURI();
0088: if (namespace1.equals(namespace2)) {
0089: return a1.getLocalName().compareTo(a2.getLocalName());
0090: } else if (namespace1.equals("")) {
0091: return -1;
0092: } else if (namespace2.equals("")) {
0093: return 1;
0094: } else { // compare namespace URIs
0095: return namespace1.compareTo(namespace2);
0096: }
0097:
0098: }
0099:
0100: }
0101:
/**
 * <p>
 * Creates a <code>Canonicalizer</code> that writes regular
 * (non-exclusive) canonical XML, comments included, onto
 * the given stream.
 * </p>
 *
 * @param out the output stream that receives the canonical form
 */
public Canonicalizer(OutputStream out) {
    // comments on, exclusive canonicalization off
    this(out, true, false);
}
0114:
/**
 * <p>
 * Creates a <code>Canonicalizer</code> that writes regular
 * (non-exclusive) canonical XML onto the given stream,
 * optionally including comments.
 * </p>
 *
 * @param out the output stream that receives the canonical form
 * @param withComments true to copy comments to the output,
 *     false to leave them out
 */
public Canonicalizer(OutputStream out, boolean withComments) {
    // exclusive canonicalization is never selected by this constructor
    this(out, withComments, false);
}
0129:
/**
 * <p>
 * Creates a <code>Canonicalizer</code> with explicit settings
 * for comment handling and for the choice between the original
 * and the exclusive canonicalization algorithm.
 * </p>
 *
 * @param out the output stream that receives the canonical form
 * @param withComments true to copy comments to the output,
 *     false to leave them out
 * @param exclusive true to perform exclusive XML canonicalization,
 *     false to perform regular XML canonicalization
 */
private Canonicalizer(OutputStream out, boolean withComments,
        boolean exclusive) {

    this.withComments = withComments;
    this.exclusive = exclusive;
    this.serializer = new CanonicalXMLSerializer(out);
    this.serializer.setLineSeparator("\n");

}
0155:
0156: /**
0157: * <p>
0158: * Creates a <code>Canonicalizer</code> that outputs a
0159: * canonical XML document using the specified algorithm.
0160: * Currently, four algorithms are defined and supported:
0161: * </p>
0162: *
0163: * <ul>
0164: * <li>Canonical XML without comments:
0165: * <code>http://www.w3.org/TR/2001/REC-xml-c14n-20010315</code></li>
0166: * <li>Canonical XML with comments:
0167: * <code>http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments</code></li>
0168: * <li>Exclusive XML canonicalization without comments:
0169: * <code>http://www.w3.org/2001/10/xml-exc-c14n#</code></li>
0170: * <li>Exclusive XML canonicalization with comments:
0171: * <code>http://www.w3.org/2001/10/xml-exc-c14n#WithComments</code></li>
0172: * </ul>
0173: *
0174: * @param out the output stream the document
0175: * is written onto
0176: * @param algorithm the URI for the canonicalization algorithm
0177: *
0178: * @throws CanonicalizationException if the algorithm is
0179: * not recognized
0180: * @throws NullPointerException if the algorithm is null
0181: *
0182: */
0183: public Canonicalizer(OutputStream out, String algorithm) {
0184:
0185: if (algorithm == null) {
0186: throw new NullPointerException("Null algorithm");
0187: }
0188: this .serializer = new CanonicalXMLSerializer(out);
0189: serializer.setLineSeparator("\n");
0190: if (algorithm.equals(CANONICAL_XML)) {
0191: this .withComments = false;
0192: this .exclusive = false;
0193: } else if (algorithm.equals(CANONICAL_XML_WITH_COMMENTS)) {
0194: this .withComments = true;
0195: this .exclusive = false;
0196: } else if (algorithm.equals(EXCLUSIVE_XML_CANONICALIZATION)) {
0197: this .withComments = false;
0198: this .exclusive = true;
0199: } else if (algorithm
0200: .equals(EXCLUSIVE_XML_CANONICALIZATION_WITH_COMMENTS)) {
0201: this .withComments = true;
0202: this .exclusive = true;
0203: } else {
0204: throw new CanonicalizationException(
0205: "Unsupported canonicalization algorithm: "
0206: + algorithm);
0207: }
0208:
0209: }
0210:
0211: private class CanonicalXMLSerializer extends Serializer {
0212:
0213: // If nodes is null we're canonicalizing all nodes;
0214: // the entire document; this is somewhat easier than when
0215: // canonicalizing only a document subset embedded in nodes
0216: private Nodes nodes;
0217: private NamespaceSupport inScope;
0218:
0219: /**
0220: * <p>
0221: * Creates a <code>Serializer</code> that outputs a
0222: * canonical XML document with or without comments.
0223: * </p>
0224: *
0225: * @param out the <code>OutputStream</code> the document
0226: * is written onto
0227: * @param withComments true if comments should be included
0228: * in the output, false otherwise
0229: */
0230: CanonicalXMLSerializer(OutputStream out) {
0231: super (out);
0232: setLineSeparator("\n");
0233: }
0234:
0235: /**
0236: * <p>
0237: * Serializes a document onto the output
0238: * stream using the canonical XML algorithm.
0239: * </p>
0240: *
0241: * @param doc the <code>Document</code> to serialize
0242: *
0243: * @throws IOException if the underlying <code>OutputStream</code>
0244: * encounters an I/O error
0245: */
0246: public final void write(Document doc) throws IOException {
0247:
0248: inScope = new NamespaceSupport();
0249: int position = 0;
0250: while (true) {
0251: Node child = doc.getChild(position);
0252: if (nodes == null || child instanceof Element
0253: || nodes.contains(child)) {
0254: writeChild(child);
0255: if (child instanceof ProcessingInstruction)
0256: breakLine();
0257: else if (child instanceof Comment && withComments) {
0258: breakLine();
0259: }
0260: }
0261: position++;
0262: if (child instanceof Element)
0263: break;
0264: }
0265:
0266: for (int i = position; i < doc.getChildCount(); i++) {
0267: Node child = doc.getChild(i);
0268: if (nodes == null || child instanceof Element
0269: || nodes.contains(child)) {
0270: if (child instanceof ProcessingInstruction)
0271: breakLine();
0272: else if (child instanceof Comment && withComments) {
0273: breakLine();
0274: }
0275: writeChild(child);
0276: }
0277: }
0278:
0279: flush();
0280:
0281: }
0282:
0283: /**
0284: * <p>
0285: * Serializes an element onto the output stream using the canonical
0286: * XML algorithm. The result is guaranteed to be well-formed.
0287: * If <code>element</code> does not have a parent element, it will
0288: * also be namespace well-formed.
0289: * </p>
0290: *
0291: * @param element the <code>Element</code> to serialize
0292: *
0293: * @throws IOException if the underlying <code>OutputStream</code>
0294: * encounters an I/O error
0295: */
0296: protected final void write(Element element) throws IOException {
0297:
0298: // treat empty elements differently to avoid an
0299: // instanceof test
0300: if (element.getChildCount() == 0) {
0301: writeStartTag(element, false);
0302: writeEndTag(element);
0303: } else {
0304: Node current = element;
0305: boolean end = false;
0306: int index = -1;
0307: int[] indexes = new int[10];
0308: int top = 0;
0309: indexes[0] = -1;
0310: while (true) {
0311: if (!end && current.getChildCount() > 0) {
0312: writeStartTag((Element) current, false);
0313: current = current.getChild(0);
0314: index = 0;
0315: top++;
0316: indexes = grow(indexes, top);
0317: indexes[top] = 0;
0318: } else {
0319: if (end) {
0320: writeEndTag((Element) current);
0321: if (current == element)
0322: break;
0323: } else {
0324: writeChild(current);
0325: }
0326: end = false;
0327: ParentNode parent = current.getParent();
0328: if (parent.getChildCount() - 1 == index) {
0329: current = parent;
0330: top--;
0331: if (current != element) {
0332: index = indexes[top];
0333: }
0334: end = true;
0335: } else {
0336: index++;
0337: indexes[top] = index;
0338: current = parent.getChild(index);
0339: }
0340: }
0341: }
0342: }
0343:
0344: }
0345:
0346: private int[] grow(int[] indexes, int top) {
0347:
0348: if (top < indexes.length)
0349: return indexes;
0350: int[] result = new int[indexes.length * 2];
0351: System.arraycopy(indexes, 0, result, 0, indexes.length);
0352: return result;
0353:
0354: }
0355:
0356: protected void writeStartTag(Element element, boolean isEmpty)
0357: throws IOException {
0358:
0359: boolean writeElement = nodes == null
0360: || nodes.contains(element);
0361: if (writeElement) {
0362: inScope.pushContext();
0363: writeRaw("<");
0364: writeRaw(element.getQualifiedName());
0365: }
0366:
0367: SortedMap map = new TreeMap();
0368: if (nodes == null) {
0369: ParentNode parent = element.getParent();
0370: Element parentElement = null;
0371: if (parent instanceof Element) {
0372: parentElement = (Element) parent;
0373: }
0374: for (int i = 0; i < element
0375: .getNamespaceDeclarationCount(); i++) {
0376: String prefix = element.getNamespacePrefix(i);
0377: String uri = element.getNamespaceURI(prefix);
0378:
0379: if (uri.equals(inScope.getURI(prefix))) {
0380: continue;
0381: } else if (exclusive) {
0382: if (needToDeclareNamespace(element, prefix, uri)) {
0383: map.put(prefix, uri);
0384: }
0385: } else if (uri.equals("")) {
0386: // no need to say xmlns=""
0387: if (parentElement == null)
0388: continue;
0389: if (""
0390: .equals(parentElement
0391: .getNamespaceURI(""))) {
0392: continue;
0393: }
0394: map.put(prefix, uri);
0395: } else {
0396: map.put(prefix, uri);
0397: }
0398:
0399: }
0400:
0401: writeNamespaceDeclarations(map);
0402:
0403: } else {
0404: int position = indexOf(element);
0405: // do we need to undeclare a default namespace?
0406: // You know, should I instead create an output tree and then just
0407: // canonicalize that? probably not
0408: if (position != -1
0409: && "".equals(element.getNamespaceURI())) {
0410: ParentNode parent = element.getParent();
0411: // Here we have to check for the nearest default on parents in the
0412: // output tree, not the input tree
0413: while (parent instanceof Element
0414: && !(nodes.contains(parent))) {
0415: parent = parent.getParent();
0416: }
0417: if (parent instanceof Element) {
0418: String uri = ((Element) parent)
0419: .getNamespaceURI("");
0420: if (!"".equals(uri)) {
0421: map.put("", "");
0422: }
0423: }
0424: }
0425:
0426: for (int i = position + 1; i < nodes.size(); i++) {
0427: Node next = nodes.get(i);
0428: if (!(next instanceof Namespace))
0429: break;
0430: Namespace namespace = (Namespace) next;
0431: String prefix = namespace.getPrefix();
0432: String uri = namespace.getValue();
0433:
0434: if (uri.equals(inScope.getURI(prefix))) {
0435: continue;
0436: } else if (exclusive) {
0437: if (needToDeclareNamespace(element, prefix, uri)) {
0438: map.put(prefix, uri);
0439: }
0440: } else {
0441: map.put(prefix, uri);
0442: }
0443:
0444: }
0445:
0446: writeNamespaceDeclarations(map);
0447:
0448: }
0449:
0450: Attribute[] sorted = sortAttributes(element);
0451: for (int i = 0; i < sorted.length; i++) {
0452: if (nodes == null
0453: || nodes.contains(sorted[i])
0454: || (sorted[i].getNamespaceURI().equals(
0455: Namespace.XML_NAMESPACE) && sorted[i]
0456: .getParent() != element)) {
0457: write(sorted[i]);
0458: }
0459: }
0460:
0461: if (writeElement) {
0462: writeRaw(">");
0463: }
0464:
0465: }
0466:
0467: private void writeNamespaceDeclarations(SortedMap map)
0468: throws IOException {
0469:
0470: Iterator prefixes = map.entrySet().iterator();
0471: while (prefixes.hasNext()) {
0472: Map.Entry entry = (Entry) prefixes.next();
0473: String prefix = (String) entry.getKey();
0474: String uri = (String) entry.getValue();
0475: writeRaw(" ");
0476: writeNamespaceDeclaration(prefix, uri);
0477: inScope.declarePrefix(prefix, uri);
0478: }
0479:
0480: }
0481:
0482: private boolean needToDeclareNamespace(Element parent,
0483: String prefix, String uri) {
0484:
0485: boolean match = visiblyUtilized(parent, prefix, uri);
0486:
0487: if (match || inclusiveNamespacePrefixes.contains(prefix)) {
0488: return noOutputAncestorUsesPrefix(parent, prefix, uri);
0489: }
0490:
0491: return false;
0492:
0493: }
0494:
0495: private boolean visiblyUtilized(Element element, String prefix,
0496: String uri) {
0497:
0498: boolean match = false;
0499: String pfx = element.getNamespacePrefix();
0500: String local = element.getNamespaceURI();
0501: if (prefix.equals(pfx) && local.equals(uri)) {
0502: match = true;
0503: } else {
0504: for (int i = 0; i < element.getAttributeCount(); i++) {
0505: Attribute attribute = element.getAttribute(i);
0506: if (nodes == null || nodes.contains(attribute)) {
0507: pfx = attribute.getNamespacePrefix();
0508: if (prefix.equals(pfx)) {
0509: match = true;
0510: break;
0511: }
0512: }
0513: }
0514: }
0515: return match;
0516: }
0517:
0518: private boolean noOutputAncestorUsesPrefix(Element original,
0519: String prefix, String uri) {
0520:
0521: ParentNode parent = original.getParent();
0522: if (parent instanceof Document && "".equals(uri)) {
0523: return false;
0524: }
0525:
0526: while (parent != null && !(parent instanceof Document)) {
0527: if (nodes == null || nodes.contains(parent)) {
0528: Element element = (Element) parent;
0529: String pfx = element.getNamespacePrefix();
0530: if (pfx.equals(prefix)) {
0531: String newURI = element.getNamespaceURI(prefix);
0532: return !newURI.equals(uri);
0533: }
0534:
0535: for (int i = 0; i < element.getAttributeCount(); i++) {
0536: Attribute attribute = element.getAttribute(i);
0537: String current = attribute.getNamespacePrefix();
0538: if (current.equals(prefix)) {
0539: String newURI = element
0540: .getNamespaceURI(prefix);
0541: return !newURI.equals(uri);
0542: }
0543: }
0544: }
0545: parent = parent.getParent();
0546: }
0547: return true;
0548:
0549: }
0550:
0551: // ???? move into Nodes?
0552: private int indexOf(Element element) {
0553: for (int i = 0; i < nodes.size(); i++) {
0554: if (nodes.get(i) == element)
0555: return i;
0556: }
0557: return -1;
0558: }
0559:
0560: protected void write(Attribute attribute) throws IOException {
0561:
0562: writeRaw(" ");
0563: writeRaw(attribute.getQualifiedName());
0564: writeRaw("=\"");
0565: writeRaw(prepareAttributeValue(attribute));
0566: writeRaw("\"");
0567:
0568: }
0569:
0570: protected void writeEndTag(Element element) throws IOException {
0571:
0572: if (nodes == null || nodes.contains(element)) {
0573: writeRaw("</");
0574: writeRaw(element.getQualifiedName());
0575: writeRaw(">");
0576: inScope.popContext();
0577: }
0578:
0579: }
0580:
0581: private final XPathContext xmlcontext = new XPathContext("xml",
0582: Namespace.XML_NAMESPACE);
0583:
0584: private Attribute[] sortAttributes(Element element) {
0585:
0586: Map nearest = new TreeMap();
0587: // add in any inherited xml: attributes
0588: if (!exclusive && nodes != null && nodes.contains(element)
0589: && !nodes.contains(element.getParent())) {
0590: // grab all xml: attributes
0591: Nodes attributes = element.query("ancestor::*/@xml:*",
0592: xmlcontext);
0593: if (attributes.size() != 0) {
0594: // It's important to count backwards here because
0595: // XPath returns all nodes in document order, which
0596: // is top-down. To get the nearest we need to go
0597: // bottom up instead.
0598: for (int i = attributes.size() - 1; i >= 0; i--) {
0599: Attribute a = (Attribute) attributes.get(i);
0600: String name = a.getLocalName();
0601: if (element.getAttribute(name,
0602: Namespace.XML_NAMESPACE) != null) {
0603: // this element already has that attribute
0604: continue;
0605: }
0606: if (!nearest.containsKey(name)) {
0607: Element parent = (Element) a.getParent();
0608: if (!nodes.contains(parent)) {
0609: nearest.put(name, a);
0610: } else {
0611: nearest.put(name, null);
0612: }
0613: }
0614: }
0615: }
0616:
0617: // remove null values
0618: Iterator iterator = nearest.values().iterator();
0619: while (iterator.hasNext()) {
0620: if (iterator.next() == null)
0621: iterator.remove();
0622: }
0623:
0624: }
0625:
0626: int localCount = element.getAttributeCount();
0627: Attribute[] result = new Attribute[localCount
0628: + nearest.size()];
0629: for (int i = 0; i < localCount; i++) {
0630: result[i] = element.getAttribute(i);
0631: }
0632:
0633: Iterator iterator = nearest.values().iterator();
0634: for (int j = localCount; j < result.length; j++) {
0635: result[j] = (Attribute) iterator.next();
0636: }
0637:
0638: Arrays.sort(result, comparator);
0639:
0640: return result;
0641:
0642: }
0643:
0644: private String prepareAttributeValue(Attribute attribute) {
0645:
0646: String value = attribute.getValue();
0647: StringBuffer result = new StringBuffer(value.length());
0648:
0649: if (attribute.getType().equals(Attribute.Type.CDATA)
0650: || attribute.getType().equals(
0651: Attribute.Type.UNDECLARED)) {
0652: char[] data = value.toCharArray();
0653: for (int i = 0; i < data.length; i++) {
0654: char c = data[i];
0655: if (c == '\t') {
0656: result.append("	");
0657: } else if (c == '\n') {
0658: result.append("
");
0659: } else if (c == '\r') {
0660: result.append("
");
0661: } else if (c == '\"') {
0662: result.append(""");
0663: } else if (c == '&') {
0664: result.append("&");
0665: } else if (c == '<') {
0666: result.append("<");
0667: } else {
0668: result.append(c);
0669: }
0670: }
0671: } else {
0672: // According to the spec, "Whitespace character references
0673: // other than   are not affected by attribute value
0674: // normalization. For parsed documents, the parser will
0675: // still replace these with the actual character. I am
0676: // going to assume that if one is found here, that the
0677: // user meant to put it there; and so we will escape it
0678: // with a character reference
0679: char[] data = value.toCharArray();
0680: boolean seenFirstNonSpace = false;
0681: for (int i = 0; i < data.length; i++) {
0682: if (data[i] == ' ') {
0683: if (i != data.length - 1 && data[i + 1] != ' '
0684: && seenFirstNonSpace) {
0685: result.append(data[i]);
0686: }
0687: continue;
0688: }
0689: seenFirstNonSpace = true;
0690: if (data[i] == '\t') {
0691: result.append("	");
0692: } else if (data[i] == '\n') {
0693: result.append("
");
0694: } else if (data[i] == '\r') {
0695: result.append("
");
0696: } else if (data[i] == '\"') {
0697: result.append(""");
0698: } else if (data[i] == '&') {
0699: result.append("&");
0700: } else if (data[i] == '<') {
0701: result.append("<");
0702: } else {
0703: result.append(data[i]);
0704: }
0705: }
0706: }
0707:
0708: return result.toString();
0709:
0710: }
0711:
0712: /**
0713: * <p>
0714: * Serializes a <code>Text</code> object
0715: * onto the output stream using the UTF-8 encoding.
0716: * The reserved characters <, >, and &
0717: * are escaped using the standard entity references such as
0718: * <code>&lt;</code>, <code>&gt;</code>,
0719: * and <code>&amp;</code>.
0720: * </p>
0721: *
0722: * @param text the <code>Text</code> to serialize
0723: *
0724: * @throws IOException if the underlying <code>OutputStream</code>
0725: * encounters an I/O error
0726: */
0727: protected final void write(Text text) throws IOException {
0728:
0729: if (nodes == null || nodes.contains(text)) {
0730: String input = text.getValue();
0731: StringBuffer result = new StringBuffer(input.length());
0732: for (int i = 0; i < input.length(); i++) {
0733: char c = input.charAt(i);
0734: if (c == '\r') {
0735: result.append("
");
0736: } else if (c == '&') {
0737: result.append("&");
0738: } else if (c == '<') {
0739: result.append("<");
0740: } else if (c == '>') {
0741: result.append(">");
0742: } else {
0743: result.append(c);
0744: }
0745: }
0746: writeRaw(result.toString());
0747: }
0748:
0749: }
0750:
0751: /**
0752: * <p>
0753: * Serializes a <code>Comment</code> object
0754: * onto the output stream if and only if this
0755: * serializer is configured to produce canonical XML
0756: * with comments.
0757: * </p>
0758: *
0759: * @param comment the <code>Comment</code> to serialize
0760: *
0761: * @throws IOException if the underlying <code>OutputStream</code>
0762: * encounters an I/O error
0763: */
0764: protected final void write(Comment comment) throws IOException {
0765: if (withComments
0766: && (nodes == null || nodes.contains(comment))) {
0767: super .write(comment);
0768: }
0769: }
0770:
0771: protected final void write(ProcessingInstruction pi)
0772: throws IOException {
0773: if (nodes == null || nodes.contains(pi)) {
0774: super .write(pi);
0775: }
0776: }
0777:
0778: /**
0779: * <p>
0780: * Does nothing because canonical XML does not include
0781: * document type declarations.
0782: * </p>
0783: *
0784: * @param doctype the document type declaration to serialize
0785: */
0786: protected final void write(DocType doctype) {
0787: // DocType is not serialized in canonical XML
0788: }
0789:
0790: public void write(Node node) throws IOException {
0791:
0792: if (node instanceof Document) {
0793: write((Document) node);
0794: } else if (node instanceof Attribute) {
0795: write((Attribute) node);
0796: } else if (node instanceof Namespace) {
0797: write((Namespace) node);
0798: } else {
0799: writeChild(node);
0800: }
0801:
0802: }
0803:
0804: private void write(Namespace namespace) throws IOException {
0805:
0806: String prefix = namespace.getPrefix();
0807: String uri = namespace.getValue();
0808: writeRaw(" xmlns");
0809: if (!"".equals(prefix)) {
0810: writeRaw(":");
0811: writeRaw(prefix);
0812: }
0813: writeRaw("=\"");
0814: writeAttributeValue(uri);
0815: writeRaw("\"");
0816:
0817: }
0818:
0819: }
0820:
0821: /**
0822: * <p>
0823: * Serializes a node onto the output stream using the specified
0824: * canonicalization algorithm. If the node is a document or an
0825: * element, then the node's entire subtree is written out.
0826: * </p>
0827: *
0828: * @param node the node to canonicalize
0829: *
0830: * @throws IOException if the underlying <code>OutputStream</code>
0831: * encounters an I/O error
0832: */
0833: public final void write(Node node) throws IOException {
0834:
0835: // See this thread:
0836: // http://lists.ibiblio.org/pipermail/xom-interest/2005-October/002656.html
0837: if (node instanceof Element) {
0838: Document doc = node.getDocument();
0839: Element pseudoRoot = null;
0840: if (doc == null) {
0841: pseudoRoot = new Element("pseudo");
0842: doc = new Document(pseudoRoot);
0843: ParentNode root = (ParentNode) node;
0844: while (root.getParent() != null)
0845: root = root.getParent();
0846: pseudoRoot.appendChild(root);
0847: }
0848: try {
0849: write(node.query(".//. | .//@* | .//namespace::*"));
0850: } finally {
0851: if (pseudoRoot != null)
0852: pseudoRoot.removeChild(0);
0853: }
0854: } else {
0855: serializer.nodes = null;
0856: serializer.write(node);
0857: }
0858: serializer.flush();
0859:
0860: }
0861:
0862: /**
0863: * <p>
0864: * Serializes a document subset onto the output stream using the
0865: * canonical XML algorithm. All nodes in the list must come from
0866: * same document. Furthermore, they must come from a document.
0867: * They cannot be detached. The nodes need not be sorted. This
0868: * method will sort them into the appropriate order for
0869: * canonicalization.
0870: * </p>
0871: *
0872: * <p>
0873: * In most common use cases, these nodes will be the result of
0874: * evaluating an XPath expression. For example,
0875: * </p>
0876: *
0877: * <pre><code> Canonicalizer canonicalizer
0878: * = new Canonicalizer(System.out, Canonicalizer.CANONICAL_XML);
0879: * Nodes result = doc.query("//. | //@* | //namespace::*");
0880: * canonicalizer.write(result);
0881: * </code></pre>
0882: *
0883: * <p>
0884: * Children are not output unless the subset also includes them.
0885: * Including an element in the subset does not automatically
0886: * select all the element's children, attributes, and namespaces.
0887: * Furthermore, not selecting an element does not imply that its
0888: * children, namespaces, attributes will not be output.
0889: * </p>
0890: *
0891: * @param documentSubset the nodes to serialize
0892: *
0893: * @throws IOException if the underlying <code>OutputStream</code>
0894: * encounters an I/O error
0895: * @throws CanonicalizationException if the nodes come from more
0896: * than one document; or if a detached node is in the list
0897: */
0898: public final void write(Nodes documentSubset) throws IOException {
0899:
0900: if (documentSubset.size() > 0) {
0901: Document doc = documentSubset.get(0).getDocument();
0902: if (doc == null) {
0903: throw new CanonicalizationException(
0904: "Canonicalization is not defined for detached nodes");
0905: }
0906: Nodes result = sort(documentSubset);
0907: serializer.nodes = result;
0908: serializer.write(doc);
0909: serializer.flush();
0910: }
0911:
0912: }
0913:
0914: /**
0915: * <p>
0916: * Specifies the prefixes that will be output as specified in
0917: * regular canonical XML, even when doing exclusive
0918: * XML canonicalization.
0919: * </p>
0920: *
0921: * @param inclusiveNamespacePrefixes a whitespace separated list
0922: * of namespace prefixes that will always be included in the
0923: * output, even in exclusive canonicalization
0924: */
0925: public final void setInclusiveNamespacePrefixList(
0926: String inclusiveNamespacePrefixes) throws IOException {
0927:
0928: this .inclusiveNamespacePrefixes.clear();
0929: if (this .exclusive && inclusiveNamespacePrefixes != null) {
0930: StringTokenizer tokenizer = new StringTokenizer(
0931: inclusiveNamespacePrefixes, " \t\r\n", false);
0932: while (tokenizer.hasMoreTokens()) {
0933: this .inclusiveNamespacePrefixes.add(tokenizer
0934: .nextToken());
0935: }
0936: }
0937:
0938: }
0939:
0940: // XXX remove recursion
0941: // recursively descend through document; in document
0942: // order, and add results as they are found
0943: private Nodes sort(Nodes in) {
0944:
0945: Node root = in.get(0).getDocument();
0946: if (in.size() > 1) {
0947: Nodes out = new Nodes();
0948: List list = new ArrayList(in.size());
0949: List namespaces = new ArrayList();
0950: for (int i = 0; i < in.size(); i++) {
0951: Node node = in.get(i);
0952: list.add(node);
0953: if (node instanceof Namespace)
0954: namespaces.add(node);
0955: }
0956: sort(list, namespaces, out, (ParentNode) root);
0957: if (!list.isEmpty()) {
0958: // Are these just duplicates; or is there really a node
0959: // from a different document?
0960: Iterator iterator = list.iterator();
0961: while (iterator.hasNext()) {
0962: Node next = (Node) iterator.next();
0963: if (root != next.getDocument()) {
0964: throw new CanonicalizationException(
0965: "Cannot canonicalize subsets that contain nodes from more than one document");
0966: }
0967: }
0968: }
0969: return out;
0970: } else {
0971: return new Nodes(in.get(0));
0972: }
0973:
0974: }
0975:
0976: private static void sort(List in, List namespaces, Nodes out,
0977: ParentNode parent) {
0978:
0979: if (in.isEmpty())
0980: return;
0981: if (in.contains(parent)) {
0982: out.append(parent);
0983: in.remove(parent);
0984: // I'm fairly sure this next line is unreachable, but just
0985: // in case it isn't I'll leave this comment here.
0986: // if (in.isEmpty()) return;
0987: }
0988:
0989: int childCount = parent.getChildCount();
0990: for (int i = 0; i < childCount; i++) {
0991: Node child = parent.getChild(i);
0992: if (child instanceof Element) {
0993: Element element = (Element) child;
0994: if (in.contains(element)) {
0995: out.append(element);
0996: in.remove(element);
0997: }
0998: // attach namespaces
0999: if (!namespaces.isEmpty()) {
1000: Iterator iterator = in.iterator();
1001: while (iterator.hasNext()) {
1002: Object o = iterator.next();
1003: if (o instanceof Namespace) {
1004: Namespace n = (Namespace) o;
1005: if (element == n.getParent()) {
1006: out.append(n);
1007: iterator.remove();
1008: }
1009: }
1010: }
1011: }
1012:
1013: // attach attributes
1014: for (int a = 0; a < element.getAttributeCount(); a++) {
1015: Attribute att = element.getAttribute(a);
1016: if (in.contains(att)) {
1017: out.append(att);
1018: in.remove(att);
1019: if (in.isEmpty())
1020: return;
1021: }
1022: }
1023: sort(in, namespaces, out, element);
1024: } else {
1025: if (in.contains(child)) {
1026: out.append(child);
1027: in.remove(child);
1028: if (in.isEmpty())
1029: return;
1030: }
1031: }
1032: }
1033:
1034: }
1035:
1036: }
|