Source Code Cross Referenced for Canonicalizer.java in » XML » xom » nu » xom » canonical » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » XML » xom » nu.xom.canonical
Source Cross Referenced Class Diagram Java Document (Java Doc)
0001:        /* Copyright 2002-2005 Elliotte Rusty Harold
0002:           
0003:           This library is free software; you can redistribute it and/or modify
0004:           it under the terms of version 2.1 of the GNU Lesser General Public 
0005:           License as published by the Free Software Foundation.
0006:           
0007:           This library is distributed in the hope that it will be useful,
0008:           but WITHOUT ANY WARRANTY; without even the implied warranty of
0009:           MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
0010:           GNU Lesser General Public License for more details.
0011:           
0012:           You should have received a copy of the GNU Lesser General Public
0013:           License along with this library; if not, write to the 
0014:           Free Software Foundation, Inc., 59 Temple Place, Suite 330, 
0015:           Boston, MA 02111-1307  USA
0016:           
0017:           You can contact Elliotte Rusty Harold by sending e-mail to
0018:           elharo@metalab.unc.edu. Please include the word "XOM" in the
0019:           subject line. The XOM home page is located at http://www.xom.nu/
0020:         */
0021:
0022:        package nu.xom.canonical;
0023:
0024:        import java.io.IOException;
0025:        import java.io.OutputStream;
0026:        import java.util.ArrayList;
0027:        import java.util.Arrays;
0028:        import java.util.Comparator;
0029:        import java.util.Iterator;
0030:        import java.util.List;
0031:        import java.util.Map;
0032:        import java.util.SortedMap;
0033:        import java.util.StringTokenizer;
0034:        import java.util.TreeMap;
0035:        import java.util.Map.Entry;
0036:
0037:        import org.xml.sax.helpers.NamespaceSupport;
0038:
0039:        import nu.xom.Attribute;
0040:        import nu.xom.Comment;
0041:        import nu.xom.DocType;
0042:        import nu.xom.Document;
0043:        import nu.xom.Element;
0044:        import nu.xom.Namespace;
0045:        import nu.xom.Node;
0046:        import nu.xom.Nodes;
0047:        import nu.xom.ParentNode;
0048:        import nu.xom.ProcessingInstruction;
0049:        import nu.xom.Serializer;
0050:        import nu.xom.Text;
0051:        import nu.xom.XPathContext;
0052:
0053:        /**
0054:         * <p>
0055:         *   Writes XML in the format specified by <a target="_top"
0056:         *   href="http://www.w3.org/TR/2001/REC-xml-c14n-20010315">Canonical
0057:         *   XML Version 1.0</a> or <a target="_top"
0058:         *   href="http://www.w3.org/TR/2002/REC-xml-exc-c14n-20020718/">Exclusive
0059:         *   XML Canonicalization Version 1.0</a>. 
0060:         * </p>
0061:         * 
0062:         * @author Elliotte Rusty Harold
0063:         * @version 1.1b4
0064:         *
0065:         */
0066:        public class Canonicalizer {
0067:
0068:            private boolean withComments;
0069:            private boolean exclusive = false;
0070:            private CanonicalXMLSerializer serializer;
0071:            private List inclusiveNamespacePrefixes = new ArrayList();
0072:
0073:            private static Comparator comparator = new AttributeComparator();
0074:
0075:            public final static String CANONICAL_XML = "http://www.w3.org/TR/2001/REC-xml-c14n-20010315";
0076:            public final static String CANONICAL_XML_WITH_COMMENTS = "http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments";
0077:            public final static String EXCLUSIVE_XML_CANONICALIZATION = "http://www.w3.org/2001/10/xml-exc-c14n#";
0078:            public final static String EXCLUSIVE_XML_CANONICALIZATION_WITH_COMMENTS = "http://www.w3.org/2001/10/xml-exc-c14n#WithComments";
0079:
0080:            private static class AttributeComparator implements  Comparator {
0081:
0082:                public int compare(Object o1, Object o2) {
0083:                    Attribute a1 = (Attribute) o1;
0084:                    Attribute a2 = (Attribute) o2;
0085:
0086:                    String namespace1 = a1.getNamespaceURI();
0087:                    String namespace2 = a2.getNamespaceURI();
0088:                    if (namespace1.equals(namespace2)) {
0089:                        return a1.getLocalName().compareTo(a2.getLocalName());
0090:                    } else if (namespace1.equals("")) {
0091:                        return -1;
0092:                    } else if (namespace2.equals("")) {
0093:                        return 1;
0094:                    } else { // compare namespace URIs
0095:                        return namespace1.compareTo(namespace2);
0096:                    }
0097:
0098:                }
0099:
0100:            }
0101:
0102:            /**
0103:             * <p>
0104:             *   Creates a <code>Canonicalizer</code> that outputs a 
0105:             *   canonical XML document with comments.
0106:             * </p>
0107:             * 
0108:             * @param out the output stream the document
0109:             *     is written onto
0110:             */
0111:            public Canonicalizer(OutputStream out) {
0112:                this (out, true, false);
0113:            }
0114:
0115:            /**
0116:             * <p>
0117:             *   Creates a <code>Canonicalizer</code> that outputs a 
0118:             *   canonical XML document with or without comments.
0119:             * </p>
0120:             * 
0121:             * @param out the output stream the document
0122:             *     is written onto
0123:             * @param withComments true if comments should be included 
0124:             *     in the output, false otherwise
0125:             */
0126:            public Canonicalizer(OutputStream out, boolean withComments) {
0127:                this (out, withComments, false);
0128:            }
0129:
0130:            /**
0131:             * <p>
0132:             * Creates a <code>Canonicalizer</code> that outputs a 
0133:             * canonical XML document with or without comments,
0134:             * using either the original or the exclusive canonicalization
0135:             * algorithm. 
0136:             * </p>
0137:             * 
0138:             * @param out the output stream the document
0139:             *     is written onto
0140:             * @param withComments true if comments should be included 
0141:             *     in the output, false otherwise
0142:             * @param exclusive true if exclusive XML canonicalization 
0143:             *     should be performed, false if regular XML canonicalization
0144:             *     should be performed
0145:             */
0146:            private Canonicalizer(OutputStream out, boolean withComments,
0147:                    boolean exclusive) {
0148:
0149:                this .serializer = new CanonicalXMLSerializer(out);
0150:                serializer.setLineSeparator("\n");
0151:                this .withComments = withComments;
0152:                this .exclusive = exclusive;
0153:
0154:            }
0155:
0156:            /**
0157:             * <p>
0158:             * Creates a <code>Canonicalizer</code> that outputs a 
0159:             * canonical XML document using the specified algorithm. 
0160:             * Currently, four algorithms are defined and supported:
0161:             * </p>
0162:             * 
0163:             * <ul>
0164:             * <li>Canonical XML without comments: 
0165:             * <code>http://www.w3.org/TR/2001/REC-xml-c14n-20010315</code></li>
0166:             * <li>Canonical XML with comments: 
0167:             * <code>http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments</code></li>
0168:             * <li>Exclusive XML canonicalization without comments: 
0169:             * <code>http://www.w3.org/2001/10/xml-exc-c14n#</code></li>
0170:             * <li>Exclusive XML canonicalization with comments: 
0171:             * <code>http://www.w3.org/2001/10/xml-exc-c14n#WithComments</code></li>
0172:             * </ul>
0173:             * 
0174:             * @param out the output stream the document
0175:             *     is written onto
0176:             * @param algorithm the URI for the canonicalization algorithm
0177:             * 
0178:             * @throws CanonicalizationException if the algorithm is 
0179:             *     not recognized
0180:             * @throws NullPointerException if the algorithm is null
0181:             * 
0182:             */
0183:            public Canonicalizer(OutputStream out, String algorithm) {
0184:
0185:                if (algorithm == null) {
0186:                    throw new NullPointerException("Null algorithm");
0187:                }
0188:                this .serializer = new CanonicalXMLSerializer(out);
0189:                serializer.setLineSeparator("\n");
0190:                if (algorithm.equals(CANONICAL_XML)) {
0191:                    this .withComments = false;
0192:                    this .exclusive = false;
0193:                } else if (algorithm.equals(CANONICAL_XML_WITH_COMMENTS)) {
0194:                    this .withComments = true;
0195:                    this .exclusive = false;
0196:                } else if (algorithm.equals(EXCLUSIVE_XML_CANONICALIZATION)) {
0197:                    this .withComments = false;
0198:                    this .exclusive = true;
0199:                } else if (algorithm
0200:                        .equals(EXCLUSIVE_XML_CANONICALIZATION_WITH_COMMENTS)) {
0201:                    this .withComments = true;
0202:                    this .exclusive = true;
0203:                } else {
0204:                    throw new CanonicalizationException(
0205:                            "Unsupported canonicalization algorithm: "
0206:                                    + algorithm);
0207:                }
0208:
0209:            }
0210:
0211:            private class CanonicalXMLSerializer extends Serializer {
0212:
0213:                // If nodes is null we're canonicalizing all nodes;
0214:                // the entire document; this is somewhat easier than when
0215:                // canonicalizing only a document subset embedded in nodes
0216:                private Nodes nodes;
0217:                private NamespaceSupport inScope;
0218:
0219:                /**
0220:                 * <p>
0221:                 *   Creates a <code>Serializer</code> that outputs a 
0222:                 *   canonical XML document with or without comments.
0223:                 * </p>
0224:                 * 
0225:                 * @param out the <code>OutputStream</code> the document
0226:                 *     is written onto
0227:                 * @param withComments true if comments should be included 
0228:                 *     in the output, false otherwise
0229:                 */
0230:                CanonicalXMLSerializer(OutputStream out) {
0231:                    super (out);
0232:                    setLineSeparator("\n");
0233:                }
0234:
0235:                /**
0236:                 * <p>
0237:                 * Serializes a document onto the output 
0238:                 * stream using the canonical XML algorithm.
0239:                 * </p>
0240:                 * 
0241:                 * @param doc the <code>Document</code> to serialize
0242:                 * 
0243:                 * @throws IOException if the underlying <code>OutputStream</code>
0244:                 *      encounters an I/O error
0245:                 */
0246:                public final void write(Document doc) throws IOException {
0247:
0248:                    inScope = new NamespaceSupport();
0249:                    int position = 0;
0250:                    while (true) {
0251:                        Node child = doc.getChild(position);
0252:                        if (nodes == null || child instanceof  Element
0253:                                || nodes.contains(child)) {
0254:                            writeChild(child);
0255:                            if (child instanceof  ProcessingInstruction)
0256:                                breakLine();
0257:                            else if (child instanceof  Comment && withComments) {
0258:                                breakLine();
0259:                            }
0260:                        }
0261:                        position++;
0262:                        if (child instanceof  Element)
0263:                            break;
0264:                    }
0265:
0266:                    for (int i = position; i < doc.getChildCount(); i++) {
0267:                        Node child = doc.getChild(i);
0268:                        if (nodes == null || child instanceof  Element
0269:                                || nodes.contains(child)) {
0270:                            if (child instanceof  ProcessingInstruction)
0271:                                breakLine();
0272:                            else if (child instanceof  Comment && withComments) {
0273:                                breakLine();
0274:                            }
0275:                            writeChild(child);
0276:                        }
0277:                    }
0278:
0279:                    flush();
0280:
0281:                }
0282:
0283:                /**
0284:                 * <p>
0285:                 * Serializes an element onto the output stream using the canonical
0286:                 * XML algorithm.  The result is guaranteed to be well-formed. 
0287:                 * If <code>element</code> does not have a parent element, it will
0288:                 * also be namespace well-formed.
0289:                 * </p>
0290:                 * 
0291:                 * @param element the <code>Element</code> to serialize
0292:                 * 
0293:                 * @throws IOException if the underlying <code>OutputStream</code>
0294:                 *     encounters an I/O error
0295:                 */
0296:                protected final void write(Element element) throws IOException {
0297:
0298:                    // treat empty elements differently to avoid an
0299:                    // instanceof test
0300:                    if (element.getChildCount() == 0) {
0301:                        writeStartTag(element, false);
0302:                        writeEndTag(element);
0303:                    } else {
0304:                        Node current = element;
0305:                        boolean end = false;
0306:                        int index = -1;
0307:                        int[] indexes = new int[10];
0308:                        int top = 0;
0309:                        indexes[0] = -1;
0310:                        while (true) {
0311:                            if (!end && current.getChildCount() > 0) {
0312:                                writeStartTag((Element) current, false);
0313:                                current = current.getChild(0);
0314:                                index = 0;
0315:                                top++;
0316:                                indexes = grow(indexes, top);
0317:                                indexes[top] = 0;
0318:                            } else {
0319:                                if (end) {
0320:                                    writeEndTag((Element) current);
0321:                                    if (current == element)
0322:                                        break;
0323:                                } else {
0324:                                    writeChild(current);
0325:                                }
0326:                                end = false;
0327:                                ParentNode parent = current.getParent();
0328:                                if (parent.getChildCount() - 1 == index) {
0329:                                    current = parent;
0330:                                    top--;
0331:                                    if (current != element) {
0332:                                        index = indexes[top];
0333:                                    }
0334:                                    end = true;
0335:                                } else {
0336:                                    index++;
0337:                                    indexes[top] = index;
0338:                                    current = parent.getChild(index);
0339:                                }
0340:                            }
0341:                        }
0342:                    }
0343:
0344:                }
0345:
0346:                private int[] grow(int[] indexes, int top) {
0347:
0348:                    if (top < indexes.length)
0349:                        return indexes;
0350:                    int[] result = new int[indexes.length * 2];
0351:                    System.arraycopy(indexes, 0, result, 0, indexes.length);
0352:                    return result;
0353:
0354:                }
0355:
0356:                protected void writeStartTag(Element element, boolean isEmpty)
0357:                        throws IOException {
0358:
0359:                    boolean writeElement = nodes == null
0360:                            || nodes.contains(element);
0361:                    if (writeElement) {
0362:                        inScope.pushContext();
0363:                        writeRaw("<");
0364:                        writeRaw(element.getQualifiedName());
0365:                    }
0366:
0367:                    SortedMap map = new TreeMap();
0368:                    if (nodes == null) {
0369:                        ParentNode parent = element.getParent();
0370:                        Element parentElement = null;
0371:                        if (parent instanceof  Element) {
0372:                            parentElement = (Element) parent;
0373:                        }
0374:                        for (int i = 0; i < element
0375:                                .getNamespaceDeclarationCount(); i++) {
0376:                            String prefix = element.getNamespacePrefix(i);
0377:                            String uri = element.getNamespaceURI(prefix);
0378:
0379:                            if (uri.equals(inScope.getURI(prefix))) {
0380:                                continue;
0381:                            } else if (exclusive) {
0382:                                if (needToDeclareNamespace(element, prefix, uri)) {
0383:                                    map.put(prefix, uri);
0384:                                }
0385:                            } else if (uri.equals("")) {
0386:                                // no need to say xmlns=""
0387:                                if (parentElement == null)
0388:                                    continue;
0389:                                if (""
0390:                                        .equals(parentElement
0391:                                                .getNamespaceURI(""))) {
0392:                                    continue;
0393:                                }
0394:                                map.put(prefix, uri);
0395:                            } else {
0396:                                map.put(prefix, uri);
0397:                            }
0398:
0399:                        }
0400:
0401:                        writeNamespaceDeclarations(map);
0402:
0403:                    } else {
0404:                        int position = indexOf(element);
0405:                        // do we need to undeclare a default namespace?
0406:                        // You know, should I instead create an output tree and then just
0407:                        // canonicalize that? probably not
0408:                        if (position != -1
0409:                                && "".equals(element.getNamespaceURI())) {
0410:                            ParentNode parent = element.getParent();
0411:                            // Here we have to check for the nearest default on parents in the
0412:                            // output tree, not the input tree
0413:                            while (parent instanceof  Element
0414:                                    && !(nodes.contains(parent))) {
0415:                                parent = parent.getParent();
0416:                            }
0417:                            if (parent instanceof  Element) {
0418:                                String uri = ((Element) parent)
0419:                                        .getNamespaceURI("");
0420:                                if (!"".equals(uri)) {
0421:                                    map.put("", "");
0422:                                }
0423:                            }
0424:                        }
0425:
0426:                        for (int i = position + 1; i < nodes.size(); i++) {
0427:                            Node next = nodes.get(i);
0428:                            if (!(next instanceof  Namespace))
0429:                                break;
0430:                            Namespace namespace = (Namespace) next;
0431:                            String prefix = namespace.getPrefix();
0432:                            String uri = namespace.getValue();
0433:
0434:                            if (uri.equals(inScope.getURI(prefix))) {
0435:                                continue;
0436:                            } else if (exclusive) {
0437:                                if (needToDeclareNamespace(element, prefix, uri)) {
0438:                                    map.put(prefix, uri);
0439:                                }
0440:                            } else {
0441:                                map.put(prefix, uri);
0442:                            }
0443:
0444:                        }
0445:
0446:                        writeNamespaceDeclarations(map);
0447:
0448:                    }
0449:
0450:                    Attribute[] sorted = sortAttributes(element);
0451:                    for (int i = 0; i < sorted.length; i++) {
0452:                        if (nodes == null
0453:                                || nodes.contains(sorted[i])
0454:                                || (sorted[i].getNamespaceURI().equals(
0455:                                        Namespace.XML_NAMESPACE) && sorted[i]
0456:                                        .getParent() != element)) {
0457:                            write(sorted[i]);
0458:                        }
0459:                    }
0460:
0461:                    if (writeElement) {
0462:                        writeRaw(">");
0463:                    }
0464:
0465:                }
0466:
0467:                private void writeNamespaceDeclarations(SortedMap map)
0468:                        throws IOException {
0469:
0470:                    Iterator prefixes = map.entrySet().iterator();
0471:                    while (prefixes.hasNext()) {
0472:                        Map.Entry entry = (Entry) prefixes.next();
0473:                        String prefix = (String) entry.getKey();
0474:                        String uri = (String) entry.getValue();
0475:                        writeRaw(" ");
0476:                        writeNamespaceDeclaration(prefix, uri);
0477:                        inScope.declarePrefix(prefix, uri);
0478:                    }
0479:
0480:                }
0481:
0482:                private boolean needToDeclareNamespace(Element parent,
0483:                        String prefix, String uri) {
0484:
0485:                    boolean match = visiblyUtilized(parent, prefix, uri);
0486:
0487:                    if (match || inclusiveNamespacePrefixes.contains(prefix)) {
0488:                        return noOutputAncestorUsesPrefix(parent, prefix, uri);
0489:                    }
0490:
0491:                    return false;
0492:
0493:                }
0494:
0495:                private boolean visiblyUtilized(Element element, String prefix,
0496:                        String uri) {
0497:
0498:                    boolean match = false;
0499:                    String pfx = element.getNamespacePrefix();
0500:                    String local = element.getNamespaceURI();
0501:                    if (prefix.equals(pfx) && local.equals(uri)) {
0502:                        match = true;
0503:                    } else {
0504:                        for (int i = 0; i < element.getAttributeCount(); i++) {
0505:                            Attribute attribute = element.getAttribute(i);
0506:                            if (nodes == null || nodes.contains(attribute)) {
0507:                                pfx = attribute.getNamespacePrefix();
0508:                                if (prefix.equals(pfx)) {
0509:                                    match = true;
0510:                                    break;
0511:                                }
0512:                            }
0513:                        }
0514:                    }
0515:                    return match;
0516:                }
0517:
0518:                private boolean noOutputAncestorUsesPrefix(Element original,
0519:                        String prefix, String uri) {
0520:
0521:                    ParentNode parent = original.getParent();
0522:                    if (parent instanceof  Document && "".equals(uri)) {
0523:                        return false;
0524:                    }
0525:
0526:                    while (parent != null && !(parent instanceof  Document)) {
0527:                        if (nodes == null || nodes.contains(parent)) {
0528:                            Element element = (Element) parent;
0529:                            String pfx = element.getNamespacePrefix();
0530:                            if (pfx.equals(prefix)) {
0531:                                String newURI = element.getNamespaceURI(prefix);
0532:                                return !newURI.equals(uri);
0533:                            }
0534:
0535:                            for (int i = 0; i < element.getAttributeCount(); i++) {
0536:                                Attribute attribute = element.getAttribute(i);
0537:                                String current = attribute.getNamespacePrefix();
0538:                                if (current.equals(prefix)) {
0539:                                    String newURI = element
0540:                                            .getNamespaceURI(prefix);
0541:                                    return !newURI.equals(uri);
0542:                                }
0543:                            }
0544:                        }
0545:                        parent = parent.getParent();
0546:                    }
0547:                    return true;
0548:
0549:                }
0550:
0551:                // ???? move into Nodes?
0552:                private int indexOf(Element element) {
0553:                    for (int i = 0; i < nodes.size(); i++) {
0554:                        if (nodes.get(i) == element)
0555:                            return i;
0556:                    }
0557:                    return -1;
0558:                }
0559:
0560:                protected void write(Attribute attribute) throws IOException {
0561:
0562:                    writeRaw(" ");
0563:                    writeRaw(attribute.getQualifiedName());
0564:                    writeRaw("=\"");
0565:                    writeRaw(prepareAttributeValue(attribute));
0566:                    writeRaw("\"");
0567:
0568:                }
0569:
0570:                protected void writeEndTag(Element element) throws IOException {
0571:
0572:                    if (nodes == null || nodes.contains(element)) {
0573:                        writeRaw("</");
0574:                        writeRaw(element.getQualifiedName());
0575:                        writeRaw(">");
0576:                        inScope.popContext();
0577:                    }
0578:
0579:                }
0580:
0581:                private final XPathContext xmlcontext = new XPathContext("xml",
0582:                        Namespace.XML_NAMESPACE);
0583:
0584:                private Attribute[] sortAttributes(Element element) {
0585:
0586:                    Map nearest = new TreeMap();
0587:                    // add in any inherited xml: attributes
0588:                    if (!exclusive && nodes != null && nodes.contains(element)
0589:                            && !nodes.contains(element.getParent())) {
0590:                        // grab all xml: attributes
0591:                        Nodes attributes = element.query("ancestor::*/@xml:*",
0592:                                xmlcontext);
0593:                        if (attributes.size() != 0) {
0594:                            // It's important to count backwards here because
0595:                            // XPath returns all nodes in document order, which 
0596:                            // is top-down. To get the nearest we need to go 
0597:                            // bottom up instead.
0598:                            for (int i = attributes.size() - 1; i >= 0; i--) {
0599:                                Attribute a = (Attribute) attributes.get(i);
0600:                                String name = a.getLocalName();
0601:                                if (element.getAttribute(name,
0602:                                        Namespace.XML_NAMESPACE) != null) {
0603:                                    // this element already has that attribute
0604:                                    continue;
0605:                                }
0606:                                if (!nearest.containsKey(name)) {
0607:                                    Element parent = (Element) a.getParent();
0608:                                    if (!nodes.contains(parent)) {
0609:                                        nearest.put(name, a);
0610:                                    } else {
0611:                                        nearest.put(name, null);
0612:                                    }
0613:                                }
0614:                            }
0615:                        }
0616:
0617:                        // remove null values
0618:                        Iterator iterator = nearest.values().iterator();
0619:                        while (iterator.hasNext()) {
0620:                            if (iterator.next() == null)
0621:                                iterator.remove();
0622:                        }
0623:
0624:                    }
0625:
0626:                    int localCount = element.getAttributeCount();
0627:                    Attribute[] result = new Attribute[localCount
0628:                            + nearest.size()];
0629:                    for (int i = 0; i < localCount; i++) {
0630:                        result[i] = element.getAttribute(i);
0631:                    }
0632:
0633:                    Iterator iterator = nearest.values().iterator();
0634:                    for (int j = localCount; j < result.length; j++) {
0635:                        result[j] = (Attribute) iterator.next();
0636:                    }
0637:
0638:                    Arrays.sort(result, comparator);
0639:
0640:                    return result;
0641:
0642:                }
0643:
0644:                private String prepareAttributeValue(Attribute attribute) {
0645:
0646:                    String value = attribute.getValue();
0647:                    StringBuffer result = new StringBuffer(value.length());
0648:
0649:                    if (attribute.getType().equals(Attribute.Type.CDATA)
0650:                            || attribute.getType().equals(
0651:                                    Attribute.Type.UNDECLARED)) {
0652:                        char[] data = value.toCharArray();
0653:                        for (int i = 0; i < data.length; i++) {
0654:                            char c = data[i];
0655:                            if (c == '\t') {
0656:                                result.append("&#x9;");
0657:                            } else if (c == '\n') {
0658:                                result.append("&#xA;");
0659:                            } else if (c == '\r') {
0660:                                result.append("&#xD;");
0661:                            } else if (c == '\"') {
0662:                                result.append("&quot;");
0663:                            } else if (c == '&') {
0664:                                result.append("&amp;");
0665:                            } else if (c == '<') {
0666:                                result.append("&lt;");
0667:                            } else {
0668:                                result.append(c);
0669:                            }
0670:                        }
0671:                    } else {
0672:                        // According to the spec, "Whitespace character references
0673:                        // other than &#x20; are not affected by attribute value 
0674:                        // normalization. For parsed documents, the parser will  
0675:                        // still replace these with the actual character. I am 
0676:                        // going to assume that if one is found here, that the 
0677:                        // user meant to put it there; and so we will escape it 
0678:                        // with a character reference
0679:                        char[] data = value.toCharArray();
0680:                        boolean seenFirstNonSpace = false;
0681:                        for (int i = 0; i < data.length; i++) {
0682:                            if (data[i] == ' ') {
0683:                                if (i != data.length - 1 && data[i + 1] != ' '
0684:                                        && seenFirstNonSpace) {
0685:                                    result.append(data[i]);
0686:                                }
0687:                                continue;
0688:                            }
0689:                            seenFirstNonSpace = true;
0690:                            if (data[i] == '\t') {
0691:                                result.append("&#x9;");
0692:                            } else if (data[i] == '\n') {
0693:                                result.append("&#xA;");
0694:                            } else if (data[i] == '\r') {
0695:                                result.append("&#xD;");
0696:                            } else if (data[i] == '\"') {
0697:                                result.append("&quot;");
0698:                            } else if (data[i] == '&') {
0699:                                result.append("&amp;");
0700:                            } else if (data[i] == '<') {
0701:                                result.append("&lt;");
0702:                            } else {
0703:                                result.append(data[i]);
0704:                            }
0705:                        }
0706:                    }
0707:
0708:                    return result.toString();
0709:
0710:                }
0711:
0712:                /**
0713:                 * <p>
0714:                 * Serializes a <code>Text</code> object
0715:                 * onto the output stream using the UTF-8 encoding.
0716:                 * The reserved characters &lt;, &gt;, and &amp;
0717:                 * are escaped using the standard entity references such as
0718:                 * <code>&amp;lt;</code>, <code>&amp;gt;</code>, 
0719:                 * and <code>&amp;amp;</code>.
0720:                 * </p>
0721:                 * 
0722:                 * @param text the <code>Text</code> to serialize
0723:                 * 
0724:                 * @throws IOException  if the underlying <code>OutputStream</code>
0725:                 *     encounters an I/O error
0726:                 */
0727:                protected final void write(Text text) throws IOException {
0728:
0729:                    if (nodes == null || nodes.contains(text)) {
0730:                        String input = text.getValue();
0731:                        StringBuffer result = new StringBuffer(input.length());
0732:                        for (int i = 0; i < input.length(); i++) {
0733:                            char c = input.charAt(i);
0734:                            if (c == '\r') {
0735:                                result.append("&#xD;");
0736:                            } else if (c == '&') {
0737:                                result.append("&amp;");
0738:                            } else if (c == '<') {
0739:                                result.append("&lt;");
0740:                            } else if (c == '>') {
0741:                                result.append("&gt;");
0742:                            } else {
0743:                                result.append(c);
0744:                            }
0745:                        }
0746:                        writeRaw(result.toString());
0747:                    }
0748:
0749:                }
0750:
0751:                /**
0752:                 * <p>
0753:                 * Serializes a <code>Comment</code> object
0754:                 * onto the output stream if and only if this
0755:                 * serializer is configured to produce canonical XML
0756:                 * with comments.
0757:                 * </p>
0758:                 * 
0759:                 * @param comment the <code>Comment</code> to serialize
0760:                 * 
0761:                 * @throws IOException if the underlying <code>OutputStream</code>
0762:                 *     encounters an I/O error
0763:                 */
0764:                protected final void write(Comment comment) throws IOException {
0765:                    if (withComments
0766:                            && (nodes == null || nodes.contains(comment))) {
0767:                        super .write(comment);
0768:                    }
0769:                }
0770:
0771:                protected final void write(ProcessingInstruction pi)
0772:                        throws IOException {
0773:                    if (nodes == null || nodes.contains(pi)) {
0774:                        super .write(pi);
0775:                    }
0776:                }
0777:
0778:                /**
0779:                 * <p>
0780:                 * Does nothing because canonical XML does not include
0781:                 * document type declarations.
0782:                 * </p>
0783:                 * 
0784:                 * @param doctype the document type declaration to serialize
0785:                 */
0786:                protected final void write(DocType doctype) {
0787:                    // DocType is not serialized in canonical XML
0788:                }
0789:
0790:                public void write(Node node) throws IOException {
0791:
0792:                    if (node instanceof  Document) {
0793:                        write((Document) node);
0794:                    } else if (node instanceof  Attribute) {
0795:                        write((Attribute) node);
0796:                    } else if (node instanceof  Namespace) {
0797:                        write((Namespace) node);
0798:                    } else {
0799:                        writeChild(node);
0800:                    }
0801:
0802:                }
0803:
0804:                private void write(Namespace namespace) throws IOException {
0805:
0806:                    String prefix = namespace.getPrefix();
0807:                    String uri = namespace.getValue();
0808:                    writeRaw(" xmlns");
0809:                    if (!"".equals(prefix)) {
0810:                        writeRaw(":");
0811:                        writeRaw(prefix);
0812:                    }
0813:                    writeRaw("=\"");
0814:                    writeAttributeValue(uri);
0815:                    writeRaw("\"");
0816:
0817:                }
0818:
0819:            }
0820:
0821:            /**
0822:             * <p>
0823:             * Serializes a node onto the output stream using the specified 
0824:             * canonicalization algorithm. If the node is a document or an 
0825:             * element, then the node's entire subtree is written out.
0826:             * </p>
0827:             * 
0828:             * @param node the node to canonicalize
0829:             * 
0830:             * @throws IOException if the underlying <code>OutputStream</code>
0831:             *      encounters an I/O error
0832:             */
0833:            public final void write(Node node) throws IOException {
0834:
0835:                // See this thread:
0836:                // http://lists.ibiblio.org/pipermail/xom-interest/2005-October/002656.html
0837:                if (node instanceof  Element) {
0838:                    Document doc = node.getDocument();
0839:                    Element pseudoRoot = null;
0840:                    if (doc == null) {
0841:                        pseudoRoot = new Element("pseudo");
0842:                        doc = new Document(pseudoRoot);
0843:                        ParentNode root = (ParentNode) node;
0844:                        while (root.getParent() != null)
0845:                            root = root.getParent();
0846:                        pseudoRoot.appendChild(root);
0847:                    }
0848:                    try {
0849:                        write(node.query(".//. | .//@* | .//namespace::*"));
0850:                    } finally {
0851:                        if (pseudoRoot != null)
0852:                            pseudoRoot.removeChild(0);
0853:                    }
0854:                } else {
0855:                    serializer.nodes = null;
0856:                    serializer.write(node);
0857:                }
0858:                serializer.flush();
0859:
0860:            }
0861:
0862:            /**
0863:             * <p>
0864:             * Serializes a document subset onto the output stream using the 
0865:             * canonical XML algorithm. All nodes in the list must come from 
0866:             * same document. Furthermore, they must come from a document.
0867:             * They cannot be detached. The nodes need not be sorted. This 
0868:             * method will sort them into the appropriate order for 
0869:             * canonicalization.
0870:             * </p>
0871:             * 
0872:             * <p>
0873:             * In most common use cases, these nodes will be the result of 
0874:             * evaluating an XPath expression. For example,
0875:             * </p>
0876:             * 
0877:             * <pre><code> Canonicalizer canonicalizer 
0878:             *   = new Canonicalizer(System.out, Canonicalizer.CANONICAL_XML);
0879:             * Nodes result = doc.query("//. | //@* | //namespace::*");
0880:             * canonicalizer.write(result);  
0881:             * </code></pre>
0882:             * 
0883:             * <p>
0884:             * Children are not output unless the subset also includes them.
0885:             * Including an element in the subset does not automatically  
0886:             * select all the element's children, attributes, and namespaces. 
0887:             * Furthermore, not selecting an element does not imply that its 
0888:             * children, namespaces, attributes will not be output. 
0889:             * </p>
0890:             * 
0891:             * @param documentSubset the nodes to serialize
0892:             * 
0893:             * @throws IOException if the underlying <code>OutputStream</code>
0894:             *     encounters an I/O error
0895:             * @throws CanonicalizationException if the nodes come from more
0896:             *     than one document; or if a detached node is in the list
0897:             */
0898:            public final void write(Nodes documentSubset) throws IOException {
0899:
0900:                if (documentSubset.size() > 0) {
0901:                    Document doc = documentSubset.get(0).getDocument();
0902:                    if (doc == null) {
0903:                        throw new CanonicalizationException(
0904:                                "Canonicalization is not defined for detached nodes");
0905:                    }
0906:                    Nodes result = sort(documentSubset);
0907:                    serializer.nodes = result;
0908:                    serializer.write(doc);
0909:                    serializer.flush();
0910:                }
0911:
0912:            }
0913:
0914:            /**
0915:             * <p>
0916:             * Specifies the prefixes that will be output as specified in 
0917:             * regular canonical XML, even when doing exclusive 
0918:             * XML canonicalization.
0919:             * </p>
0920:             * 
0921:             * @param inclusiveNamespacePrefixes a whitespace separated list 
0922:             *     of namespace prefixes that will always be included in the 
0923:             *     output, even in exclusive canonicalization
0924:             */
0925:            public final void setInclusiveNamespacePrefixList(
0926:                    String inclusiveNamespacePrefixes) throws IOException {
0927:
0928:                this .inclusiveNamespacePrefixes.clear();
0929:                if (this .exclusive && inclusiveNamespacePrefixes != null) {
0930:                    StringTokenizer tokenizer = new StringTokenizer(
0931:                            inclusiveNamespacePrefixes, " \t\r\n", false);
0932:                    while (tokenizer.hasMoreTokens()) {
0933:                        this .inclusiveNamespacePrefixes.add(tokenizer
0934:                                .nextToken());
0935:                    }
0936:                }
0937:
0938:            }
0939:
0940:            // XXX remove recursion
0941:            // recursively descend through document; in document
0942:            // order, and add results as they are found
0943:            private Nodes sort(Nodes in) {
0944:
0945:                Node root = in.get(0).getDocument();
0946:                if (in.size() > 1) {
0947:                    Nodes out = new Nodes();
0948:                    List list = new ArrayList(in.size());
0949:                    List namespaces = new ArrayList();
0950:                    for (int i = 0; i < in.size(); i++) {
0951:                        Node node = in.get(i);
0952:                        list.add(node);
0953:                        if (node instanceof  Namespace)
0954:                            namespaces.add(node);
0955:                    }
0956:                    sort(list, namespaces, out, (ParentNode) root);
0957:                    if (!list.isEmpty()) {
0958:                        // Are these just duplicates; or is there really a node
0959:                        // from a different document?
0960:                        Iterator iterator = list.iterator();
0961:                        while (iterator.hasNext()) {
0962:                            Node next = (Node) iterator.next();
0963:                            if (root != next.getDocument()) {
0964:                                throw new CanonicalizationException(
0965:                                        "Cannot canonicalize subsets that contain nodes from more than one document");
0966:                            }
0967:                        }
0968:                    }
0969:                    return out;
0970:                } else {
0971:                    return new Nodes(in.get(0));
0972:                }
0973:
0974:            }
0975:
0976:            private static void sort(List in, List namespaces, Nodes out,
0977:                    ParentNode parent) {
0978:
0979:                if (in.isEmpty())
0980:                    return;
0981:                if (in.contains(parent)) {
0982:                    out.append(parent);
0983:                    in.remove(parent);
0984:                    // I'm fairly sure this next line is unreachable, but just
0985:                    // in case it isn't I'll leave this comment here.
0986:                    // if (in.isEmpty()) return;
0987:                }
0988:
0989:                int childCount = parent.getChildCount();
0990:                for (int i = 0; i < childCount; i++) {
0991:                    Node child = parent.getChild(i);
0992:                    if (child instanceof  Element) {
0993:                        Element element = (Element) child;
0994:                        if (in.contains(element)) {
0995:                            out.append(element);
0996:                            in.remove(element);
0997:                        }
0998:                        // attach namespaces
0999:                        if (!namespaces.isEmpty()) {
1000:                            Iterator iterator = in.iterator();
1001:                            while (iterator.hasNext()) {
1002:                                Object o = iterator.next();
1003:                                if (o instanceof  Namespace) {
1004:                                    Namespace n = (Namespace) o;
1005:                                    if (element == n.getParent()) {
1006:                                        out.append(n);
1007:                                        iterator.remove();
1008:                                    }
1009:                                }
1010:                            }
1011:                        }
1012:
1013:                        // attach attributes
1014:                        for (int a = 0; a < element.getAttributeCount(); a++) {
1015:                            Attribute att = element.getAttribute(a);
1016:                            if (in.contains(att)) {
1017:                                out.append(att);
1018:                                in.remove(att);
1019:                                if (in.isEmpty())
1020:                                    return;
1021:                            }
1022:                        }
1023:                        sort(in, namespaces, out, element);
1024:                    } else {
1025:                        if (in.contains(child)) {
1026:                            out.append(child);
1027:                            in.remove(child);
1028:                            if (in.isEmpty())
1029:                                return;
1030:                        }
1031:                    }
1032:                }
1033:
1034:            }
1035:
1036:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.