001: /* Copyright 2002-2005 Elliotte Rusty Harold
002:
003: This library is free software; you can redistribute it and/or modify
004: it under the terms of version 2.1 of the GNU Lesser General Public
005: License as published by the Free Software Foundation.
006:
007: This library is distributed in the hope that it will be useful,
008: but WITHOUT ANY WARRANTY; without even the implied warranty of
009: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
010: GNU Lesser General Public License for more details.
011:
012: You should have received a copy of the GNU Lesser General Public
013: License along with this library; if not, write to the
014: Free Software Foundation, Inc., 59 Temple Place, Suite 330,
015: Boston, MA 02111-1307 USA
016:
017: You can contact Elliotte Rusty Harold by sending e-mail to
018: elharo@metalab.unc.edu. Please include the word "XOM" in the
019: subject line. The XOM home page is located at http://www.xom.nu/
020: */
021:
022: package nu.xom.converters;
023:
024: import nu.xom.Attribute;
025: import nu.xom.Comment;
026: import nu.xom.DocType;
027: import nu.xom.Document;
028: import nu.xom.Element;
029: import nu.xom.Node;
030: import nu.xom.Nodes;
031: import nu.xom.ParentNode;
032: import nu.xom.ProcessingInstruction;
033: import nu.xom.Text;
034: import nu.xom.XMLException;
035:
036: import org.w3c.dom.Attr;
037: import org.w3c.dom.DOMImplementation;
038: import org.w3c.dom.DocumentFragment;
039: import org.w3c.dom.DocumentType;
040: import org.w3c.dom.NamedNodeMap;
041: import org.w3c.dom.NodeList;
042:
043: // Many DOM interfaces such as Element and Document
044: // have name conflicts with XOM classes.
045: // Thus they cannot be imported, and this class
046: // must use their fully package qualified names.
047:
048: /**
049: * <p>
050: * Converts XOM <code>Document</code> objects to and from DOM
051: * <code>Document</code> objects. This class can also
052: * convert many DOM node objects into the corresponding
053: * XOM node objects. However, the reverse is not possible because
054: * DOM objects cannot live outside their containing
055: * <code>Document</code>.
056: * </p>
057: *
058: * @author Elliotte Rusty Harold
059: * @version 1.1b1
060: *
061: */
062: public class DOMConverter {
063:
064: // prevent instantiation
065: private DOMConverter() {
066: }
067:
068: /**
069: * <p>
070: * DOM violates the namespaces 1.0 specification by mapping
071: * the <code>xmlns</code> prefix to the namespace URI
072: * <code>http://www.w3.org/2000/xmlns/</code>.
073: * </p>
074: */
075: private final static String XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/";
076:
077: /**
078: * <p>
079: * Translates a DOM <code>org.w3c.dom.Document</code> object
080: * into an equivalent <code>nu.xom.Document</code> object.
081: * The original DOM document is not changed.
082: * Some DOM <code>Document</code> objects cannot
083: * be serialized as namespace well-formed XML, and
084: * thus cannot be converted to XOM.
085: * </p>
086: *
087: * @param domDocument the DOM document to translate
088: * @return a XOM document
089: *
090: * @throws XMLException if the DOM document is not a well-formed
091: * XML document
092: */
093: public static Document convert(org.w3c.dom.Document domDocument) {
094:
095: org.w3c.dom.Element domRoot = domDocument.getDocumentElement();
096: Element xomRoot = convert(domRoot);
097: Document xomDocument = new Document(xomRoot);
098:
099: org.w3c.dom.Node current = domDocument.getFirstChild();
100:
101: // prolog
102: for (int position = 0; current.getNodeType() != org.w3c.dom.Node.ELEMENT_NODE; position++, current = current
103: .getNextSibling()) {
104: xomDocument.insertChild(convert(current), position);
105: }
106: // root element
107: current = current.getNextSibling();
108:
109: // epilog
110: while (current != null) {
111: xomDocument.appendChild(convert(current));
112: current = current.getNextSibling();
113: }
114:
115: return xomDocument;
116:
117: }
118:
119: /**
120: * <p>
121: * Translates a DOM <code>org.w3c.dom.DocumentFragment</code>
122: * object into an equivalent <code>nu.xom.Nodes</code> object.
123: * The original DOM document fragment is not changed.
124: * Some DOM <code>DocumentFragment</code> objects cannot
125: * be serialized as namespace well-balanced XML, and
126: * thus cannot be converted to XOM.
127: * </p>
128: *
129: * @param fragment the DOM document fragment to translate
130: *
131: * @return a <code>Nodes</code> containing the converted
132: * fragment members
133: *
134: * @throws XMLException if the DOM object is not a well-balanced
135: * XML fragment
136: */
137: public static Nodes convert(DocumentFragment fragment) {
138:
139: Nodes result = new Nodes();
140: NodeList children = fragment.getChildNodes();
141: for (int i = 0; i < children.getLength(); i++) {
142: result.append(convert(children.item(i)));
143: }
144:
145: return result;
146:
147: }
148:
149: private static Node convert(org.w3c.dom.Node node) {
150:
151: int type = node.getNodeType();
152: switch (type) {
153: case org.w3c.dom.Node.ELEMENT_NODE:
154: return convert((org.w3c.dom.Element) node);
155: case org.w3c.dom.Node.COMMENT_NODE:
156: return convert((org.w3c.dom.Comment) node);
157: case org.w3c.dom.Node.DOCUMENT_TYPE_NODE:
158: return convert((org.w3c.dom.DocumentType) node);
159: case org.w3c.dom.Node.TEXT_NODE:
160: return convert((org.w3c.dom.Text) node);
161: case org.w3c.dom.Node.CDATA_SECTION_NODE:
162: return convert((org.w3c.dom.Text) node);
163: case org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE:
164: return convert((org.w3c.dom.ProcessingInstruction) node);
165: default:
166: throw new XMLException("Unexpected DOM node type: " + type);
167: }
168:
169: }
170:
171: /**
172: * <p>
173: * Translates a DOM <code>org.w3c.dom.Comment</code> object
174: * into an equivalent <code>nu.xom.Comment</code> object.
175: * The original DOM object is not changed.
176: * Some DOM <code>Comment</code> objects cannot
177: * be serialized as well-formed XML, and
178: * thus cannot be converted to XOM.
179: * </p>
180: *
181: * @param comment the DOM comment to translate
182: * @return a XOM comment
183: *
184: * @throws XMLException if the DOM comment is not a well-formed
185: * XML comment
186: */
187: public static Comment convert(org.w3c.dom.Comment comment) {
188: return new Comment(comment.getNodeValue());
189: }
190:
191: /**
192: * <p>
193: * Translates a DOM <code>org.w3c.dom.Text</code> object
194: * into an equivalent <code>nu.xom.Text</code>.
195: * This method will also convert <code>org.w3c.dom.CDATA</code>
196: * objects. The original DOM object is not changed.
197: * Some DOM <code>Text</code> objects cannot
198: * be serialized as well-formed XML, and
199: * thus cannot be converted to XOM.
200: * </p>
201: *
202: * @param text the DOM text to translate
203: * @return a XOM text
204: *
205: * @throws XMLException if the DOM text is not a well-formed
206: * XML text
207: */
208: public static Text convert(org.w3c.dom.Text text) {
209: return new Text(text.getNodeValue());
210: }
211:
212: /**
213: * <p>
214: * Translates a DOM <code>org.w3c.dom.Attr</code> object
215: * into an equivalent <code>nu.xom.Attribute</code> object.
216: * The original DOM object is not changed.
217: * Some DOM <code>Attr</code> objects cannot
218: * be serialized as well-formed XML, and
219: * thus cannot be converted to XOM. Furthermore, DOM uses
220: * <code>Attr</code> objects to represent namespace declarations.
221: * XOM does not. Converting an <code>Attr</code> object that
222: * represents an <code>xmlns</code> or
223: * <code>xmlns:<i>prefix</i></code> attribute will cause an
224: * exception.
225: * </p>
226: *
227: * @param attribute the DOM <code>Attr</code> to translate
228: * @return the equivalent XOM <code>Attribute</code>
229: *
230: * @throws XMLException if the DOM <code>Attr</code>
231: * is a namespace declaration or is not a well-formed
232: * XML attribute
233: */
234: public static Attribute convert(Attr attribute) {
235:
236: String name = attribute.getName();
237: String uri = attribute.getNamespaceURI();
238: if (uri == null)
239: uri = "";
240: return new Attribute(name, uri, attribute.getNodeValue());
241:
242: }
243:
244: /**
245: * <p>
246: * Translates a DOM <code>org.w3c.dom.ProcessingInstruction</code>
247: * object into an equivalent
248: * <code>nu.xom.ProcessingInstruction</code> object.
249: * The original DOM object is not changed.
250: * Some DOM <code>ProcessingInstruction</code> objects cannot
251: * be serialized as well-formed XML, and
252: * thus cannot be converted to XOM.
253: * </p>
254: *
255: * @param pi the DOM <code>ProcessingInstruction</code> to
256: * convert
257: * @return a XOM <code>ProcessingInstruction</code>
258: *
259: * @throws XMLException if the DOM <code>ProcessingInstruction</code>
260: * is not a well-formed XML processing instruction
261: */
262: public static ProcessingInstruction convert(
263: org.w3c.dom.ProcessingInstruction pi) {
264: return new ProcessingInstruction(pi.getTarget(), pi
265: .getNodeValue());
266: }
267:
268: /**
269: * <p>
270: * Translates a DOM <code>org.w3c.dom.DocumentType</code>
271: * object into an equivalent <code>nu.xom.DocType</code> object.
272: * The original DOM object is not changed. Some DOM
273: * <code>DocumentType</code> objects cannot be serialized as
274: * well-formed XML, and thus cannot be converted to XOM.
275: * </p>
276: *
277: * @param doctype the DOM <code>DocumentType</code> to convert
278: * @return the equivalent XOM <code>DocType</code>
279: *
280: * @throws XMLException if the DOM <code>DocumentType</code>
281: * is not a well-formed XML document type declaration
282: */
283: public static DocType convert(org.w3c.dom.DocumentType doctype) {
284:
285: DocType result = new DocType(doctype.getName(), doctype
286: .getPublicId(), doctype.getSystemId());
287: result.setInternalDTDSubset(doctype.getInternalSubset());
288:
289: return result;
290:
291: }
292:
293: /**
294: * <p>
295: * Translates a DOM <code>org.w3c.dom.Element</code>
296: * object into an equivalent <code>nu.xom.Element</code> object.
297: * The original DOM object is not changed. Some DOM
298: * <code>Element</code> objects cannot be serialized as
299: * namespace well-formed XML, and thus cannot be converted to XOM.
300: * </p>
301: *
302: * @param element the DOM <code>Element</code> to convert
303: * @return the equivalent XOM <code>Element</code>
304: *
305: * @throws XMLException if the DOM <code>Element</code>
306: * is not a well-formed XML element
307: */
308: public static Element convert(org.w3c.dom.Element element) {
309:
310: org.w3c.dom.Node current = element;
311: Element result = makeElement(element);
312: ParentNode parent = result;
313: boolean backtracking = false;
314: while (true) {
315: if (current.hasChildNodes() && !backtracking) {
316: current = current.getFirstChild();
317: backtracking = false;
318: } else if (current == element) {
319: break;
320: } else if (current.getNextSibling() != null) {
321: current = current.getNextSibling();
322: backtracking = false;
323: } else {
324: current = current.getParentNode();
325: backtracking = true;
326: parent = parent.getParent();
327: continue;
328: }
329:
330: int type = current.getNodeType();
331: if (type == org.w3c.dom.Node.ELEMENT_NODE) {
332: Element child = makeElement((org.w3c.dom.Element) current);
333: parent.appendChild(child);
334: if (current.hasChildNodes())
335: parent = child;
336: } else {
337: Node child = convert(current);
338: parent.appendChild(child);
339: }
340:
341: }
342:
343: return result;
344:
345: }
346:
347: private static Element makeElement(org.w3c.dom.Element element) {
348:
349: String namespaceURI = element.getNamespaceURI();
350: String tagName = element.getTagName();
351: Element result = new Element(tagName, namespaceURI);
352:
353: // fill element's attributes and additional namespace declarations
354: NamedNodeMap attributes = element.getAttributes();
355: for (int i = 0; i < attributes.getLength(); i++) {
356: org.w3c.dom.Attr attribute = (org.w3c.dom.Attr) attributes
357: .item(i);
358: String name = attribute.getName();
359: String uri = attribute.getNamespaceURI();
360: String value = attribute.getValue();
361: if (uri == null)
362: uri = "";
363: if (uri.equals(XMLNS_NAMESPACE)) {
364: if (name.equals("xmlns"))
365: continue;
366: String prefix = name.substring(name.indexOf(':') + 1);
367: String currentURI = result.getNamespaceURI(prefix);
368: if (!value.equals(currentURI)) {
369: result.addNamespaceDeclaration(prefix, value);
370: }
371: } else {
372: result.addAttribute(new Attribute(name, uri, value));
373: }
374: }
375: return result;
376:
377: }
378:
379: /**
380: * <p>
381: * Translates a XOM <code>nu.xom.Document</code> object
382: * into an equivalent <code>org.w3c.dom.Document</code>
383: * object. The original XOM document is not changed.
384: * Since DOM2 internal subsets are read-only,
385: * the internal DTD subset is not converted.
386: * All other aspects of the document should be
387: * translated without a problem.
388: * </p>
389: *
390: * @param document the XOM document to translate
391: * @param impl the specific DOM implementation into which this
392: * document will be converted
393: *
394: * @return a DOM document
395: */
396: public static org.w3c.dom.Document convert(Document document,
397: DOMImplementation impl) {
398:
399: Element root = document.getRootElement();
400: String rootName = root.getQualifiedName();
401: String rootNamespace = root.getNamespaceURI();
402: DocType doctype = document.getDocType();
403: DocumentType domDOCTYPE = null;
404: if (doctype != null) {
405: domDOCTYPE = impl.createDocumentType(rootName, doctype
406: .getPublicID(), doctype.getSystemID());
407: }
408:
409: org.w3c.dom.Document domDoc = impl.createDocument(
410: rootNamespace, rootName, domDOCTYPE);
411: org.w3c.dom.Element domRoot = domDoc.getDocumentElement();
412:
413: boolean beforeRoot = true;
414: for (int i = 0; i < document.getChildCount(); i++) {
415: Node original = document.getChild(i);
416: // Need to test positioning of doctype
417: if (original instanceof DocType)
418: continue;
419: else if (original instanceof Element) {
420: convert((Element) original, domDoc);
421: beforeRoot = false;
422: } else {
423: org.w3c.dom.Node domNode = convert(original, domDoc);
424: if (beforeRoot)
425: domDoc.insertBefore(domNode, domRoot);
426: else
427: domDoc.appendChild(domNode);
428: }
429: }
430:
431: return domDoc;
432:
433: }
434:
435: private static org.w3c.dom.Node convert(Node node,
436: org.w3c.dom.Document document) {
437:
438: if (node instanceof Text) {
439: return convert((Text) node, document);
440: } else if (node instanceof Comment) {
441: return convert((Comment) node, document);
442: } else if (node instanceof ProcessingInstruction) {
443: return convert((ProcessingInstruction) node, document);
444: }
445: // The non-recursive algorithm converts elements directly.
446: // It does not pass through this method.
447: else {
448: throw new XMLException("Unexpected node type: "
449: + node.getClass().getName());
450: }
451:
452: }
453:
454: private static org.w3c.dom.Comment convert(Comment comment,
455: org.w3c.dom.Document document) {
456: return document.createComment(comment.getValue());
457: }
458:
459: private static org.w3c.dom.Text convert(Text text,
460: org.w3c.dom.Document document) {
461: return document.createTextNode(text.getValue());
462: }
463:
464: private static org.w3c.dom.ProcessingInstruction convert(
465: ProcessingInstruction pi, org.w3c.dom.Document document) {
466: return document.createProcessingInstruction(pi.getTarget(), pi
467: .getValue());
468: }
469:
470: private static org.w3c.dom.Element convert(Element xomElement,
471: org.w3c.dom.Document document) {
472:
473: org.w3c.dom.Element domResult = makeElement(xomElement,
474: document);
475: org.w3c.dom.Node domParent = domResult;
476: Node xomCurrent = xomElement;
477: int index = 0;
478: int[] indexes = new int[10];
479: int top = 0;
480: indexes[0] = 0;
481: boolean end = false;
482: while (true) {
483:
484: if (!end && xomCurrent.getChildCount() > 0) {
485: xomCurrent = xomCurrent.getChild(0);
486: index = 0;
487: top++;
488: indexes = grow(indexes, top);
489: indexes[top] = 0;
490: } else {
491: end = false;
492: ParentNode xomParent = xomCurrent.getParent();
493: org.w3c.dom.Node grandparent = domParent
494: .getParentNode();
495: if (grandparent.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE
496: && xomCurrent instanceof Element) {
497: domParent = grandparent;
498: }
499: if (xomParent.getChildCount() - 1 == index) {
500: xomCurrent = xomParent;
501: top--;
502: if (xomCurrent == xomElement)
503: break;
504: ParentNode tp = xomCurrent.getParent();
505: if (tp == null)
506: break;
507: index = indexes[top];
508: end = true;
509: continue;
510: } else {
511: index++;
512: indexes[top] = index;
513: xomCurrent = xomParent.getChild(index);
514: }
515: }
516:
517: if (xomCurrent instanceof Element) {
518: Element currentElement = (Element) xomCurrent;
519: org.w3c.dom.Element child = makeElement(currentElement,
520: document);
521: domParent.appendChild(child);
522: domParent = child;
523: } else {
524: org.w3c.dom.Node child = convert(xomCurrent, document);
525: domParent.appendChild(child);
526: }
527:
528: } // end while
529:
530: return domResult;
531:
532: }
533:
534: private static int[] grow(int[] indexes, int top) {
535:
536: if (top < indexes.length)
537: return indexes;
538: int[] result = new int[indexes.length * 2];
539: System.arraycopy(indexes, 0, result, 0, indexes.length);
540: return result;
541:
542: }
543:
544: private static org.w3c.dom.Element makeElement(Element element,
545: org.w3c.dom.Document document) {
546:
547: org.w3c.dom.Element result;
548: String namespace = element.getNamespaceURI();
549:
550: if (element.getParent() instanceof Document) {
551: result = document.getDocumentElement();
552: } else if (namespace.equals("")) {
553: result = document.createElement(element.getQualifiedName());
554: } else {
555: result = document.createElementNS(namespace, element
556: .getQualifiedName());
557: }
558:
559: int attributeCount = element.getAttributeCount();
560: for (int i = 0; i < attributeCount; i++) {
561: Attribute attribute = element.getAttribute(i);
562: String attns = attribute.getNamespaceURI();
563: Attr attr;
564: if (attns.equals("")) {
565: attr = document.createAttribute(attribute
566: .getLocalName());
567: result.setAttributeNode(attr);
568: } else {
569: attr = document.createAttributeNS(attns, attribute
570: .getQualifiedName());
571: result.setAttributeNodeNS(attr);
572: }
573: attr.setValue(attribute.getValue());
574: }
575:
576: int namespaceCount = element.getNamespaceDeclarationCount();
577: for (int i = 0; i < namespaceCount; i++) {
578: String additionalPrefix = element.getNamespacePrefix(i);
579: String uri = element.getNamespaceURI(additionalPrefix);
580:
581: ParentNode parentNode = element.getParent();
582: if (parentNode instanceof Element) {
583: Element parentElement = (Element) parentNode;
584: if (uri.equals(parentElement
585: .getNamespaceURI(additionalPrefix))) {
586: continue;
587: }
588: } else if (uri.equals("")) { //parent is Document or null
589: continue; // no need to say xmlns=""
590: }
591:
592: if ("".equals(additionalPrefix)) {
593: Attr attr = document.createAttributeNS(XMLNS_NAMESPACE,
594: "xmlns");
595: result.setAttributeNodeNS(attr);
596: attr.setValue(uri);
597: } else {
598: Attr attr = document.createAttributeNS(XMLNS_NAMESPACE,
599: "xmlns:" + additionalPrefix);
600: result.setAttributeNodeNS(attr);
601: attr.setValue(uri);
602: }
603: }
604:
605: return result;
606:
607: }
608:
609: }
|