001: /*
002: * Copyright 2006 Sun Microsystems, Inc. All Rights Reserved.
003: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004: *
005: * This code is free software; you can redistribute it and/or modify it
006: * under the terms of the GNU General Public License version 2 only, as
007: * published by the Free Software Foundation. Sun designates this
008: * particular file as subject to the "Classpath" exception as provided
009: * by Sun in the LICENSE file that accompanied this code.
010: *
011: * This code is distributed in the hope that it will be useful, but WITHOUT
012: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
013: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
014: * version 2 for more details (a copy is included in the LICENSE file that
015: * accompanied this code).
016: *
017: * You should have received a copy of the GNU General Public License version
018: * 2 along with this work; if not, write to the Free Software Foundation,
019: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
020: *
021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
022: * CA 95054 USA or visit www.sun.com if you need additional information or
023: * have any questions.
024: */
025:
026: package com.sun.tools.internal.xjc.reader.internalizer;
027:
028: import java.io.IOException;
029: import java.io.OutputStream;
030: import java.io.OutputStreamWriter;
031: import java.net.URI;
032: import java.net.URISyntaxException;
033: import java.util.ArrayList;
034: import java.util.Collections;
035: import java.util.HashMap;
036: import java.util.HashSet;
037: import java.util.List;
038: import java.util.Map;
039: import java.util.Set;
040:
041: import static javax.xml.XMLConstants.W3C_XML_SCHEMA_NS_URI;
042: import javax.xml.parsers.DocumentBuilder;
043: import javax.xml.parsers.DocumentBuilderFactory;
044: import javax.xml.parsers.ParserConfigurationException;
045: import javax.xml.parsers.SAXParserFactory;
046: import javax.xml.stream.XMLStreamException;
047: import javax.xml.stream.XMLStreamReader;
048: import javax.xml.transform.Source;
049: import javax.xml.transform.Transformer;
050: import javax.xml.transform.TransformerException;
051: import javax.xml.transform.TransformerFactory;
052: import javax.xml.transform.dom.DOMSource;
053: import javax.xml.transform.sax.SAXResult;
054: import javax.xml.transform.sax.SAXSource;
055: import javax.xml.validation.SchemaFactory;
056:
057: import com.sun.istack.internal.NotNull;
058: import com.sun.tools.internal.xjc.ErrorReceiver;
059: import com.sun.tools.internal.xjc.reader.Const;
060: import com.sun.tools.internal.xjc.reader.xmlschema.parser.SchemaConstraintChecker;
061: import com.sun.tools.internal.xjc.util.ErrorReceiverFilter;
062: import com.sun.tools.internal.xjc.util.XMLStreamReaderToContentHandler;
063: import com.sun.xml.internal.bind.marshaller.DataWriter;
064: import com.sun.xml.internal.xsom.parser.JAXPParser;
065: import com.sun.xml.internal.xsom.parser.XMLParser;
066:
067: import org.w3c.dom.Document;
068: import org.w3c.dom.Element;
069: import org.xml.sax.ContentHandler;
070: import org.xml.sax.EntityResolver;
071: import org.xml.sax.ErrorHandler;
072: import org.xml.sax.InputSource;
073: import org.xml.sax.SAXException;
074: import org.xml.sax.SAXParseException;
075: import org.xml.sax.XMLReader;
076: import org.xml.sax.helpers.XMLFilterImpl;
077:
078: /**
079: * Builds a DOM forest and maintains association from
080: * system IDs to DOM trees.
081: *
082: * <p>
 * A forest is a transitive reflexive closure of referenced documents.
 * In other words, if a document is in a forest, all the documents referenced
 * from it are in the forest, too. To support these semantics, {@link DOMForest}
 * uses {@link InternalizationLogic} to find referenced documents.
087: *
088: * <p>
 * Some documents are marked as "root"s, meaning those documents were
 * put into a forest explicitly, not because they are referenced from another
 * document. (However, a root document can be referenced from other
 * documents, too.)
093: *
094: * @author
095: * Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com)
096: */
097: public final class DOMForest {
098: /** actual data storage map<SystemId,Document>. */
099: private final Map<String, Document> core = new HashMap<String, Document>();
100:
101: /**
102: * To correctly feed documents to a schema parser, we need to remember
103: * which documents (of the forest) were given as the root
104: * documents, and which of them are read as included/imported
105: * documents.
106: *
107: * <p>
108: * Set of system ids as strings.
109: */
110: private final Set<String> rootDocuments = new HashSet<String>();
111:
112: /** Stores location information for all the trees in this forest. */
113: public final LocatorTable locatorTable = new LocatorTable();
114:
115: /** Stores all the outer-most <jaxb:bindings> customizations. */
116: public final Set<Element> outerMostBindings = new HashSet<Element>();
117:
118: /** Used to resolve references to other schema documents. */
119: private EntityResolver entityResolver = null;
120:
121: /** Errors encountered during the parsing will be sent to this object. */
122: private ErrorReceiver errorReceiver = null;
123:
124: /** Schema language dependent part of the processing. */
125: protected final InternalizationLogic logic;
126:
127: private final SAXParserFactory parserFactory;
128: private final DocumentBuilder documentBuilder;
129:
/**
 * Creates a forest that uses caller-supplied parser infrastructure.
 *
 * @param parserFactory
 *      factory for the SAX parsers used when a document is parsed
 *      from an {@link InputSource}.
 * @param documentBuilder
 *      used to create the empty DOM document for each parsed source.
 * @param logic
 *      schema language dependent part of the processing.
 */
public DOMForest(SAXParserFactory parserFactory,
        DocumentBuilder documentBuilder, InternalizationLogic logic) {
    this.logic = logic;
    this.documentBuilder = documentBuilder;
    this.parserFactory = parserFactory;
}
137:
138: public DOMForest(InternalizationLogic logic) {
139: try {
140: DocumentBuilderFactory dbf = DocumentBuilderFactory
141: .newInstance();
142: dbf.setNamespaceAware(true);
143: this .documentBuilder = dbf.newDocumentBuilder();
144:
145: this .parserFactory = SAXParserFactory.newInstance();
146: this .parserFactory.setNamespaceAware(true);
147: } catch (ParserConfigurationException e) {
148: throw new AssertionError(e);
149: }
150:
151: this .logic = logic;
152: }
153:
154: /**
155: * Gets the DOM tree associated with the specified system ID,
156: * or null if none is found.
157: */
158: public Document get(String systemId) {
159: Document doc = core.get(systemId);
160:
161: if (doc == null && systemId.startsWith("file:/")
162: && !systemId.startsWith("file://")) {
163: // As of JDK1.4, java.net.URL.toExternal method returns URLs like
164: // "file:/abc/def/ghi" which is an incorrect file protocol URL according to RFC1738.
165: // Some other correctly functioning parts return the correct URLs ("file:///abc/def/ghi"),
166: // and this descripancy breaks DOM look up by system ID.
167:
168: // this extra check solves this problem.
169: doc = core.get("file://" + systemId.substring(5));
170: }
171:
172: if (doc == null && systemId.startsWith("file:")) {
173: // on Windows, filenames are case insensitive.
174: // perform case-insensitive search for improved user experience
175: String systemPath = getPath(systemId);
176: for (String key : core.keySet()) {
177: if (key.startsWith("file:")
178: && getPath(key).equalsIgnoreCase(systemPath)) {
179: doc = core.get(key);
180: break;
181: }
182: }
183: }
184:
185: return doc;
186: }
187:
188: /**
189: * Strips off the leading 'file:///' portion from an URL.
190: */
191: private String getPath(String key) {
192: key = key.substring(5); // skip 'file:'
193: while (key.length() > 0 && key.charAt(0) == '/')
194: key = key.substring(1);
195: return key;
196: }
197:
198: /**
199: * Returns a read-only set of root document system IDs.
200: */
201: public Set<String> getRootDocuments() {
202: return Collections.unmodifiableSet(rootDocuments);
203: }
204:
205: /**
206: * Checks the correctness of the XML Schema documents and return true
207: * if it's OK.
208: *
209: * <p>
210: * This method performs a weaker version of the tests where error messages
211: * are provided without line number information. So whenever possible
212: * use {@link SchemaConstraintChecker}.
213: *
214: * @see SchemaConstraintChecker
215: */
216: public boolean checkSchemaCorrectness(ErrorReceiver errorHandler) {
217: try {
218: SchemaFactory sf = SchemaFactory
219: .newInstance(W3C_XML_SCHEMA_NS_URI);
220: ErrorReceiverFilter filter = new ErrorReceiverFilter(
221: errorHandler);
222: sf.setErrorHandler(filter);
223: Set<String> roots = getRootDocuments();
224: Source[] sources = new Source[roots.size()];
225: int i = 0;
226: for (String root : roots) {
227: sources[i++] = new DOMSource(get(root), root);
228: }
229: sf.newSchema(sources);
230: return !filter.hadError();
231: } catch (SAXException e) {
232: // the errors should have been reported
233: return false;
234: }
235: }
236:
237: /**
238: * Gets the system ID from which the given DOM is parsed.
239: * <p>
240: * Poor-man's base URI.
241: */
242: public String getSystemId(Document dom) {
243: for (Map.Entry<String, Document> e : core.entrySet()) {
244: if (e.getValue() == dom)
245: return e.getKey();
246: }
247: return null;
248: }
249:
250: public Document parse(InputSource source, boolean root)
251: throws SAXException {
252: if (source.getSystemId() == null)
253: throw new IllegalArgumentException();
254:
255: return parse(source.getSystemId(), source, root);
256: }
257:
258: /**
259: * Parses an XML at the given location (
260: * and XMLs referenced by it) into DOM trees
261: * and stores them to this forest.
262: *
263: * @return the parsed DOM document object.
264: */
265: public Document parse(String systemId, boolean root)
266: throws SAXException, IOException {
267:
268: systemId = normalizeSystemId(systemId);
269:
270: if (core.containsKey(systemId))
271: // this document has already been parsed. Just ignore.
272: return core.get(systemId);
273:
274: InputSource is = null;
275:
276: // allow entity resolver to find the actual byte stream.
277: if (entityResolver != null)
278: is = entityResolver.resolveEntity(null, systemId);
279: if (is == null)
280: is = new InputSource(systemId);
281:
282: // but we still use the original system Id as the key.
283: return parse(systemId, is, root);
284: }
285:
286: /**
287: * Returns a {@link ContentHandler} to feed SAX events into.
288: *
289: * <p>
290: * The client of this class can feed SAX events into the handler
291: * to parse a document into this DOM forest.
292: *
293: * This version requires that the DOM object to be created and registered
294: * to the map beforehand.
295: */
296: private ContentHandler getParserHandler(Document dom) {
297: ContentHandler handler = new DOMBuilder(dom, locatorTable,
298: outerMostBindings);
299: handler = new WhitespaceStripper(handler, errorReceiver,
300: entityResolver);
301: handler = new VersionChecker(handler, errorReceiver,
302: entityResolver);
303:
304: // insert the reference finder so that
305: // included/imported schemas will be also parsed
306: XMLFilterImpl f = logic.createExternalReferenceFinder(this );
307: f.setContentHandler(handler);
308:
309: if (errorReceiver != null)
310: f.setErrorHandler(errorReceiver);
311: if (entityResolver != null)
312: f.setEntityResolver(entityResolver);
313:
314: return f;
315: }
316:
/**
 * A {@link ContentHandler} that also gives access to the DOM
 * being built from the events fed into it.
 */
public interface Handler extends ContentHandler {
    /**
     * Gets the DOM that was built.
     */
    Document getDocument();
}
323:
324: private static abstract class HandlerImpl extends XMLFilterImpl
325: implements Handler {
326: }
327:
328: /**
329: * Returns a {@link ContentHandler} to feed SAX events into.
330: *
331: * <p>
332: * The client of this class can feed SAX events into the handler
333: * to parse a document into this DOM forest.
334: */
335: public Handler getParserHandler(String systemId, boolean root) {
336: final Document dom = documentBuilder.newDocument();
337: core.put(systemId, dom);
338: if (root)
339: rootDocuments.add(systemId);
340:
341: ContentHandler handler = getParserHandler(dom);
342:
343: // we will register the DOM to the map once the system ID becomes available.
344: // but the SAX allows the event source to not to provide that information,
345: // so be prepared for such case.
346: HandlerImpl x = new HandlerImpl() {
347: public Document getDocument() {
348: return dom;
349: }
350: };
351: x.setContentHandler(handler);
352:
353: return x;
354: }
355:
356: /**
357: * Parses the given document and add it to the DOM forest.
358: *
359: * @return
360: * null if there was a parse error. otherwise non-null.
361: */
362: public Document parse(String systemId, InputSource inputSource,
363: boolean root) throws SAXException {
364: Document dom = documentBuilder.newDocument();
365:
366: systemId = normalizeSystemId(systemId);
367:
368: // put into the map before growing a tree, to
369: // prevent recursive reference from causing infinite loop.
370: core.put(systemId, dom);
371: if (root)
372: rootDocuments.add(systemId);
373:
374: try {
375: XMLReader reader = parserFactory.newSAXParser()
376: .getXMLReader();
377: reader.setContentHandler(getParserHandler(dom));
378: if (errorReceiver != null)
379: reader.setErrorHandler(errorReceiver);
380: if (entityResolver != null)
381: reader.setEntityResolver(entityResolver);
382: reader.parse(inputSource);
383: } catch (ParserConfigurationException e) {
384: // in practice, this exception won't happen.
385: errorReceiver.error(e.getMessage(), e);
386: core.remove(systemId);
387: rootDocuments.remove(systemId);
388: return null;
389: } catch (IOException e) {
390: errorReceiver.error(e.getMessage(), e);
391: core.remove(systemId);
392: rootDocuments.remove(systemId);
393: return null;
394: }
395:
396: return dom;
397: }
398:
399: private String normalizeSystemId(String systemId) {
400: try {
401: systemId = new URI(systemId).normalize().toString();
402: } catch (URISyntaxException e) {
403: // leave the system ID untouched. In my experience URI is often too strict
404: }
405: return systemId;
406: }
407:
408: public Document parse(String systemId, XMLStreamReader parser,
409: boolean root) throws XMLStreamException {
410: Document dom = documentBuilder.newDocument();
411:
412: systemId = normalizeSystemId(systemId);
413:
414: if (root)
415: rootDocuments.add(systemId);
416:
417: if (systemId == null)
418: throw new IllegalArgumentException(
419: "system id cannot be null");
420: core.put(systemId, dom);
421:
422: new XMLStreamReaderToContentHandler(parser,
423: getParserHandler(dom)).bridge();
424:
425: return dom;
426: }
427:
428: /**
429: * Performs internalization.
430: *
431: * This method should be called only once, only after all the
432: * schemas are parsed.
433: */
434: public void transform() {
435: Internalizer.transform(this );
436: }
437:
438: /**
439: * Performs the schema correctness check by using JAXP 1.3.
440: *
441: * <p>
442: * This is "weak", because {@link SchemaFactory#newSchema(Source[])}
443: * doesn't handle inclusions very correctly (it ends up parsing it
444: * from its original source, not in this tree), and because
445: * it doesn't handle two documents for the same namespace very
446: * well.
447: *
448: * <p>
449: * We should eventually fix JAXP (and Xerces), but meanwhile
450: * this weaker and potentially wrong correctness check is still
451: * better than nothing when used inside JAX-WS (JAXB CLI and Ant
452: * does a better job of checking this.)
453: *
454: * <p>
455: * To receive errors, use {@link SchemaFactory#setErrorHandler(ErrorHandler)}.
456: */
457: public void weakSchemaCorrectnessCheck(SchemaFactory sf) {
458: List<SAXSource> sources = new ArrayList<SAXSource>();
459: for (String systemId : getRootDocuments()) {
460: Document dom = get(systemId);
461: if (dom.getDocumentElement().getNamespaceURI().equals(
462: Const.JAXB_NSURI))
463: continue; // this isn't a schema. we have to do a negative check because if we see completely unrelated ns, we want to report that as an error
464:
465: SAXSource ss = createSAXSource(systemId);
466: try {
467: ss
468: .getXMLReader()
469: .setFeature(
470: "http://xml.org/sax/features/namespace-prefixes",
471: true);
472: } catch (SAXException e) {
473: throw new AssertionError(e); // Xerces wants this. See 6395322.
474: }
475: sources.add(ss);
476: }
477:
478: try {
479: sf.newSchema(sources.toArray(new SAXSource[0]));
480: } catch (SAXException e) {
481: // error should have been reported.
482: } catch (RuntimeException e) {
483: // JAXP RI isn't very trustworthy when it comes to schema error check,
484: // and we know some cases where it just dies with NPE. So handle it gracefully.
485: // this masks a bug in the JAXP RI, but we need a release that we have to make.
486: try {
487: sf
488: .getErrorHandler()
489: .warning(
490: new SAXParseException(
491: Messages
492: .format(
493: Messages.ERR_GENERAL_SCHEMA_CORRECTNESS_ERROR,
494: e.getMessage()),
495: null, null, -1, -1, e));
496: } catch (SAXException _) {
497: // ignore
498: }
499: }
500: }
501:
502: /**
503: * Creates a {@link SAXSource} that, when parsed, reads from this {@link DOMForest}
504: * (instead of parsing the original source identified by the system ID.)
505: */
506: public @NotNull
507: SAXSource createSAXSource(String systemId) {
508: ContentHandlerNamespacePrefixAdapter reader = new ContentHandlerNamespacePrefixAdapter(
509: new XMLFilterImpl() {
510: // XMLReader that uses XMLParser to parse. We need to use XMLFilter to indrect
511: // handlers, since SAX allows handlers to be changed while parsing.
512: public void parse(InputSource input)
513: throws SAXException, IOException {
514: createParser().parse(input, this , this , this );
515: }
516:
517: public void parse(String systemId)
518: throws SAXException, IOException {
519: parse(new InputSource(systemId));
520: }
521: });
522:
523: return new SAXSource(reader, new InputSource(systemId));
524: }
525:
526: /**
527: * Creates {@link XMLParser} for XSOM which reads documents from
528: * this DOMForest rather than doing a fresh parse.
529: *
530: * The net effect is that XSOM will read transformed XML Schemas
531: * instead of the original documents.
532: */
533: public XMLParser createParser() {
534: return new DOMForestParser(this , new JAXPParser());
535: }
536:
537: public EntityResolver getEntityResolver() {
538: return entityResolver;
539: }
540:
541: public void setEntityResolver(EntityResolver entityResolver) {
542: this .entityResolver = entityResolver;
543: }
544:
545: public ErrorReceiver getErrorHandler() {
546: return errorReceiver;
547: }
548:
549: public void setErrorHandler(ErrorReceiver errorHandler) {
550: this .errorReceiver = errorHandler;
551: }
552:
553: /**
554: * Gets all the parsed documents.
555: */
556: public Document[] listDocuments() {
557: return core.values().toArray(new Document[core.size()]);
558: }
559:
560: /**
561: * Gets all the system IDs of the documents.
562: */
563: public String[] listSystemIDs() {
564: return core.keySet().toArray(new String[core.keySet().size()]);
565: }
566:
567: /**
568: * Dumps the contents of the forest to the specified stream.
569: *
570: * This is a debug method. As such, error handling is sloppy.
571: */
572: public void dump(OutputStream out) throws IOException {
573: try {
574: // create identity transformer
575: Transformer it = TransformerFactory.newInstance()
576: .newTransformer();
577:
578: for (Map.Entry<String, Document> e : core.entrySet()) {
579: out.write(("---<< " + e.getKey() + '\n').getBytes());
580:
581: DataWriter dw = new DataWriter(new OutputStreamWriter(
582: out), null);
583: dw.setIndentStep(" ");
584: it.transform(new DOMSource(e.getValue()),
585: new SAXResult(dw));
586:
587: out.write("\n\n\n".getBytes());
588: }
589: } catch (TransformerException e) {
590: e.printStackTrace();
591: }
592: }
593: }
|