001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common Development
008: * and Distribution License("CDDL") (collectively, the "License"). You
009: * may not use this file except in compliance with the License. You can obtain
010: * a copy of the License at https://glassfish.dev.java.net/public/CDDL+GPL.html
011: * or glassfish/bootstrap/legal/LICENSE.txt. See the License for the specific
012: * language governing permissions and limitations under the License.
013: *
014: * When distributing the software, include this License Header Notice in each
015: * file and include the License file at glassfish/bootstrap/legal/LICENSE.txt.
016: * Sun designates this particular file as subject to the "Classpath" exception
017: * as provided by Sun in the GPL Version 2 section of the License file that
018: * accompanied this code. If applicable, add the following below the License
019: * Header, with the fields enclosed by brackets [] replaced by your own
020: * identifying information: "Portions Copyrighted [year]
021: * [name of copyright owner]"
022: *
023: * Contributor(s):
024: *
025: * If you wish your version of this file to be governed by only the CDDL or
026: * only the GPL Version 2, indicate your decision by adding "[Contributor]
027: * elects to include this software in this distribution under the [CDDL or GPL
028: * Version 2] license." If you don't indicate a single choice of license, a
029: * recipient has the option to distribute your version of this file under
030: * either the CDDL, the GPL Version 2 or to extend the choice of license to
031: * its licensees as provided above. However, if you add GPL Version 2 code
032: * and therefore, elected the GPL Version 2 license, then the option applies
033: * only if the new code is made subject to such option by the copyright
034: * holder.
035: */
036: package com.sun.tools.xjc.reader.internalizer;
037:
038: import java.io.IOException;
039: import java.io.OutputStream;
040: import java.io.OutputStreamWriter;
041: import java.net.URI;
042: import java.net.URISyntaxException;
043: import java.util.ArrayList;
044: import java.util.Collections;
045: import java.util.HashMap;
046: import java.util.HashSet;
047: import java.util.List;
048: import java.util.Map;
049: import java.util.Set;
050:
051: import static javax.xml.XMLConstants.W3C_XML_SCHEMA_NS_URI;
052: import javax.xml.parsers.DocumentBuilder;
053: import javax.xml.parsers.DocumentBuilderFactory;
054: import javax.xml.parsers.ParserConfigurationException;
055: import javax.xml.parsers.SAXParserFactory;
056: import javax.xml.stream.XMLStreamException;
057: import javax.xml.stream.XMLStreamReader;
058: import javax.xml.transform.Source;
059: import javax.xml.transform.Transformer;
060: import javax.xml.transform.TransformerException;
061: import javax.xml.transform.TransformerFactory;
062: import javax.xml.transform.dom.DOMSource;
063: import javax.xml.transform.sax.SAXResult;
064: import javax.xml.transform.sax.SAXSource;
065: import javax.xml.validation.SchemaFactory;
066:
067: import com.sun.istack.NotNull;
068: import com.sun.istack.XMLStreamReaderToContentHandler;
069: import com.sun.tools.xjc.ErrorReceiver;
070: import com.sun.tools.xjc.reader.Const;
071: import com.sun.tools.xjc.reader.xmlschema.parser.SchemaConstraintChecker;
072: import com.sun.tools.xjc.util.ErrorReceiverFilter;
073: import com.sun.xml.bind.marshaller.DataWriter;
074: import com.sun.xml.xsom.parser.JAXPParser;
075: import com.sun.xml.xsom.parser.XMLParser;
076:
077: import org.w3c.dom.Document;
078: import org.w3c.dom.Element;
079: import org.xml.sax.ContentHandler;
080: import org.xml.sax.EntityResolver;
081: import org.xml.sax.ErrorHandler;
082: import org.xml.sax.InputSource;
083: import org.xml.sax.SAXException;
084: import org.xml.sax.SAXParseException;
085: import org.xml.sax.XMLReader;
086: import org.xml.sax.helpers.XMLFilterImpl;
087:
088: /**
089: * Builds a DOM forest and maintains association from
090: * system IDs to DOM trees.
091: *
092: * <p>
093: * A forest is a transitive reflexive closure of referenced documents.
094: * IOW, if a document is in a forest, all the documents referenced from
095: * it is in a forest, too. To support this semantics, {@link DOMForest}
096: * uses {@link InternalizationLogic} to find referenced documents.
097: *
098: * <p>
099: * Some documents are marked as "root"s, meaning those documents were
100: * put into a forest explicitly, not because it is referenced from another
101: * document. (However, a root document can be referenced from other
102: * documents, too.)
103: *
104: * @author
105: * Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com)
106: */
107: public final class DOMForest {
108: /** actual data storage map<SystemId,Document>. */
109: private final Map<String, Document> core = new HashMap<String, Document>();
110:
111: /**
112: * To correctly feed documents to a schema parser, we need to remember
113: * which documents (of the forest) were given as the root
114: * documents, and which of them are read as included/imported
115: * documents.
116: *
117: * <p>
118: * Set of system ids as strings.
119: */
120: private final Set<String> rootDocuments = new HashSet<String>();
121:
122: /** Stores location information for all the trees in this forest. */
123: public final LocatorTable locatorTable = new LocatorTable();
124:
125: /** Stores all the outer-most <jaxb:bindings> customizations. */
126: public final Set<Element> outerMostBindings = new HashSet<Element>();
127:
128: /** Used to resolve references to other schema documents. */
129: private EntityResolver entityResolver = null;
130:
131: /** Errors encountered during the parsing will be sent to this object. */
132: private ErrorReceiver errorReceiver = null;
133:
134: /** Schema language dependent part of the processing. */
135: protected final InternalizationLogic logic;
136:
137: private final SAXParserFactory parserFactory;
138: private final DocumentBuilder documentBuilder;
139:
140: public DOMForest(SAXParserFactory parserFactory,
141: DocumentBuilder documentBuilder, InternalizationLogic logic) {
142:
143: this .parserFactory = parserFactory;
144: this .documentBuilder = documentBuilder;
145: this .logic = logic;
146: }
147:
148: public DOMForest(InternalizationLogic logic) {
149: try {
150: DocumentBuilderFactory dbf = DocumentBuilderFactory
151: .newInstance();
152: dbf.setNamespaceAware(true);
153: this .documentBuilder = dbf.newDocumentBuilder();
154:
155: this .parserFactory = SAXParserFactory.newInstance();
156: this .parserFactory.setNamespaceAware(true);
157: } catch (ParserConfigurationException e) {
158: throw new AssertionError(e);
159: }
160:
161: this .logic = logic;
162: }
163:
164: /**
165: * Gets the DOM tree associated with the specified system ID,
166: * or null if none is found.
167: */
168: public Document get(String systemId) {
169: Document doc = core.get(systemId);
170:
171: if (doc == null && systemId.startsWith("file:/")
172: && !systemId.startsWith("file://")) {
173: // As of JDK1.4, java.net.URL.toExternal method returns URLs like
174: // "file:/abc/def/ghi" which is an incorrect file protocol URL according to RFC1738.
175: // Some other correctly functioning parts return the correct URLs ("file:///abc/def/ghi"),
176: // and this descripancy breaks DOM look up by system ID.
177:
178: // this extra check solves this problem.
179: doc = core.get("file://" + systemId.substring(5));
180: }
181:
182: if (doc == null && systemId.startsWith("file:")) {
183: // on Windows, filenames are case insensitive.
184: // perform case-insensitive search for improved user experience
185: String systemPath = getPath(systemId);
186: for (String key : core.keySet()) {
187: if (key.startsWith("file:")
188: && getPath(key).equalsIgnoreCase(systemPath)) {
189: doc = core.get(key);
190: break;
191: }
192: }
193: }
194:
195: return doc;
196: }
197:
198: /**
199: * Strips off the leading 'file:///' portion from an URL.
200: */
201: private String getPath(String key) {
202: key = key.substring(5); // skip 'file:'
203: while (key.length() > 0 && key.charAt(0) == '/')
204: key = key.substring(1);
205: return key;
206: }
207:
208: /**
209: * Returns a read-only set of root document system IDs.
210: */
211: public Set<String> getRootDocuments() {
212: return Collections.unmodifiableSet(rootDocuments);
213: }
214:
215: /**
216: * Picks one document at random and returns it.
217: */
218: public Document getOneDocument() {
219: for (Document dom : core.values()) {
220: if (!dom.getDocumentElement().getNamespaceURI().equals(
221: Const.JAXB_NSURI))
222: return dom;
223: }
224: // we should have caught this error very early on
225: throw new AssertionError();
226: }
227:
228: /**
229: * Checks the correctness of the XML Schema documents and return true
230: * if it's OK.
231: *
232: * <p>
233: * This method performs a weaker version of the tests where error messages
234: * are provided without line number information. So whenever possible
235: * use {@link SchemaConstraintChecker}.
236: *
237: * @see SchemaConstraintChecker
238: */
239: public boolean checkSchemaCorrectness(ErrorReceiver errorHandler) {
240: try {
241: SchemaFactory sf = SchemaFactory
242: .newInstance(W3C_XML_SCHEMA_NS_URI);
243: ErrorReceiverFilter filter = new ErrorReceiverFilter(
244: errorHandler);
245: sf.setErrorHandler(filter);
246: Set<String> roots = getRootDocuments();
247: Source[] sources = new Source[roots.size()];
248: int i = 0;
249: for (String root : roots) {
250: sources[i++] = new DOMSource(get(root), root);
251: }
252: sf.newSchema(sources);
253: return !filter.hadError();
254: } catch (SAXException e) {
255: // the errors should have been reported
256: return false;
257: }
258: }
259:
260: /**
261: * Gets the system ID from which the given DOM is parsed.
262: * <p>
263: * Poor-man's base URI.
264: */
265: public String getSystemId(Document dom) {
266: for (Map.Entry<String, Document> e : core.entrySet()) {
267: if (e.getValue() == dom)
268: return e.getKey();
269: }
270: return null;
271: }
272:
273: public Document parse(InputSource source, boolean root)
274: throws SAXException {
275: if (source.getSystemId() == null)
276: throw new IllegalArgumentException();
277:
278: return parse(source.getSystemId(), source, root);
279: }
280:
281: /**
282: * Parses an XML at the given location (
283: * and XMLs referenced by it) into DOM trees
284: * and stores them to this forest.
285: *
286: * @return the parsed DOM document object.
287: */
288: public Document parse(String systemId, boolean root)
289: throws SAXException, IOException {
290:
291: systemId = normalizeSystemId(systemId);
292:
293: if (core.containsKey(systemId))
294: // this document has already been parsed. Just ignore.
295: return core.get(systemId);
296:
297: InputSource is = null;
298:
299: // allow entity resolver to find the actual byte stream.
300: if (entityResolver != null)
301: is = entityResolver.resolveEntity(null, systemId);
302: if (is == null)
303: is = new InputSource(systemId);
304:
305: // but we still use the original system Id as the key.
306: return parse(systemId, is, root);
307: }
308:
309: /**
310: * Returns a {@link ContentHandler} to feed SAX events into.
311: *
312: * <p>
313: * The client of this class can feed SAX events into the handler
314: * to parse a document into this DOM forest.
315: *
316: * This version requires that the DOM object to be created and registered
317: * to the map beforehand.
318: */
319: private ContentHandler getParserHandler(Document dom) {
320: ContentHandler handler = new DOMBuilder(dom, locatorTable,
321: outerMostBindings);
322: handler = new WhitespaceStripper(handler, errorReceiver,
323: entityResolver);
324: handler = new VersionChecker(handler, errorReceiver,
325: entityResolver);
326:
327: // insert the reference finder so that
328: // included/imported schemas will be also parsed
329: XMLFilterImpl f = logic.createExternalReferenceFinder(this );
330: f.setContentHandler(handler);
331:
332: if (errorReceiver != null)
333: f.setErrorHandler(errorReceiver);
334: if (entityResolver != null)
335: f.setEntityResolver(entityResolver);
336:
337: return f;
338: }
339:
340: public interface Handler extends ContentHandler {
341: /**
342: * Gets the DOM that was built.
343: */
344: public Document getDocument();
345: }
346:
347: private static abstract class HandlerImpl extends XMLFilterImpl
348: implements Handler {
349: }
350:
351: /**
352: * Returns a {@link ContentHandler} to feed SAX events into.
353: *
354: * <p>
355: * The client of this class can feed SAX events into the handler
356: * to parse a document into this DOM forest.
357: */
358: public Handler getParserHandler(String systemId, boolean root) {
359: final Document dom = documentBuilder.newDocument();
360: core.put(systemId, dom);
361: if (root)
362: rootDocuments.add(systemId);
363:
364: ContentHandler handler = getParserHandler(dom);
365:
366: // we will register the DOM to the map once the system ID becomes available.
367: // but the SAX allows the event source to not to provide that information,
368: // so be prepared for such case.
369: HandlerImpl x = new HandlerImpl() {
370: public Document getDocument() {
371: return dom;
372: }
373: };
374: x.setContentHandler(handler);
375:
376: return x;
377: }
378:
379: /**
380: * Parses the given document and add it to the DOM forest.
381: *
382: * @return
383: * null if there was a parse error. otherwise non-null.
384: */
385: public Document parse(String systemId, InputSource inputSource,
386: boolean root) throws SAXException {
387: Document dom = documentBuilder.newDocument();
388:
389: systemId = normalizeSystemId(systemId);
390:
391: // put into the map before growing a tree, to
392: // prevent recursive reference from causing infinite loop.
393: core.put(systemId, dom);
394: if (root)
395: rootDocuments.add(systemId);
396:
397: try {
398: XMLReader reader = parserFactory.newSAXParser()
399: .getXMLReader();
400: reader.setContentHandler(getParserHandler(dom));
401: if (errorReceiver != null)
402: reader.setErrorHandler(errorReceiver);
403: if (entityResolver != null)
404: reader.setEntityResolver(entityResolver);
405: reader.parse(inputSource);
406: } catch (ParserConfigurationException e) {
407: // in practice, this exception won't happen.
408: errorReceiver.error(e.getMessage(), e);
409: core.remove(systemId);
410: rootDocuments.remove(systemId);
411: return null;
412: } catch (IOException e) {
413: errorReceiver.error(e.getMessage(), e);
414: core.remove(systemId);
415: rootDocuments.remove(systemId);
416: return null;
417: }
418:
419: return dom;
420: }
421:
422: private String normalizeSystemId(String systemId) {
423: try {
424: systemId = new URI(systemId).normalize().toString();
425: } catch (URISyntaxException e) {
426: // leave the system ID untouched. In my experience URI is often too strict
427: }
428: return systemId;
429: }
430:
431: public Document parse(String systemId, XMLStreamReader parser,
432: boolean root) throws XMLStreamException {
433: Document dom = documentBuilder.newDocument();
434:
435: systemId = normalizeSystemId(systemId);
436:
437: if (root)
438: rootDocuments.add(systemId);
439:
440: if (systemId == null)
441: throw new IllegalArgumentException(
442: "system id cannot be null");
443: core.put(systemId, dom);
444:
445: new XMLStreamReaderToContentHandler(parser,
446: getParserHandler(dom), false, false).bridge();
447:
448: return dom;
449: }
450:
451: /**
452: * Performs internalization.
453: *
454: * This method should be called only once, only after all the
455: * schemas are parsed.
456: *
457: * @return
458: * the returned bindings need to be applied after schema
459: * components are built.
460: */
461: public SCDBasedBindingSet transform(boolean enableSCD) {
462: return Internalizer.transform(this , enableSCD);
463: }
464:
465: /**
466: * Performs the schema correctness check by using JAXP 1.3.
467: *
468: * <p>
469: * This is "weak", because {@link SchemaFactory#newSchema(Source[])}
470: * doesn't handle inclusions very correctly (it ends up parsing it
471: * from its original source, not in this tree), and because
472: * it doesn't handle two documents for the same namespace very
473: * well.
474: *
475: * <p>
476: * We should eventually fix JAXP (and Xerces), but meanwhile
477: * this weaker and potentially wrong correctness check is still
478: * better than nothing when used inside JAX-WS (JAXB CLI and Ant
479: * does a better job of checking this.)
480: *
481: * <p>
482: * To receive errors, use {@link SchemaFactory#setErrorHandler(ErrorHandler)}.
483: */
484: public void weakSchemaCorrectnessCheck(SchemaFactory sf) {
485: List<SAXSource> sources = new ArrayList<SAXSource>();
486: for (String systemId : getRootDocuments()) {
487: Document dom = get(systemId);
488: if (dom.getDocumentElement().getNamespaceURI().equals(
489: Const.JAXB_NSURI))
490: continue; // this isn't a schema. we have to do a negative check because if we see completely unrelated ns, we want to report that as an error
491:
492: SAXSource ss = createSAXSource(systemId);
493: try {
494: ss
495: .getXMLReader()
496: .setFeature(
497: "http://xml.org/sax/features/namespace-prefixes",
498: true);
499: } catch (SAXException e) {
500: throw new AssertionError(e); // Xerces wants this. See 6395322.
501: }
502: sources.add(ss);
503: }
504:
505: try {
506: sf.newSchema(sources.toArray(new SAXSource[0]));
507: } catch (SAXException e) {
508: // error should have been reported.
509: } catch (RuntimeException e) {
510: // JAXP RI isn't very trustworthy when it comes to schema error check,
511: // and we know some cases where it just dies with NPE. So handle it gracefully.
512: // this masks a bug in the JAXP RI, but we need a release that we have to make.
513: try {
514: sf
515: .getErrorHandler()
516: .warning(
517: new SAXParseException(
518: Messages
519: .format(
520: Messages.ERR_GENERAL_SCHEMA_CORRECTNESS_ERROR,
521: e.getMessage()),
522: null, null, -1, -1, e));
523: } catch (SAXException _) {
524: // ignore
525: }
526: }
527: }
528:
529: /**
530: * Creates a {@link SAXSource} that, when parsed, reads from this {@link DOMForest}
531: * (instead of parsing the original source identified by the system ID.)
532: */
533: public @NotNull
534: SAXSource createSAXSource(String systemId) {
535: ContentHandlerNamespacePrefixAdapter reader = new ContentHandlerNamespacePrefixAdapter(
536: new XMLFilterImpl() {
537: // XMLReader that uses XMLParser to parse. We need to use XMLFilter to indrect
538: // handlers, since SAX allows handlers to be changed while parsing.
539: public void parse(InputSource input)
540: throws SAXException, IOException {
541: createParser().parse(input, this , this , this );
542: }
543:
544: public void parse(String systemId)
545: throws SAXException, IOException {
546: parse(new InputSource(systemId));
547: }
548: });
549:
550: return new SAXSource(reader, new InputSource(systemId));
551: }
552:
553: /**
554: * Creates {@link XMLParser} for XSOM which reads documents from
555: * this DOMForest rather than doing a fresh parse.
556: *
557: * The net effect is that XSOM will read transformed XML Schemas
558: * instead of the original documents.
559: */
560: public XMLParser createParser() {
561: return new DOMForestParser(this , new JAXPParser());
562: }
563:
564: public EntityResolver getEntityResolver() {
565: return entityResolver;
566: }
567:
568: public void setEntityResolver(EntityResolver entityResolver) {
569: this .entityResolver = entityResolver;
570: }
571:
572: public ErrorReceiver getErrorHandler() {
573: return errorReceiver;
574: }
575:
576: public void setErrorHandler(ErrorReceiver errorHandler) {
577: this .errorReceiver = errorHandler;
578: }
579:
580: /**
581: * Gets all the parsed documents.
582: */
583: public Document[] listDocuments() {
584: return core.values().toArray(new Document[core.size()]);
585: }
586:
587: /**
588: * Gets all the system IDs of the documents.
589: */
590: public String[] listSystemIDs() {
591: return core.keySet().toArray(new String[core.keySet().size()]);
592: }
593:
594: /**
595: * Dumps the contents of the forest to the specified stream.
596: *
597: * This is a debug method. As such, error handling is sloppy.
598: */
599: public void dump(OutputStream out) throws IOException {
600: try {
601: // create identity transformer
602: Transformer it = TransformerFactory.newInstance()
603: .newTransformer();
604:
605: for (Map.Entry<String, Document> e : core.entrySet()) {
606: out.write(("---<< " + e.getKey() + '\n').getBytes());
607:
608: DataWriter dw = new DataWriter(new OutputStreamWriter(
609: out), null);
610: dw.setIndentStep(" ");
611: it.transform(new DOMSource(e.getValue()),
612: new SAXResult(dw));
613:
614: out.write("\n\n\n".getBytes());
615: }
616: } catch (TransformerException e) {
617: e.printStackTrace();
618: }
619: }
620: }
|