001: /*
002: * This file or a portion of this file is licensed under the terms of
003: * the Globus Toolkit Public License, found in file ../GTPL, or at
004: * http://www.globus.org/toolkit/download/license.html. This notice must
005: * appear in redistributions of this file, with or without modification.
006: *
007: * Redistributions of this Software, with or without modification, must
008: * reproduce the GTPL in: (1) the Software, or (2) the Documentation or
009: * some other similar material which is provided with the Software (if
010: * any).
011: *
012: * Copyright 1999-2004 University of Chicago and The University of
013: * Southern California. All rights reserved.
014: */
015: package org.griphyn.vdl.parser;
016:
017: import org.griphyn.vdl.classes.*;
018: import org.griphyn.vdl.util.Logging;
019:
020: // Xerces
021: import org.xml.sax.*;
022: import javax.xml.parsers.*;
023: import java.io.*;
024:
025: /**
026: * This class uses the Xerces SAX2 parser to validate and parse an XML
027: * document. The content handler <code>VDLContentHandler</code> and
028: * error handler <code>VDLErrorHandler</code> are necessary to handle
029: * various callbacks.
030:
031: * @author Jens-S. Vöckler
032: * @author Yong Zhao
033: * @version $Revision: 50 $
034:
035: * @see VDLContentHandler
036: * @see VDLErrorHandler
037: */
038: public class VDLxParser {
039: /**
040: * Default parser is the Xerces parser.
041: */
042: protected static final String vendorParserClass = "org.apache.xerces.parsers.SAXParser";
043:
044: /**
045: * Holds the instance of a {@link org.xml.sax.XMLReader} class.
046: */
047: private XMLReader m_parser;
048:
049: /**
050: * Handles the filling in of content, and callbacks to the
051: * {@link DefinitionHandler} interface.
052: */
053: private VDLContentHandler m_contentHandler;
054:
055: /**
056: * Sets a feature while capturing failed features right here.
057: *
058: * @param uri is the feature's URI to modify
059: * @param flag is the new value to set.
060: * @return true, if the feature could be set, false for an exception
061: */
062: private boolean set(String uri, boolean flag) {
063: boolean result = false;
064: try {
065: this .m_parser.setFeature(uri, flag);
066: result = true;
067: } catch (SAXException se) {
068: Logging.instance().log("default", 0,
069: "Could not set parser feature " + se.getMessage());
070: }
071: return result;
072: }
073:
074: /**
075: * The class constructor. This function initializes the Xerces parser
076: * and the features that enable schema validation.
077: *
078: * @param schemaLocation is the default location of the XML Schema
079: * which this parser is capable of parsing. It may be null to use
080: * the defaults provided in the document.
081: */
082: public VDLxParser(String schemaLocation) {
083: try {
084: m_parser = (XMLReader) Class.forName(vendorParserClass)
085: .newInstance();
086: m_contentHandler = new VDLContentHandler();
087: m_parser.setContentHandler(m_contentHandler);
088: m_parser.setErrorHandler(new VDLErrorHandler());
089:
090: set("http://xml.org/sax/features/validation", true);
091: set("http://apache.org/xml/features/validation/dynamic",
092: true);
093: set("http://apache.org/xml/features/validation/schema",
094: true);
095: // time+memory consuming, see http://xml.apache.org/xerces2-j/features.html
096: // set( "http://apache.org/xml/features/validation/schema-full-checking", true );
097:
098: // Send XML Schema element default values via characters().
099: set(
100: "http://apache.org/xml/features/validation/schema/element-default",
101: true);
102: set(
103: "http://apache.org/xml/features/validation/warn-on-duplicate-attdef",
104: true);
105: // mysteriously, this one fails with recent Xerces
106: // set( "http://apache.org/xml/features/validation/warn-on-undeclared-elemdef", true );
107: set(
108: "http://apache.org/xml/features/warn-on-duplicate-entitydef",
109: true);
110:
111: // set the schema default location.
112: if (schemaLocation != null) {
113: setSchemaLocations(Definitions.SCHEMA_NAMESPACE + ' '
114: + schemaLocation);
115: Logging.instance().log("parser", 0,
116: "will use " + schemaLocation);
117: } else {
118: Logging.instance().log("parser", 0,
119: "will use document schema hint");
120: }
121: } catch (ClassNotFoundException e) {
122: Logging.instance().log("defaut", 0,
123: "The SAXParser class was not found: " + e);
124: } catch (InstantiationException e) {
125: Logging.instance().log(
126: "default",
127: 0,
128: "The SAXParser class could not be instantiated: "
129: + e);
130: } catch (IllegalAccessException e) {
131: Logging.instance().log("default", 0,
132: "The SAXParser class could not be accessed: " + e);
133: }
134: }
135:
136: /**
137: * Sets the list of external real locations where the XML schema may
138: * be found. Since this list can be determined at run-time through
139: * properties etc., we expect this function to be called between
140: * instantiating the parser, and using the parser.
141: *
142: * @param list is a list of strings representing schema locations. The
143: * content exists in pairs, one of the namespace URI, one of the
144: * location URL.
145: */
146: public void setSchemaLocations(String list) {
147: /*
148: // default place to add
149: list += "http://www.griphyn.org/working_groups/VDS/vdl-1.24.xsd " +
150: "http://www.griphyn.org/working_groups/VDS/vdl-1.24.xsd";
151: */
152:
153: // schema location handling
154: try {
155: m_parser
156: .setProperty(
157: "http://apache.org/xml/properties/schema/external-schemaLocation",
158: list);
159: } catch (SAXException se) {
160: Logging.instance().log("default", 0,
161: "The SAXParser reported an error: " + se);
162: }
163: }
164:
165: /**
166: * Sets the list of external real locations where the XML schema may
167: * be found when no namespace is active. Only one location can be
168: * specified. We expect this function to be called between
169: * instantiating the parser, and using the parser.
170: *
171: * @param location is the location of the schema file (location URL).
172: */
173: public void setDefaultSchemaLocation(String location) {
174: /*
175: // default place to add
176: list += "http://www.griphyn.org/working_groups/VDS/vdl-1.19.xsd " +
177: "http://www.griphyn.org/working_groups/VDS/vdl-1.19.xsd";
178: */
179:
180: // schema location handling
181: try {
182: m_parser
183: .setProperty(
184: "http://apache.org/xml/properties/schema/external-noNamespaceSchemaLocation",
185: location);
186: } catch (SAXException se) {
187: Logging.instance().log("default", 0,
188: "The SAXParser reported an error: " + se);
189: }
190: }
191:
192: /**
193: * This function parses a XML source from an InputStream source, and
194: * creates java class instances that correspond to different elements
195: * in the XML source.
196: *
197: * @param reader is a bytestream opened for reading.
198: * @param definitions is a reference to the already known definitions in
199: * the system. The definitions may be empty, but must not be null.
200: * @param overwrite is a flag to indicate the insertion mode. If set to
201: * <code>false</code>, an insert mode is assumed. Violations will be
202: * returned as clashes. With value <code>true</code>, an update mode
203: * is assumed. Old definitions of updates will be returned.
204: * @param dontcare is a flag to minimize memory consumption. Clashes in
205: * insert mode will be signalled with an Exception. Old values in update
206: * mode will be ignored. Effectively, the resulting list is always empty
207: * in dontcare mode.
208: *
209: * @return usually an empty list. If not empty, it contains clashes in
210: * insert, or old definitions in update mode. Please note that each
211: * element is a single Definition, either Transformation or
212: * Derivation. It is not a Definitions object, since multiple old
213: * versions may appear in update mode. Returns null on error!
214: *
215: * @see org.griphyn.vdl.classes.Definitions
216: */
217: public java.util.List parse(java.io.InputStream reader,
218: Definitions definitions, boolean overwrite, boolean dontcare) {
219: try {
220: MemoryStorage database = new MemoryStorage(definitions,
221: overwrite, dontcare);
222: m_contentHandler.setDefinitionHandler(database);
223:
224: m_parser.parse(new InputSource(reader));
225:
226: java.util.List result = database.getRejects();
227: Logging.instance().log(
228: "parser",
229: 1,
230: "Now with " + definitions.getDefinitionCount()
231: + " definitions, and " + result.size()
232: + " rejects");
233: return result;
234: } catch (SAXException e) {
235: Logging.instance().log("default", 0, "SAX Error: " + e);
236: } catch (IOException e) {
237: Logging.instance().log("default", 0, "IO Error: " + e);
238: }
239:
240: return null;
241: }
242:
243: /**
244: * This function parses an XML source (could be a document, a stream,
245: * etc.), and creates java class instances that correspond to
246: * different elements in the XML source.
247: *
248: * @param reader is an XML input source, which may be a character stream,
249: * byte stream, or even an URI.
250: * @param callback is a handler for store callbacks that will take
251: * one complete definition each time one is ready to be processed.
252: *
253: * @return true for successful parsing, false in case of error.
254: * @see org.griphyn.vdl.classes.Definitions
255: */
256: public boolean parse(InputSource reader, DefinitionHandler callback) {
257: try {
258: m_contentHandler.setDefinitionHandler(callback);
259: m_parser.parse(reader);
260: return true;
261: } catch (SAXException e) {
262: Logging.instance().log("default", 0, "SAX Error: " + e);
263: } catch (IOException e) {
264: Logging.instance().log("default", 0, "IO Error: " + e);
265: }
266:
267: return false;
268: }
269:
270: /**
271: * This function parses an XML source (could be a document, a stream,
272: * etc.), and invokes a callback for the top-level element with the
273: * corresponding Java class. Note: The finalizer cannot be called for
274: * Definitions elements. This method should be used for "partial VDLx",
275: * which contains XML for a Transformation or Derivation.
276: *
277: * @param reader is an XML input source, which may be a character stream,
278: * byte stream, or even an URI.
279: * @param callback is a handler for store callbacks that will take
280: * one complete definition.
281: *
282: * @return true for successful parsing, false in case of error.
283: * @see org.griphyn.vdl.classes.Definitions
284: */
285: public boolean parse(InputSource reader, FinalizerHandler callback) {
286: try {
287: m_contentHandler.setFinalizerHandler(callback);
288: m_parser.parse(reader);
289: return true;
290: } catch (SAXException e) {
291: Logging.instance().log("default", 0, "SAX Error: " + e);
292: } catch (IOException e) {
293: Logging.instance().log("default", 0, "IO Error: " + e);
294: }
295:
296: return false;
297: }
298:
299: // public Definitions parse(String xmlURI);
300: // public Definitions parse(InputStream stream);
301: // public Definitions parse(java.io.Reader reader);
302: }
|