001: /*
002: * GeoTools - OpenSource mapping toolkit
003: * http://geotools.org
004: * (C) 2002-2006, GeoTools Project Managment Committee (PMC)
005: *
006: * This library is free software; you can redistribute it and/or
007: * modify it under the terms of the GNU Lesser General Public
008: * License as published by the Free Software Foundation;
009: * version 2.1 of the License.
010: *
011: * This library is distributed in the hope that it will be useful,
012: * but WITHOUT ANY WARRANTY; without even the implied warranty of
013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014: * Lesser General Public License for more details.
015: */
016: package org.geotools.xml;
017:
018: import java.io.BufferedInputStream;
019: import java.io.File;
020: import java.io.FileInputStream;
021: import java.io.IOException;
022: import java.io.InputStream;
023: import java.io.Reader;
024: import java.net.URI;
025: import java.net.URISyntaxException;
026: import java.util.ArrayList;
027: import java.util.Iterator;
028: import java.util.List;
029:
030: import javax.xml.XMLConstants;
031: import javax.xml.namespace.QName;
032: import javax.xml.parsers.ParserConfigurationException;
033:
034: import org.apache.xerces.jaxp.SAXParserFactoryImpl;
035: import org.apache.xerces.parsers.SAXParser;
036: import org.eclipse.xsd.XSDSchema;
037: import org.geotools.xml.impl.ParserHandler;
038: import org.geotools.xs.bindings.XS;
039: import org.xml.sax.InputSource;
040: import org.xml.sax.SAXException;
041: import org.xml.sax.helpers.NamespaceSupport;
042:
043: /**
044: * Main interface to the geotools xml parser.
045: *
046: * <p>
047: * <h3>Schema Resolution</h3>
048: * See {@link org.geotools.xml.Configuration} javadocs for instructions on how
049: * to customize schema resolution. This is often desirable in the case that
050: * the instance document being parsed contains invalid uri's in schema imports
051: * and includes.
052: * </p>
053: * @author Justin Deoliveira, The Open Planning Project
054: */
055: public class Parser {
056: /** sax handler which maintains the element stack */
057: private ParserHandler handler;
058:
059: /** the sax parser driving the handler */
060: private SAXParser parser;
061:
062: /** the instance document being parsed */
063: private InputStream input;
064:
065: /**
066: * Creats a new instance of the parser.
067: *
068: * @param configuration The parser configuration, bindings and context,
069: * must never be <code>null</code>.
070: *
071: */
072: public Parser(Configuration configuration) {
073:
074: if (configuration == null) {
075: throw new NullPointerException("configuration");
076: }
077:
078: handler = new ParserHandler(configuration);
079: }
080:
081: /**
082: * Creates a new instance of the parser.
083: *
084: * @param configuration Object representing the configuration of the parser.
085: * @param input A uri representing the instance document to be parsed.
086: *
087: * @throws ParserConfigurationException
088: * @throws SAXException If a sax parser can not be created.
089: * @throws URISyntaxException If <code>input</code> is not a valid uri.
090: *
091: * @deprecated use {@link #Parser(Configuration)} and {@link #parse(InputStream)}.
092: */
093: public Parser(Configuration configuration, String input)
094: throws IOException, URISyntaxException {
095: this (configuration, new BufferedInputStream(
096: new FileInputStream(new File(new URI(input)))));
097: }
098:
099: /**
100: * Creates a new instance of the parser.
101: *
102: * @param configuration Object representing the configuration of the parser.
103: * @param input The stream representing the instance document to be parsed.
104: *
105: * @deprecated use {@link #Parser(Configuration)} and {@link #parse(InputStream)}.
106: */
107: public Parser(Configuration configuration, InputStream input) {
108: this (configuration);
109: this .input = input;
110: }
111:
112: /**
113: * Signals the parser to parse the entire instance document. The object
114: * returned from the parse is the object which has been bound to the root
115: * element of the document. This method should only be called once for
116: * a single instance document.
117: *
118: * @return The object representation of the root element of the document.
119: *
120: * @throws IOException
121: * @throws SAXException
122: * @throws ParserConfigurationException
123: *
124: * @deprecated use {@link #parse(InputStream)}
125: */
126: public Object parse() throws IOException, SAXException,
127: ParserConfigurationException {
128: return parse(input);
129: }
130:
131: /**
132: * Parses an instance documented defined by an input stream.
133: * <p>
134: * The object returned from the parse is the object which has been bound to the root
135: * element of the document. This method should only be called once for a single instance document.
136: * </p>
137: *
138: * @return The object representation of the root element of the document.
139: *
140: * @throws IOException
141: * @throws SAXException
142: * @throws ParserConfigurationException
143: */
144: public Object parse(InputStream input) throws IOException,
145: SAXException, ParserConfigurationException {
146: return parse(new InputSource(input));
147: }
148:
149: /**
150: * Parses an instance documented defined by a reader.
151: * <p>
152: * The object returned from the parse is the object which has been bound to the root
153: * element of the document. This method should only be called once for a single instance document.
154: * </p>
155: *
156: * @return The object representation of the root element of the document.
157: *
158: * @throws IOException
159: * @throws SAXException
160: * @throws ParserConfigurationException
161: */
162: public Object parse(Reader reader) throws IOException,
163: SAXException, ParserConfigurationException {
164: return parse(new InputSource(reader));
165: }
166:
167: /**
168: * Parses an instance documented defined by a sax input source.
169: * <p>
170: * The object returned from the parse is the object which has been bound to the root
171: * element of the document. This method should only be called once for a single instance document.
172: * </p>
173: *
174: * @return The object representation of the root element of the document.
175: *
176: * @throws IOException
177: * @throws SAXException
178: * @throws ParserConfigurationException
179: */
180: public Object parse(InputSource source) throws IOException,
181: SAXException, ParserConfigurationException {
182: parser = parser();
183: parser.setContentHandler(handler);
184: parser.setErrorHandler(handler);
185:
186: parser.parse(source);
187:
188: return handler.getValue();
189: }
190:
191: /**
192: * Sets the strict parsing flag.
193: * <p>
194: * When set to <code>true</code>, this will cause the parser to operate in
195: * a strict mode, which means that xml being parsed must be exactly correct
196: * with respect to the schema it references.
197: * </p>
198: * <p>
199: * Some examples of cases in which the parser will throw an exception while
200: * operating in strict mode:
201: * <ul>
202: * <li>no 'schemaLocation' specified, or specified incorrectly
203: * <li>element found which is not declared in the schema
204: * </ul>
205: * </p>
206: * @param strict The strict flag.
207: */
208: public void setStrict(boolean strict) {
209: handler.setStrict(strict);
210: }
211:
212: /**
213: * Sets the flag controlling wether the parser should validate or not.
214: *
215: * @param validating Validation flag, <code>true</code> to validate, otherwise <code>false</code>
216: */
217: public void setValidating(boolean validating) {
218: handler.setValidating(validating);
219: }
220:
221: /**
222: * Returns a list of any validation errors that occured while parsing.
223: *
224: * @return A list of errors, or an empty list if none.
225: */
226: public List getValidationErrors() {
227: return handler.getValidationErrors();
228: }
229:
230: /**
231: * Returns the schema objects referenced by the instance document being
232: * parsed. This method can only be called after a successful parse has
233: * begun.
234: *
235: * @return The schema objects used to parse the document, or null if parsing
236: * has not commenced.
237: */
238: public XSDSchema[] getSchemas() {
239: if (handler != null) {
240: return handler.getSchemas();
241: }
242:
243: return null;
244: }
245:
246: /**
247: * Returns the namespace mappings maintained by the parser.
248: * <p>
249: * Clients may register additional namespace mappings. This is useful when
250: * an application whishes to provide some "default" namespace mappings.
251: * </p>
252: * <p>
253: * Clients should register namespace mappings in the current "context", ie
254: * do not call {@link NamespaceSupport#pushContext()}. Example:
255: * <code>
256: * Parser parser = new Parser( ... );
257: * parser.getNamespaces().declarePrefix( "foo", "http://www.foo.com" );
258: * ...
259: * </code>
260: * </p>
261: *
262: * @return The namespace support containing prefix to uri mappings.
263: * @since 2.4
264: */
265: public NamespaceSupport getNamespaces() {
266: return handler.getNamespaceSupport();
267: }
268:
269: protected SAXParser parser() throws ParserConfigurationException,
270: SAXException {
271: //JD: we use xerces directly here because jaxp does seem to allow use to
272: // override all the namespaces to validate against
273: SAXParser parser = new SAXParser();
274:
275: //set the appropriate features
276: parser.setFeature("http://xml.org/sax/features/namespaces",
277: true);
278: if (handler.isValidating()) {
279: parser.setFeature("http://xml.org/sax/features/validation",
280: true);
281: parser.setFeature(
282: "http://apache.org/xml/features/validation/schema",
283: true);
284: parser
285: .setFeature(
286: "http://apache.org/xml/features/validation/schema-full-checking",
287: true);
288: }
289:
290: //set the schema sources of this configuration, and all dependent ones
291: StringBuffer schemaLocation = new StringBuffer();
292: for (Iterator d = handler.getConfiguration().allDependencies()
293: .iterator(); d.hasNext();) {
294: Configuration dependency = (Configuration) d.next();
295:
296: //ignore xs namespace
297: if (XS.NAMESPACE.equals(dependency.getNamespaceURI()))
298: continue;
299:
300: //seperate entries by space
301: if (schemaLocation.length() > 0) {
302: schemaLocation.append(" ");
303: }
304:
305: //add the entry
306: schemaLocation.append(dependency.getNamespaceURI());
307: schemaLocation.append(" ");
308: schemaLocation.append(dependency.getSchemaFileURL());
309: }
310:
311: //set hte property to map namespaces to schema locations
312: parser
313: .setProperty(
314: "http://apache.org/xml/properties/schema/external-schemaLocation",
315: schemaLocation.toString());
316:
317: //set the default location
318: parser
319: .setProperty(
320: "http://apache.org/xml/properties/schema/external-noNamespaceSchemaLocation",
321: handler.getConfiguration().getSchemaFileURL());
322:
323: return parser;
324: }
325:
326: /**
327: * Properties used to control the parser behaviour.
328: * <p>
329: * Parser properties are set in the configuration of a parser.
330: * <pre>
331: * Configuration configuration = new ....
332: * configuration.getProperties().add( Parser.Properties.PARSE_UNKNOWN_ELEMENTS );
333: * configuration.getProperties().add( Parser.Properties.PARSE_UNKNOWN_ATTRIBUTES );
334: * </pre>
335: * </p>
336: * @author Justin Deoliveira, The Open Planning Project
337: * @deprecated
338: */
339: public static interface Properties {
340:
341: /**
342: * If set, the parser will continue to parse when it finds an element
343: * and cannot determine its type.
344: *
345: * @deprecated use {@link Parser#setStrict(boolean)}
346: */
347: QName PARSE_UNKNOWN_ELEMENTS = new QName(
348: "http://www.geotools.org", "parseUnknownElements");
349:
350: /**
351: * If set, the parser will continue to parse when it finds an attribute
352: * and cannot determine its type.
353: *
354: * @deprecated use {@link Parser#setStrict(boolean)}
355: */
356: QName PARSE_UNKNOWN_ATTRIBUTES = new QName(
357: "http://www.geotools.org", "parseUnknownAttributes");
358:
359: /**
360: * If set, the parser will ignore the schemaLocation attribute of an
361: * instance document.
362: *
363: * @deprecated use {@link Parser#setStrict(boolean)}
364: */
365: QName IGNORE_SCHEMA_LOCATION = new QName(
366: "http://www.geotools.org", "ignoreSchemaLocation");
367: }
368: }
|