001: /*
002: * (c) Copyright 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP
003: * [See end of file]
004: */
005:
006: package com.hp.hpl.jena.rdf.arp.impl;
007:
008: import java.io.IOException;
009: import java.io.InputStreamReader;
010: import java.io.Reader;
011: import java.io.UTFDataFormatException;
012:
013: import org.apache.xerces.parsers.SAXParser;
014: import org.apache.xerces.parsers.StandardParserConfiguration;
015: import org.apache.xerces.xni.Augmentations;
016: import org.xml.sax.InputSource;
017: import org.xml.sax.SAXException;
018: import org.xml.sax.SAXParseException;
019:
020: import com.hp.hpl.jena.rdf.arp.FatalParsingErrorException;
021: import com.hp.hpl.jena.rdf.arp.SAX2RDF;
022: import com.hp.hpl.jena.util.CharEncoding;
023:
024: /**
025: *
026: * The main parser, other variants of XMLHandler are for more specialized purposes.
027: *
028: * @author Jeremy J. Carroll
029: *
030: */
031: public class RDFXMLParser extends XMLHandler {
032:
033: private SAXParser saxParser;
034:
035: private String readerXMLEncoding = null;
036:
037: private String xmlEncoding = null;
038:
039: private RDFXMLParser(SAXParser rdr) {
040: super ();
041: saxParser = rdr;
042: try {
043: SAX2RDF.installHandlers(rdr, this );
044: } catch (SAXException e) {
045: throw new RuntimeException("Supposedly impossible:", e);
046: }
047: }
048:
049: public SAXParser getSAXParser() {
050: return saxParser;
051: }
052:
053: static private class MySAXParser extends SAXParser {
054: MySAXParser(StandardParserConfiguration c) {
055: super (c);
056: // try {
057: // setFeature("http://xml.org/sax/features/string-interning",
058: // false);
059: // } catch (SAXException e) {
060: // // Not supported - aggh
061: // // TO DO ask on xerces list why not?
062: // // e.printStackTrace();
063: // }
064: }
065:
066: RDFXMLParser a;
067:
068: public void xmlDecl(String version, String encoding,
069: String standalone, Augmentations augs) {
070: try {
071: a.setEncoding(encoding == null ? "UTF" : encoding);
072: } catch (SAXParseException e) {
073: throw new WrappedException(e);
074: }
075: super .xmlDecl(version, encoding, standalone, augs);
076:
077: }
078: }
079:
080: public static RDFXMLParser create() {
081: StandardParserConfiguration c = new StandardParserConfiguration();
082: MySAXParser msp = new MySAXParser(c);
083: RDFXMLParser a = new RDFXMLParser(msp);
084: msp.a = a;
085: return a;
086: }
087:
088: public void parse(InputSource input) throws IOException,
089: SAXException {
090: parse(input, input.getSystemId());
091: }
092:
093: synchronized public void parse(InputSource input, String base)
094: throws IOException, SAXException {
095: // Make sure we have a sane state for
096: // Namespace processing.
097:
098: initParse(base, "");
099: SAX2RDF.installHandlers(saxParser, this );
100: saxParser.reset();
101:
102: initEncodingChecks(input);
103: try {
104:
105: saxParser.parse(input);
106:
107: } catch (UTFDataFormatException e) {
108: generalError(ERR_UTF_ENCODING, e);
109: } catch (IOException e) {
110: generalError(ERR_GENERIC_IO, e);
111: } catch (WrappedException wrapped) {
112: wrapped.throwMe();
113: } catch (FatalParsingErrorException e) {
114: // ignore this.
115: } finally {
116: afterParse();
117: }
118:
119: }
120:
121: private void initEncodingChecks(InputSource in) {
122: Reader rdr = in.getCharacterStream();
123: readerXMLEncoding = null;
124: encodingProblems = false;
125: if (rdr != null && rdr instanceof InputStreamReader) {
126: String javaEnc = ((InputStreamReader) rdr).getEncoding();
127: readerXMLEncoding = CharEncoding.create(javaEnc).name();
128: }
129: }
130:
131: void setEncoding(String original) throws SAXParseException {
132:
133: CharEncoding encodingInfo = CharEncoding.create(original);
134: String e = encodingInfo.name();
135: if (xmlEncoding == null) {
136: // special case UTF-8 or UTF-16?
137: if (e.equals("UTF") && readerXMLEncoding != null
138: && readerXMLEncoding.startsWith("UTF")) {
139: xmlEncoding = readerXMLEncoding;
140: return;
141: }
142: xmlEncoding = e;
143: if (readerXMLEncoding != null
144: && !readerXMLEncoding.equalsIgnoreCase(e)) {
145: warning(
146: null,
147: WARN_ENCODING_MISMATCH,
148: "Encoding on InputStreamReader or FileReader does not match that of XML document. Use FileInputStream. ["
149: + readerXMLEncoding + " != " + e + "]");
150: encodingProblems = true;
151: }
152:
153: if (e.equals("UTF"))
154: return;
155:
156: if (!encodingInfo.isIANA()) {
157: warning(null,
158: encodingInfo.isInNIO() ? WARN_NON_IANA_ENCODING
159: : WARN_UNSUPPORTED_ENCODING,
160: encodingInfo.warningMessage());
161: } else if (!original.equalsIgnoreCase(e)) {
162: warning(
163: null,
164: WARN_NONCANONICAL_IANA_NAME,
165: "The encoding \""
166: + original
167: + "\" is not the canonical name at IANA, suggest \""
168: + e
169: + "\" would give more interoperability.");
170:
171: }
172: }
173: }
174:
175: }
176:
177: /*
178: * (c) Copyright 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP All rights
179: * reserved.
180: *
181: * Redistribution and use in source and binary forms, with or without
182: * modification, are permitted provided that the following conditions are met:
183: * 1. Redistributions of source code must retain the above copyright notice,
184: * this list of conditions and the following disclaimer. 2. Redistributions in
185: * binary form must reproduce the above copyright notice, this list of
186: * conditions and the following disclaimer in the documentation and/or other
187: * materials provided with the distribution. 3. The name of the author may not
188: * be used to endorse or promote products derived from this software without
189: * specific prior written permission.
190: *
191: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
192: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
193: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
194: * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
195: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
196: * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
197: * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
198: * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
199: * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
200: * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
201: */
|