001: /*
002: (c) Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP
003: [See end of file]
004: */
005:
006: package com.hp.hpl.jena.rdf.arp;
007:
008: import java.io.FileNotFoundException;
009: import java.io.IOException;
010: import java.io.InputStream;
011: import java.io.InputStreamReader;
012: import java.io.Reader;
013: import java.net.MalformedURLException;
014: import java.net.URL;
015: import java.net.URLConnection;
016:
017: import org.xml.sax.InputSource;
018: import org.xml.sax.SAXException;
019: import org.xml.sax.SAXNotRecognizedException;
020: import org.xml.sax.SAXNotSupportedException;
021:
022: import com.hp.hpl.jena.datatypes.RDFDatatype;
023: import com.hp.hpl.jena.datatypes.TypeMapper;
024: import com.hp.hpl.jena.graph.Graph;
025: import com.hp.hpl.jena.graph.GraphEvents;
026: import com.hp.hpl.jena.graph.Node;
027: import com.hp.hpl.jena.graph.Triple;
028: import com.hp.hpl.jena.rdf.arp.impl.RDFXMLParser;
029: import com.hp.hpl.jena.rdf.model.Literal;
030: import com.hp.hpl.jena.rdf.model.Model;
031: import com.hp.hpl.jena.rdf.model.Property;
032: import com.hp.hpl.jena.rdf.model.RDFErrorHandler;
033: import com.hp.hpl.jena.rdf.model.RDFReader;
034: import com.hp.hpl.jena.rdf.model.impl.LiteralImpl;
035: import com.hp.hpl.jena.rdf.model.impl.PropertyImpl;
036: import com.hp.hpl.jena.rdf.model.impl.RDFDefaultErrorHandler;
037: import com.hp.hpl.jena.shared.DoesNotExistException;
038: import com.hp.hpl.jena.shared.JenaException;
039: import com.hp.hpl.jena.shared.UnknownPropertyException;
040: import com.hp.hpl.jena.shared.WrappedIOException;
041:
042: /**
043: * Interface between Jena and ARP.
044: *
045: * @author jjc
046: */
047: public class JenaReader implements RDFReader, ARPErrorNumbers {
048:
049: /**
050: * Sets the reader for the languages RDF/XML and RDF/XML-ABBREV to be
051: * JenaReader.
052: * @deprecated This is the default behaviour
053: * @param m
054: * The Model on which to set the reader properties.
055: */
056: static public void useMe(Model m) {
057: m.setReaderClassName("RDF/XML", JenaReader.class.getName());
058: m.setReaderClassName("RDF/XML-ABBREV", JenaReader.class
059: .getName());
060: }
061:
062: static private final String saxFeaturesURL = "http://xml.org/sax/features/";
063:
064: static private final String saxPropertiesURL = "http://xml.org/sax/properties/";
065:
066: static private final String apacheFeaturesURL = "http://apache.org/xml/features/";
067:
068: static private final String apachePropertiesURL = "http://apache.org/xml/properties/";
069:
070: static final String arpPropertiesURL = "http://jena.hpl.hp.com/arp/properties/";
071:
072: static final int arpPropertiesURLLength = arpPropertiesURL.length();
073:
/** The RDF/XML parser driven by this reader; created once in the constructor. */
private final RDFXMLParser arpf;

/** The model passed to the most recent model-based read; set by the private read helper. */
private Model model;

/**
 * Creates a new JenaReader backed by a freshly created RDF/XML parser.
 */
public JenaReader() {
    this.arpf = RDFXMLParser.create();
}
084:
085: /**
086: * Reads from url, using url as base, adding triples to model.
087: * Uses content negotiation to ask for application/rdf+xml, if available.
088: *
089: * @param m
090: * A model to add triples to.
091: * @param url
092: * The URL of the RDF/XML document.
093: */
094: public void read(Model m, String url) throws JenaException {
095: try {
096: URLConnection conn = new URL(url).openConnection();
097: conn
098: .setRequestProperty(
099: "accept",
100: "application/rdf+xml, application/xml; q=0.8, text/xml; q=0.7, application/rss+xml; q=0.3, */*; q=0.2");
101: String encoding = conn.getContentEncoding();
102: if (encoding == null)
103: read(m, conn.getInputStream(), url);
104: else
105: read(m, new InputStreamReader(conn.getInputStream(),
106: encoding), url);
107: } catch (FileNotFoundException e) {
108: throw new DoesNotExistException(url);
109: } catch (IOException e) {
110: throw new JenaException(e);
111: }
112: }
113:
114: // static public void main(String [] a){
115: // String url =
116: // "http://www.bbc.co.uk/portuguese/index.xml";
117: //// "http://jena.sourceforge.net/test/mime/test1";
118: // try {
119: // URLConnection conn = new URL(url).openConnection();
120: // conn.setRequestProperty("accept", "application/rdf+xml, application/xml, text/xml, */*; q=0.5");
121: // System.err.println(conn.getContentType());
122: // } catch (MalformedURLException e) {
123: // // TODO Auto-generated catch block
124: // e.printStackTrace();
125: // } catch (IOException e) {
126: // // TODO Auto-generated catch block
127: // e.printStackTrace();
128: // }
129: //
130: // }
131: /**
132: * Converts an ARP literal into a Jena Literal.
133: *
134: * @param lit
135: * The ARP literal.
136: * @return The Jena Literal.
137: */
138: static private Literal translate(ALiteral lit) {
139: return new LiteralImpl(lit.toString(), lit.getLang(), lit
140: .isWellFormedXML(), null);
141: }
142:
143: private static Node convert(ALiteral lit) {
144: String dtURI = lit.getDatatypeURI();
145: if (dtURI == null)
146: return Node.createLiteral(lit.toString(), lit.getLang(),
147: false);
148:
149: if (lit.isWellFormedXML()) {
150: return Node.createLiteral(lit.toString(), null, true);
151: }
152:
153: RDFDatatype dt = TypeMapper.getInstance().getSafeTypeByName(
154: dtURI);
155: return Node.createLiteral(lit.toString(), null, dt);
156:
157: }
158:
159: private static Node convert(AResource r) {
160: if (!r.isAnonymous())
161: return Node.createURI(r.getURI());
162:
163: // String id = r.getAnonymousID();
164: Node rr = (Node) r.getUserData();
165: if (rr == null) {
166: rr = Node.createAnon();
167: r.setUserData(rr);
168: }
169: return rr;
170:
171: }
172:
173: static Triple convert(AResource s, AResource p, AResource o) {
174: return Triple.create(convert(s), convert(p), convert(o));
175: }
176:
177: static Triple convert(AResource s, AResource p, ALiteral o) {
178: return Triple.create(convert(s), convert(p), convert(o));
179: }
180:
181: /**
182: * Converts an ARP resource into a Jena property.
183: *
184: * @param r
185: * The ARP resource.
186: * @throws JenaException
187: * If r is anonymous, or similarly ill-formed.
188: * @return The Jena property.
189: */
190: static private Property translatePred(AResource r)
191: throws JenaException {
192: return new PropertyImpl(r.getURI());
193: }
194:
195: /**
196: * Reads from reader, using base URI xmlbase, adding triples to model. If
197: * xmlbase is "" then relative URIs may be added to model.
198: *
199: * @param m
200: * A model to add triples to.
201: * @param reader
202: * The RDF/XML document.
203: * @param xmlBase
204: * The base URI of the document or "".
205: */
206: private void read(Model m, InputSource inputS, String xmlBase)
207: throws JenaException {
208: model = m;
209: read(model.getGraph(), inputS, xmlBase, model);
210: }
211:
212: private JenaHandler handler;
213:
214: synchronized private void read(final Graph g, InputSource inputS,
215: String xmlBase, Model m) {
216:
217: try {
218: g.getEventManager().notifyEvent(g, GraphEvents.startRead);
219: inputS.setSystemId(xmlBase);
220: handler = new JenaHandler(g, m, errorHandler);
221: handler.useWith(arpf.getHandlers());
222: arpf.parse(inputS, xmlBase);
223: handler.bulkUpdate();
224: } catch (IOException e) {
225: throw new WrappedIOException(e);
226: } catch (SAXException e) {
227: throw new JenaException(e);
228: } finally {
229: g.getEventManager().notifyEvent(g, GraphEvents.finishRead);
230: handler = null;
231: }
232: }
233:
234: /**
235: * Reads from reader, using base URI xmlbase, adding triples to model. If
236: * xmlbase is "" then relative URIs may be added to model.
237: *
238: * @param m
239: * A model to add triples to.
240: * @param reader
241: * The RDF/XML document.
242: * @param xmlBase
243: * The base URI of the document or "".
244: */
245: public void read(final Model m, Reader reader, String xmlBase)
246: throws JenaException {
247: read(m, new InputSource(reader), xmlBase);
248: }
249:
250: /**
251: * Reads from reader, using base URI xmlbase, adding triples to graph. If
252: * xmlbase is "" then relative URIs may be added to graph.
253: *
254: * @param g
255: * A graph to add triples to.
256: * @param reader
257: * The RDF/XML document.
258: * @param xmlBase
259: * The base URI of the document or "".
260: */
261: public void read(Graph g, Reader reader, String xmlBase)
262: throws JenaException {
263: read(g, new InputSource(reader), xmlBase, null);
264: }
265:
266: /**
267: * Reads from inputStream, using base URI xmlbase, adding triples to model.
268: * If xmlbase is "" then relative URIs may be added to model.
269: *
270: * @param m
271: * A model to add triples to.
272: * @param in
273: * The RDF/XML document stream.
274: * @param xmlBase
275: * The base URI of the document or "".
276: */
277: public void read(final Model m, InputStream in, String xmlBase)
278: throws JenaException {
279: read(m, new InputSource(in), xmlBase);
280: }
281:
282: /**
283: * Reads from inputStream, using base URI xmlbase, adding triples to graph.
284: * If xmlbase is "" then relative URIs may be added to graph.
285: *
286: * @param g
287: * A graph to add triples to.
288: * @param in
289: * The RDF/XML document stream.
290: * @param xmlBase
291: * The base URI of the document or "".
292: */
293: public void read(Graph g, InputStream in, String xmlBase) {
294: read(g, new InputSource(in), xmlBase, null);
295: }
296:
297: private RDFErrorHandler errorHandler = new RDFDefaultErrorHandler();
298:
299: /**
300: * Change the error handler.
301: * <p>
302: * Note that errors of class {@link ParseException}can be promoted using
303: * the {@link ParseException#promote}method. See ARP documentation for
304: * {@link org.xml.sax.ErrorHandler}for the details of error promotion.
305: *
306: * @param errHandler
307: * The new error handler.
308: * @return The old error handler.
309: */
310: public RDFErrorHandler setErrorHandler(RDFErrorHandler errHandler) {
311: RDFErrorHandler old = this .errorHandler;
312: this .errorHandler = errHandler;
313: JenaHandler h = handler;
314: if (h != null) {
315: h.setErrorHandler(errHandler);
316: }
317: return old;
318: }
319:
320: /**
321: *
322: * Change a property of the RDF or XML parser.
323: * <p>
324: * I do not believe that many of the XML features or properties are in fact
325: * useful for ARP users. The ARP properties allow fine-grained control over
326: * error reporting.
327: * <p>
328: * This interface can be used to set and get:
329: * <dl>
330: * <dt>SAX2 features</dt>
331: * <dd>See <a href="http://xml.apache.org/xerces-j/features.html">Xerces
332: * features </a>. Value should be given as a String "true" or "false" or a
333: * Boolean.</dd>
334: * <dt>SAX2 properties</dt>
335: * <dd>See <a href="http://xml.apache.org/xerces-j/properties.html">Xerces
336: * properties </a>.</dd>
337: * <dt>Xerces features</dt>
338: * <dd>See <a href="http://xml.apache.org/xerces-j/features.html">Xerces
339: * features </a>. Value should be given as a String "true" or "false" or a
340: * Boolean.</dd>
341: * <dt>Xerces properties</dt>
342: * <dd>See <a href="http://xml.apache.org/xerces-j/properties.html">Xerces
343: * properties </a>.</dd>
344: * <dt>ARP properties</dt>
345: * <dd>These are referred to either by their property name, (see below) or
346: * by an absolute URL of the form
347: * <code>http://jena.hpl.hp.com/arp/properties/<PropertyName></code>.
348: * The value should be a String, an Integer or a Boolean depending on the
349: * property. <br>
350: * ARP property names and string values are case insensitive. <br>
351: * <TABLE BORDER="1" CELLPADDING="3" CELLSPACING="0">
352: * <TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor">
353: * <TD COLSPAN=4><FONT SIZE="+2"> <B>ARP Properties </B> </FONT></TD>
354: * </TR>
355: * <tr BGCOLOR="#EEEEFF" CLASS="TableSubHeadingColor">
356: * <th>Property Name</th>
357: * <th>Description</th>
358: * <th>Value class</th>
359: * <th>Legal Values</th>
360: * </tr>
361: * <tr BGCOLOR="white" CLASS="TableRowColor">
362: * <td><CODE>error-mode</CODE></td>
363: * <td>{@link ARPOptions#setDefaultErrorMode}<br>
364: * {@link ARPOptions#setLaxErrorMode}<br>
365: * {@link ARPOptions#setStrictErrorMode()}<br>
366: * {@link ARPOptions#setStrictErrorMode(int)}<br>
367: * </td>
368: * <td>String</td>
369: * <td><CODE>default</CODE><br>
370: * <CODE>lax</CODE><br>
371: * <CODE>strict</CODE><br>
372: * <CODE>strict-ignore</CODE><br>
373: * <CODE>strict-warning</CODE><br>
374: * <CODE>strict-error</CODE><br>
375: * <CODE>strict-fatal</CODE><br>
376: * </td>
377: * </tr>
378: * <tr BGCOLOR="white" CLASS="TableRowColor">
379: * <td><CODE>embedding</CODE></td>
380: * <td>{@link ARP#setEmbedding}</td>
381: * <td>String or Boolean</td>
382: * <td><CODE>true</CODE> or <CODE>false</CODE></td>
383: * </tr>
384: * <tr BGCOLOR="white" CLASS="TableRowColor">
385: * <td><code>ERR_<XXX></code><br>
386: * <code>WARN_<XXX></code><br>
387: * <code>IGN_<XXX></code></td>
388: * <td>{@link ARPErrorNumbers}<br>
389: * Any of the error condition numbers listed. <br>
390: * {@link ARPOptions#setErrorMode(int, int)}</td>
391: * <td>String or Integer</td>
392: * <td>{@link ARPErrorNumbers#EM_IGNORE EM_IGNORE}<br>
393: * {@link ARPErrorNumbers#EM_WARNING EM_WARNING}<br>
394: * {@link ARPErrorNumbers#EM_ERROR EM_ERROR}<br>
395: * {@link ARPErrorNumbers#EM_FATAL EM_FATAL}<br>
396: * </td>
397: * </tr>
398: * </table></dd>
399: * </dl>
400: *
401: * @param str
402: * The property to set.
403: * @param value
404: * The new value; values of class String will be converted into
405: * appropriate classes. Values of class Boolean or Integer will
406: * be used for appropriate properties.
407: * @throws JenaException
408: * For bad values.
409: * @return The old value, or null if none, or old value is inaccesible.
410: */
411: public Object setProperty(String str, Object value)
412: throws JenaException {
413: Object obj = value;
414: if (str.startsWith("http:")) {
415: if (str.startsWith(arpPropertiesURL)) {
416: return setArpProperty(str
417: .substring(arpPropertiesURLLength), obj);
418: }
419: if (str.startsWith(saxPropertiesURL)
420: || str.startsWith(apachePropertiesURL)) {
421: Object old;
422: try {
423: old = arpf.getSAXParser().getProperty(str);
424: } catch (SAXNotSupportedException ns) {
425: old = null;
426: } catch (SAXNotRecognizedException nr) {
427: errorHandler
428: .error(new UnknownPropertyException(str));
429: return null;
430: }
431: try {
432: arpf.getSAXParser().setProperty(str, obj);
433: } catch (SAXNotSupportedException ns) {
434: errorHandler.error(new JenaException(ns));
435: } catch (SAXNotRecognizedException nr) {
436: errorHandler
437: .error(new UnknownPropertyException(str));
438: return null;
439: }
440: return old;
441: }
442:
443: if (str.startsWith(saxFeaturesURL)
444: || str.startsWith(apacheFeaturesURL)) {
445: Boolean old;
446: try {
447: old = new Boolean(arpf.getSAXParser().getFeature(
448: str));
449: } catch (SAXNotSupportedException ns) {
450: old = null;
451: } catch (SAXNotRecognizedException nr) {
452: errorHandler
453: .error(new UnknownPropertyException(str));
454: return null;
455: }
456: try {
457: arpf.getSAXParser().setFeature(str,
458: ((Boolean) obj).booleanValue());
459: } catch (SAXNotSupportedException ns) {
460: errorHandler.error(new JenaException(ns));
461: } catch (SAXNotRecognizedException nr) {
462: errorHandler
463: .error(new UnknownPropertyException(str));
464: return null;
465: } catch (ClassCastException cc) {
466: errorHandler
467: .error(new JenaException(
468: new SAXNotSupportedException(
469: "Feature: '"
470: + str
471: + "' can only have a boolean value.")));
472: }
473: return old;
474: }
475: }
476: return setArpProperty(str, obj);
477: }
478:
479: private Object setArpProperty(String str, Object v) {
480: return setArpProperty(getOptions(), str, v, errorHandler);
481: }
482:
483: public ARPOptions getOptions() {
484: return arpf.getOptions();
485: }
486:
487: public void setOptionsWith(ARPOptions opts) {
488: arpf.setOptionsWith(opts);
489: }
490:
491: // /**
492: // * @deprecated Use {@link ParseException#errorCodeName(int)}
493: // */
494: // static public String errorCodeName(int errNo) {
495: // return ParseException.errorCodeName(errNo);
496: // }
497: //
498: //
499: //
500: //
501: // /**
502: // * @deprecated Use {@link ParseException#errorCode(String)}
503: // */
504: // static public int errorCode(String upper) {
505: // return ParseException.errorCode(upper);
506: // }
507: /**
508: * Supported proprties: error-mode (String) default, lax, strict,
509: * strict-ignore, strict-warning, strict-error, strict-fatal embedding
510: * (String/Boolean) true, false ERR_* (String/Integer) em_warning, em_fatal,
511: * em_ignore, em_error IGN_* ditto WARN_* ditto
512: */
513: static Object setArpProperty(ARPOptions options, String str,
514: Object v, RDFErrorHandler eh) {
515: // ARPOptions options = arpf.getOptions();
516: str = str.toUpperCase();
517: if (v == null)
518: v = "";
519: if (v instanceof String) {
520: v = ((String) v).toUpperCase();
521: }
522: if (str.equals("ERROR-MODE")) {
523: if (v instanceof String) {
524: String val = (String) v;
525: if (val.equals("LAX")) {
526: options.setLaxErrorMode();
527: return null;
528: }
529: if (val.equals("DEFAULT")) {
530: options.setDefaultErrorMode();
531: return null;
532: }
533: if (val.equals("STRICT")) {
534: options.setStrictErrorMode();
535: return null;
536: }
537: if (val.equals("STRICT-WARNING")) {
538: options.setStrictErrorMode(EM_WARNING);
539: return null;
540: }
541: if (val.equals("STRICT-FATAL")) {
542: options.setStrictErrorMode(EM_FATAL);
543: return null;
544: }
545: if (val.equals("STRICT-IGNORE")) {
546: options.setStrictErrorMode(EM_IGNORE);
547: return null;
548: }
549: if (val.equals("STRICT-ERROR")) {
550: options.setStrictErrorMode(EM_ERROR);
551: return null;
552: }
553: }
554: eh
555: .error(new IllegalArgumentException(
556: "Property \"ERROR-MODE\" takes the following values: "
557: + "\"default\", \"lax\", \"strict\", \"strict-ignore\", \"strict-warning\", \"strict-error\", \"strict-fatal\"."));
558: return null;
559: }
560: if (str.equals("EMBEDDING")) {
561: if (v instanceof String) {
562: v = Boolean.valueOf((String) v);
563: }
564: if ((v instanceof Boolean))
565: return new Boolean(options.setEmbedding(((Boolean) v)
566: .booleanValue()));
567:
568: // Illegal value.
569: eh
570: .error(new IllegalArgumentException(
571: "Property \"EMBEDDING\" requires a boolean value."));
572: boolean old = options.setEmbedding(false);
573: options.setEmbedding(old);
574: return new Boolean(old);
575:
576: }
577: if (str.startsWith("ERR_") || str.startsWith("IGN_")
578: || str.startsWith("WARN_")) {
579: int cond = ParseException.errorCode(str);
580: if (cond == -1) {
581: // error, see end of function.
582: } else {
583: if (v instanceof String) {
584: if (!((String) v).startsWith("EM_")) {
585: // error, see below.
586: } else {
587: int val = ParseException.errorCode((String) v);
588: if (val == -1) {
589: // error, see below.
590: } else {
591: int rslt = options.setErrorMode(cond, val);
592: return new Integer(rslt);
593: }
594: }
595: } else if (v instanceof Integer) {
596: int val = ((Integer) v).intValue();
597: switch (val) {
598: case EM_IGNORE:
599: case EM_WARNING:
600: case EM_ERROR:
601: case EM_FATAL:
602: int rslt = options.setErrorMode(cond, val);
603: return new Integer(rslt);
604: default:
605: // error, see below.
606: }
607: }
608: // Illegal value.
609: eh
610: .error(new IllegalArgumentException(
611: "Property \"" + str
612: + "\" cannot have value: "
613: + v.toString()));
614: int old = options.setErrorMode(cond, EM_ERROR);
615: options.setErrorMode(cond, old);
616: return new Integer(old);
617: }
618: }
619: eh.error(new UnknownPropertyException(str));
620: return null;
621: }
622:
623: }
624:
625: /*
626: * (c) Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development
627: * Company, LP All rights reserved.
628: *
629: * Redistribution and use in source and binary forms, with or without
630: * modification, are permitted provided that the following conditions are met:
631: * 1. Redistributions of source code must retain the above copyright notice,
632: * this list of conditions and the following disclaimer. 2. Redistributions in
633: * binary form must reproduce the above copyright notice, this list of
634: * conditions and the following disclaimer in the documentation and/or other
635: * materials provided with the distribution. 3. The name of the author may not
636: * be used to endorse or promote products derived from this software without
637: * specific prior written permission.
638: *
639: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
640: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
641: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
642: * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
643: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
644: * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
645: * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
646: * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
647: * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
648: * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * $Id: JenaReader.java,v 1.2
649: * 2005/07/31 08:21:43 jeremy_carroll Exp $
650: *
651: * AUTHOR: Jeremy J. Carroll
652: */
|