001: /*
002: * Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2007.
003: *
004: * Licensed under the Aduna BSD-style license.
005: */
006: package org.openrdf.rio.helpers;
007:
008: import java.util.HashMap;
009: import java.util.Map;
010:
011: import info.aduna.net.ParsedURI;
012:
013: import org.openrdf.model.BNode;
014: import org.openrdf.model.Literal;
015: import org.openrdf.model.Resource;
016: import org.openrdf.model.Statement;
017: import org.openrdf.model.URI;
018: import org.openrdf.model.Value;
019: import org.openrdf.model.ValueFactory;
020: import org.openrdf.model.datatypes.XMLDatatypeUtil;
021: import org.openrdf.model.impl.ValueFactoryImpl;
022:
023: import org.openrdf.rio.ParseErrorListener;
024: import org.openrdf.rio.ParseLocationListener;
025: import org.openrdf.rio.RDFHandler;
026: import org.openrdf.rio.RDFParseException;
027: import org.openrdf.rio.RDFParser;
028:
029: /**
030: * Base class for {@link RDFParser}s offering common functionality for RDF
031: * parsers.
032: *
033: * @author Arjohn Kampman
034: */
035: public abstract class RDFParserBase implements RDFParser {
036:
037: /*-----------*
038: * Variables *
039: *-----------*/
040:
041: /**
042: * The RDFHandler that will handle the parsed RDF.
043: */
044: protected RDFHandler rdfHandler;
045:
046: /**
047: * An optional ParseErrorListener to report parse errors to.
048: */
049: private ParseErrorListener errListener;
050:
051: /**
052: * An optional ParseLocationListener to report parse progress in the form of
053: * line- and column numbers to.
054: */
055: private ParseLocationListener locationListener;
056:
057: /**
058: * The ValueFactory to use for creating RDF model objects.
059: */
060: private ValueFactory valueFactory;
061:
062: /**
063: * Flag indicating whether the parser should verify the data it parses.
064: */
065: private boolean verifyData;
066:
067: /**
068: * Flag indicating whether the parser should preserve bnode identifiers from
069: * the parsed data in the created BNode objects.
070: */
071: private boolean preserveBNodeIDs;
072:
073: /**
074: * Flag indicating whether the parser should immediately stop parsing when it
075: * finds an error in the data.
076: */
077: private boolean stopAtFirstError;
078:
079: /**
080: * Indicates how datatyped literals should be handled.
081: */
082: private DatatypeHandling datatypeHandling;
083:
084: /**
085: * The base URI for resolving relative URIs.
086: */
087: private ParsedURI baseURI;
088:
089: /**
090: * Mapping from blank node identifiers as used in the RDF document to the
091: * object created for it by the ValueFactory. This mapping is used to return
092: * identical BNode objects for recurring blank node identifiers.
093: */
094: private Map<String, BNode> bNodeIDMap;
095:
096: /**
097: * Mapping from namespace prefixes to namespace names.
098: */
099: private Map<String, String> namespaceTable;
100:
101: /*--------------*
102: * Constructors *
103: *--------------*/
104:
105: /**
106: * Creates a new RDFParserBase that will use a {@link ValueFactoryImpl} to
107: * create RDF model objects.
108: */
109: public RDFParserBase() {
110: this (new ValueFactoryImpl());
111: }
112:
113: /**
114: * Creates a new TurtleParser that will use the supplied ValueFactory to
115: * create RDF model objects.
116: *
117: * @param valueFactory
118: * A ValueFactory.
119: */
120: public RDFParserBase(ValueFactory valueFactory) {
121: bNodeIDMap = new HashMap<String, BNode>(16);
122: namespaceTable = new HashMap<String, String>(16);
123:
124: setValueFactory(valueFactory);
125: setVerifyData(true);
126: setPreserveBNodeIDs(false);
127: setStopAtFirstError(true);
128: setDatatypeHandling(DatatypeHandling.VERIFY);
129: }
130:
131: /*---------*
132: * Methods *
133: *---------*/
134:
135: public void setValueFactory(ValueFactory valueFactory) {
136: this .valueFactory = valueFactory;
137: }
138:
139: public void setRDFHandler(RDFHandler handler) {
140: rdfHandler = handler;
141: }
142:
143: public RDFHandler getRDFHandler() {
144: return rdfHandler;
145: }
146:
147: public void setParseErrorListener(ParseErrorListener el) {
148: errListener = el;
149: }
150:
151: public ParseErrorListener getParseErrorListener() {
152: return errListener;
153: }
154:
155: public void setParseLocationListener(ParseLocationListener el) {
156: locationListener = el;
157: }
158:
159: public ParseLocationListener getParseLocationListener() {
160: return locationListener;
161: }
162:
163: public void setVerifyData(boolean verifyData) {
164: this .verifyData = verifyData;
165: }
166:
167: public boolean verifyData() {
168: return verifyData;
169: }
170:
171: public void setPreserveBNodeIDs(boolean preserveBNodeIDs) {
172: this .preserveBNodeIDs = preserveBNodeIDs;
173: }
174:
175: public boolean preserveBNodeIDs() {
176: return preserveBNodeIDs;
177: }
178:
179: public void setStopAtFirstError(boolean stopAtFirstError) {
180: this .stopAtFirstError = stopAtFirstError;
181: }
182:
183: public boolean stopAtFirstError() {
184: return stopAtFirstError;
185: }
186:
187: public void setDatatypeHandling(DatatypeHandling datatypeHandling) {
188: this .datatypeHandling = datatypeHandling;
189: }
190:
191: public DatatypeHandling datatypeHandling() {
192: return datatypeHandling;
193: }
194:
195: /**
196: * Parses and normalizes the supplied URI-string and sets it as the base URI
197: * for resolving relative URIs.
198: */
199: protected void setBaseURI(String uriSpec) {
200: // Store normalized base URI
201: ParsedURI baseURI = new ParsedURI(uriSpec);
202: baseURI.normalize();
203: setBaseURI(baseURI);
204: }
205:
206: /**
207: * Sets the base URI for resolving relative URIs.
208: */
209: protected void setBaseURI(ParsedURI baseURI) {
210: this .baseURI = baseURI;
211: }
212:
213: /**
214: * Associates the specified prefix to the specified namespace.
215: */
216: protected void setNamespace(String prefix, String namespace) {
217: namespaceTable.put(prefix, namespace);
218: }
219:
220: /**
221: * Gets the namespace that is associated with the specified prefix, if any.
222: */
223: protected String getNamespace(String prefix) {
224: return namespaceTable.get(prefix);
225: }
226:
227: /**
228: * Clears any information that has been collected while parsing. This method
229: * must be called by subclasses when finishing the parse process.
230: */
231: protected void clear() {
232: baseURI = null;
233: clearBNodeIDMap();
234: namespaceTable.clear();
235: }
236:
237: /**
238: * Clears the map that keeps track of blank nodes that have been parsed.
239: * Normally, this map is clear when the document has been parsed completely,
240: * but subclasses can clear the map at other moments too, for example when a
241: * bnode scope ends.
242: */
243: protected void clearBNodeIDMap() {
244: bNodeIDMap.clear();
245: }
246:
247: /**
248: * Resolves a URI-string against the base URI and creates a {@link URI}
249: * object for it.
250: */
251: protected URI resolveURI(String uriSpec) throws RDFParseException {
252: if (baseURI == null) {
253: reportFatalError("Unable to resolve URIs, no base URI has been set");
254: }
255:
256: // Resolve relative URIs against base URI
257: ParsedURI uri = new ParsedURI(uriSpec);
258:
259: if (verifyData) {
260: if (uri.isRelative() && !uri.isSelfReference()
261: && baseURI.isOpaque()) {
262: reportError("Relative URI '"
263: + uriSpec
264: + "' cannot be resolved using the opaque base URI '"
265: + baseURI + "'");
266: }
267: }
268:
269: uri = baseURI.resolve(uri);
270:
271: return createURI(uri.toString());
272: }
273:
274: /**
275: * Creates a {@link URI} object for the specified URI-string.
276: */
277: protected URI createURI(String uri) throws RDFParseException {
278: try {
279: return valueFactory.createURI(uri);
280: } catch (Exception e) {
281: reportFatalError(e);
282: return null; // required by compiler
283: }
284: }
285:
286: /**
287: * Creates a new {@link BNode} object.
288: */
289: protected BNode createBNode() throws RDFParseException {
290: try {
291: return valueFactory.createBNode();
292: } catch (Exception e) {
293: reportFatalError(e);
294: return null; // required by compiler
295: }
296: }
297:
298: /**
299: * Creates a {@link BNode} object for the specified identifier.
300: */
301: protected BNode createBNode(String nodeID) throws RDFParseException {
302: // Maybe the node ID has been used before:
303: BNode result = bNodeIDMap.get(nodeID);
304:
305: if (result == null) {
306: // This is a new node ID, create a new BNode object for it
307: try {
308: if (preserveBNodeIDs) {
309: result = valueFactory.createBNode(nodeID);
310: } else {
311: result = valueFactory.createBNode();
312: }
313: } catch (Exception e) {
314: reportFatalError(e);
315: }
316:
317: // Remember it, the nodeID might occur again.
318: bNodeIDMap.put(nodeID, result);
319: }
320:
321: return result;
322: }
323:
324: /**
325: * Creates a {@link Literal} object with the supplied parameters.
326: */
327: protected Literal createLiteral(String label, String lang,
328: URI datatype) throws RDFParseException {
329: if (datatype != null) {
330: if (datatypeHandling == DatatypeHandling.VERIFY) {
331: if (!XMLDatatypeUtil.isValidValue(label, datatype)) {
332: reportError("'" + label
333: + "' is not a valid value for datatype "
334: + datatype);
335: }
336: } else if (datatypeHandling == DatatypeHandling.NORMALIZE) {
337: try {
338: label = XMLDatatypeUtil.normalize(label, datatype);
339: } catch (IllegalArgumentException e) {
340: reportError("'" + label
341: + "' is not a valid value for datatype "
342: + datatype + ": " + e.getMessage());
343: }
344: }
345: }
346:
347: try {
348: if (datatype != null) {
349: return valueFactory.createLiteral(label, datatype);
350: } else if (lang != null) {
351: return valueFactory.createLiteral(label, lang);
352: } else {
353: return valueFactory.createLiteral(label);
354: }
355: } catch (Exception e) {
356: reportFatalError(e);
357: return null; // required by compiler
358: }
359: }
360:
361: /**
362: * Creates a new {@link Statement} object with the supplied components.
363: */
364: protected Statement createStatement(Resource subj, URI pred,
365: Value obj) throws RDFParseException {
366: try {
367: return valueFactory.createStatement(subj, pred, obj);
368: } catch (Exception e) {
369: reportFatalError(e);
370: return null; // required by compiler
371: }
372: }
373:
374: /**
375: * Creates a new {@link Statement} object with the supplied components.
376: */
377: protected Statement createStatement(Resource subj, URI pred,
378: Value obj, Resource context) throws RDFParseException {
379: try {
380: return valueFactory.createStatement(subj, pred, obj,
381: context);
382: } catch (Exception e) {
383: reportFatalError(e);
384: return null; // required by compiler
385: }
386: }
387:
388: /**
389: * Reports the specified line- and column number to the registered
390: * {@link ParseLocationListener}, if any.
391: */
392: protected void reportLocation(int lineNo, int columnNo) {
393: if (locationListener != null) {
394: locationListener.parseLocationUpdate(lineNo, columnNo);
395: }
396: }
397:
398: /**
399: * Reports a warning to the registered ParseErrorListener, if any. This
400: * method simply calls {@link #reportWarning(String,int,int)} supplying
401: * <tt>-1</tt> for the line- and column number.
402: */
403: protected void reportWarning(String msg) {
404: reportWarning(msg, -1, -1);
405: }
406:
407: /**
408: * Reports a warning with associated line- and column number to the
409: * registered ParseErrorListener, if any.
410: */
411: protected void reportWarning(String msg, int lineNo, int columnNo) {
412: if (errListener != null) {
413: errListener.warning(msg, lineNo, columnNo);
414: }
415: }
416:
417: /**
418: * Reports an error to the registered ParseErrorListener, if any. This method
419: * simply calls {@link #reportError(String,int,int)} supplying <tt>-1</tt>
420: * for the line- and column number. This method throws a
421: * <tt>ParseException</tt> when 'stop-at-first-error' has been set to
422: * <tt>true</tt>.
423: *
424: * @see #setStopAtFirstError
425: */
426: protected void reportError(String msg) throws RDFParseException {
427: reportError(msg, -1, -1);
428: }
429:
430: /**
431: * Reports an error with associated line- and column number to the registered
432: * ParseErrorListener, if any. This method throws a <tt>ParseException</tt>
433: * when 'stop-at-first-error' has been set to <tt>true</tt>.
434: *
435: * @see #setStopAtFirstError
436: */
437: protected void reportError(String msg, int lineNo, int columnNo)
438: throws RDFParseException {
439: if (errListener != null) {
440: errListener.error(msg, lineNo, columnNo);
441: }
442:
443: if (stopAtFirstError) {
444: throw new RDFParseException(msg, lineNo, columnNo);
445: }
446: }
447:
448: /**
449: * Reports a fatal error to the registered ParseErrorListener, if any, and
450: * throws a <tt>ParseException</tt> afterwards. This method simply calls
451: * {@link #reportFatalError(String,int,int)} supplying <tt>-1</tt> for the
452: * line- and column number.
453: */
454: protected void reportFatalError(String msg)
455: throws RDFParseException {
456: reportFatalError(msg, -1, -1);
457: }
458:
459: /**
460: * Reports a fatal error with associated line- and column number to the
461: * registered ParseErrorListener, if any, and throws a
462: * <tt>ParseException</tt> afterwards.
463: */
464: protected void reportFatalError(String msg, int lineNo, int columnNo)
465: throws RDFParseException {
466: if (errListener != null) {
467: errListener.fatalError(msg, lineNo, columnNo);
468: }
469:
470: throw new RDFParseException(msg, lineNo, columnNo);
471: }
472:
473: /**
474: * Reports a fatal error to the registered ParseErrorListener, if any, and
475: * throws a <tt>ParseException</tt> afterwards. An exception is made for
476: * the case where the supplied exception is a {@link RDFParseException}; in
477: * that case the supplied exception is not wrapped in another ParseException
478: * and the error message is not reported to the ParseErrorListener, assuming
479: * that it has already been reported when the original ParseException was
480: * thrown.
481: * <p>
482: * This method simply calls {@link #reportFatalError(Exception,int,int)}
483: * supplying <tt>-1</tt> for the line- and column number.
484: */
485: protected void reportFatalError(Exception e)
486: throws RDFParseException {
487: reportFatalError(e, -1, -1);
488: }
489:
490: /**
491: * Reports a fatal error with associated line- and column number to the
492: * registered ParseErrorListener, if any, and throws a
493: * <tt>ParseException</tt> wrapped the supplied exception afterwards. An
494: * exception is made for the case where the supplied exception is a
495: * {@link RDFParseException}; in that case the supplied exception is not
496: * wrapped in another ParseException and the error message is not reported to
497: * the ParseErrorListener, assuming that it has already been reported when
498: * the original ParseException was thrown.
499: */
500: protected void reportFatalError(Exception e, int lineNo,
501: int columnNo) throws RDFParseException {
502: if (e instanceof RDFParseException) {
503: throw (RDFParseException) e;
504: } else {
505: if (errListener != null) {
506: errListener
507: .fatalError(e.getMessage(), lineNo, columnNo);
508: }
509:
510: throw new RDFParseException(e, lineNo, columnNo);
511: }
512: }
513:
514: }
|