0001: /*
0002: * Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2007.
0003: *
0004: * Licensed under the Aduna BSD-style license.
0005: */
0006: package org.openrdf.rio.rdfxml;
0007:
0008: import java.io.IOException;
0009: import java.io.InputStream;
0010: import java.io.Reader;
0011: import java.util.HashSet;
0012: import java.util.Iterator;
0013: import java.util.Set;
0014: import java.util.Stack;
0015:
0016: import org.xml.sax.InputSource;
0017: import org.xml.sax.Locator;
0018: import org.xml.sax.SAXException;
0019: import org.xml.sax.SAXParseException;
0020: import org.xml.sax.XMLReader;
0021:
0022: import info.aduna.net.ParsedURI;
0023: import info.aduna.xml.XMLReaderFactory;
0024: import info.aduna.xml.XMLUtil;
0025:
0026: import org.openrdf.model.BNode;
0027: import org.openrdf.model.Literal;
0028: import org.openrdf.model.Resource;
0029: import org.openrdf.model.Statement;
0030: import org.openrdf.model.URI;
0031: import org.openrdf.model.Value;
0032: import org.openrdf.model.ValueFactory;
0033: import org.openrdf.model.impl.ValueFactoryImpl;
0034: import org.openrdf.model.vocabulary.RDF;
0035: import org.openrdf.rio.RDFFormat;
0036: import org.openrdf.rio.RDFHandlerException;
0037: import org.openrdf.rio.RDFParseException;
0038: import org.openrdf.rio.helpers.RDFParserBase;
0039:
0040: /**
0041: * A parser for XML-serialized RDF. This parser operates directly on the SAX
0042: * events generated by a SAX-enabled XML parser. The XML parser should be
0043: * compliant with SAX2. You should specify which SAX parser should be used by
 * setting the <code>org.xml.sax.driver</code> property. This parser is not
 * thread-safe, therefore its public methods are synchronized.
0046: * <p>
0047: * To parse a document using this parser:
0048: * <ul>
0049: * <li>Create an instance of RDFXMLParser, optionally supplying it with your
0050: * own ValueFactory.
0051: * <li>Set the RDFHandler.
0052: * <li>Optionally, set the ParseErrorListener and/or ParseLocationListener.
0053: * <li>Optionally, specify whether the parser should verify the data it parses
0054: * and whether it should stop immediately when it finds an error in the data
0055: * (both default to <tt>true</tt>).
0056: * <li>Call the parse method.
0057: * </ul>
0058: * Example code:
0059: *
0060: * <pre>
0061: * // Use the SAX2-compliant Xerces parser:
0062: * System.setProperty("org.xml.sax.driver", "org.apache.xerces.parsers.SAXParser");
0063: *
0064: * RDFParser parser = new RDFXMLParser();
0065: * parser.setRDFHandler(myRDFHandler);
0066: * parser.setParseErrorListener(myParseErrorListener);
0067: * parser.setVerifyData(true);
0068: * parser.stopAtFirstError(false);
0069: *
0070: * // Parse the data from inputStream, resolving any
0071: * // relative URIs against http://foo/bar:
0072: * parser.parse(inputStream, "http://foo/bar");
0073: * </pre>
0074: *
0075: * @see org.openrdf.model.ValueFactory
0076: * @see org.openrdf.rio.RDFHandler
0077: * @see org.openrdf.rio.ParseErrorListener
0078: * @see org.openrdf.rio.ParseLocationListener
0079: * @author Arjohn Kampman
0080: */
0081: public class RDFXMLParser extends RDFParserBase {
0082:
0083: /*-----------*
0084: * Variables *
0085: *-----------*/
0086:
0087: /**
0088: * A filter filtering calls to SAX methods specifically for this parser.
0089: */
0090: private SAXFilter saxFilter;
0091:
0092: /**
0093: * The base URI of the document. This variable is set when
0094: * <tt>parse(inputStream, baseURI)</tt> is called and will not be changed
0095: * during parsing.
0096: */
0097: private String documentURI;
0098:
0099: /**
0100: * The language of literal values as can be specified using xml:lang
0101: * attributes. This variable is set/modified by the SAXFilter during parsing
0102: * such that it always represents the language of the context in which
0103: * elements are reported.
0104: */
0105: private String xmlLang;
0106:
0107: /**
0108: * A stack of node- and property elements.
0109: */
0110: private Stack<Object> elementStack = new Stack<Object>();
0111:
0112: /**
0113: * A set containing URIs that have been generated as a result of rdf:ID
0114: * attributes. These URIs should be unique within a single document.
0115: */
0116: private Set<URI> usedIDs = new HashSet<URI>();
0117:
0118: /*--------------*
0119: * Constructors *
0120: *--------------*/
0121:
/**
 * Creates a new RDFXMLParser using the superclass's default configuration
 * (per the original documentation, a {@link ValueFactoryImpl} is used to
 * create RDF model objects).
 */
public RDFXMLParser() {
    // The implicit super() call selects the default value factory.

    // All SAX events pass through this filter before they reach the parser.
    this.saxFilter = new SAXFilter(this);
}
0132:
/**
 * Creates a new RDFXMLParser that hands the supplied <tt>ValueFactory</tt>
 * to the superclass for creating RDF model objects.
 *
 * @param valueFactory
 *        The ValueFactory to use.
 */
public RDFXMLParser(ValueFactory valueFactory) {
    super(valueFactory);

    // All SAX events pass through this filter before they reach the parser.
    this.saxFilter = new SAXFilter(this);
}
0146:
0147: /*---------*
0148: * Methods *
0149: *---------*/
0150:
0151: // implements RDFParser.getRDFFormat()
0152: public final RDFFormat getRDFFormat() {
0153: return RDFFormat.RDFXML;
0154: }
0155:
0156: /**
0157: * Sets the parser in a mode to parse stand-alone RDF documents. In
0158: * stand-alone RDF documents, the enclosing <tt>rdf:RDF</tt> root element
0159: * is optional if this root element contains just one element (e.g.
0160: * <tt>rdf:Description</tt>.
0161: */
0162: public void setParseStandAloneDocuments(boolean standAloneDocs) {
0163: saxFilter.setParseStandAloneDocuments(standAloneDocs);
0164: }
0165:
0166: /**
0167: * Returns whether the parser is currently in a mode to parse stand-alone RDF
0168: * documents.
0169: *
0170: * @see #setParseStandAloneDocuments
0171: */
0172: public boolean getParseStandAloneDocuments() {
0173: return saxFilter.getParseStandAloneDocuments();
0174: }
0175:
0176: /**
0177: * Parses the data from the supplied InputStream, using the supplied baseURI
0178: * to resolve any relative URI references.
0179: *
0180: * @param in
0181: * The InputStream from which to read the data, must not be
0182: * <tt>null</tt>.
0183: * @param baseURI
0184: * The URI associated with the data in the InputStream, must not be
0185: * <tt>null</tt>.
0186: * @throws IOException
0187: * If an I/O error occurred while data was read from the InputStream.
0188: * @throws RDFParseException
0189: * If the parser has found an unrecoverable parse error.
0190: * @throws RDFHandlerException
0191: * If the configured statement handler encountered an unrecoverable
0192: * error.
0193: * @throws IllegalArgumentException
0194: * If the supplied input stream or base URI is <tt>null</tt>.
0195: */
0196: public synchronized void parse(InputStream in, String baseURI)
0197: throws IOException, RDFParseException, RDFHandlerException {
0198: if (in == null) {
0199: throw new IllegalArgumentException(
0200: "Input stream cannot be 'null'");
0201: }
0202: if (baseURI == null) {
0203: throw new IllegalArgumentException(
0204: "Base URI cannot be 'null'");
0205: }
0206:
0207: InputSource inputSource = new InputSource(in);
0208: inputSource.setSystemId(baseURI);
0209:
0210: parse(inputSource);
0211: }
0212:
0213: /**
0214: * Parses the data from the supplied Reader, using the supplied baseURI to
0215: * resolve any relative URI references.
0216: *
0217: * @param reader
0218: * The Reader from which to read the data, must not be <tt>null</tt>.
0219: * @param baseURI
0220: * The URI associated with the data in the InputStream, must not be
0221: * <tt>null</tt>.
0222: * @throws IOException
0223: * If an I/O error occurred while data was read from the InputStream.
0224: * @throws RDFParseException
0225: * If the parser has found an unrecoverable parse error.
0226: * @throws RDFHandlerException
0227: * If the configured statement handler has encountered an
0228: * unrecoverable error.
0229: * @throws IllegalArgumentException
0230: * If the supplied reader or base URI is <tt>null</tt>.
0231: */
0232: public synchronized void parse(Reader reader, String baseURI)
0233: throws IOException, RDFParseException, RDFHandlerException {
0234: if (reader == null) {
0235: throw new IllegalArgumentException(
0236: "Reader cannot be 'null'");
0237: }
0238: if (baseURI == null) {
0239: throw new IllegalArgumentException(
0240: "Base URI cannot be 'null'");
0241: }
0242:
0243: InputSource inputSource = new InputSource(reader);
0244: inputSource.setSystemId(baseURI);
0245:
0246: parse(inputSource);
0247: }
0248:
0249: private void parse(InputSource inputSource) throws IOException,
0250: RDFParseException, RDFHandlerException {
0251: try {
0252: documentURI = inputSource.getSystemId();
0253:
0254: // saxFilter.clear();
0255: saxFilter.setDocumentURI(documentURI);
0256:
0257: XMLReader xmlReader = XMLReaderFactory.createXMLReader();
0258: xmlReader.setContentHandler(saxFilter);
0259:
0260: rdfHandler.startRDF();
0261: xmlReader.parse(inputSource);
0262: rdfHandler.endRDF();
0263: } catch (SAXParseException e) {
0264: Exception wrappedExc = e.getException();
0265: if (wrappedExc == null) {
0266: wrappedExc = e;
0267: }
0268: reportFatalError(wrappedExc, e.getLineNumber(), e
0269: .getColumnNumber());
0270: } catch (SAXException e) {
0271: Exception wrappedExc = e.getException();
0272: if (wrappedExc == null) {
0273: wrappedExc = e;
0274: }
0275:
0276: if (wrappedExc instanceof RDFParseException) {
0277: throw (RDFParseException) wrappedExc;
0278: } else if (wrappedExc instanceof RDFHandlerException) {
0279: throw (RDFHandlerException) wrappedExc;
0280: } else {
0281: reportFatalError(wrappedExc);
0282: }
0283: } finally {
0284: // Clean up
0285: saxFilter.clear();
0286: xmlLang = null;
0287: elementStack.clear();
0288: usedIDs.clear();
0289: clear();
0290: }
0291: }
0292:
0293: /*-----------------------------*
0294: * Methods called by SAXFilter *
0295: *-----------------------------*/
0296:
0297: @Override
0298: protected void setBaseURI(ParsedURI baseURI) {
0299: // Note: we need to override this method to allow SAXFilter to access it
0300: super .setBaseURI(baseURI);
0301: }
0302:
0303: void setXMLLang(String xmlLang) {
0304: if ("".equals(xmlLang)) {
0305: this .xmlLang = null;
0306: } else {
0307: this .xmlLang = xmlLang;
0308: }
0309: }
0310:
0311: void startElement(String namespaceURI, String localName,
0312: String qName, Atts atts) throws RDFParseException,
0313: RDFHandlerException {
0314: if (topIsProperty()) {
0315: // this element represents the subject and/or object of a statement
0316: processNodeElt(namespaceURI, localName, qName, atts, false);
0317: } else {
0318: // this element represents a property
0319: processPropertyElt(namespaceURI, localName, qName, atts,
0320: false);
0321: }
0322: }
0323:
0324: void endElement(String namespaceURI, String localName, String qName)
0325: throws RDFParseException, RDFHandlerException {
0326: Object topElement = peekStack(0);
0327:
0328: if (topElement instanceof NodeElement) {
0329: // Check if top node is 'volatile', meaning that it doesn't have a
0330: // start- and end element associated with it.
0331: if (((NodeElement) topElement).isVolatile()) {
0332: elementStack.pop();
0333: }
0334: } else {
0335: // topElement instanceof PropertyElement
0336: PropertyElement predicate = (PropertyElement) topElement;
0337:
0338: if (predicate.parseCollection()) {
0339: Resource lastListResource = predicate
0340: .getLastListResource();
0341:
0342: if (lastListResource == null) {
0343: // no last list resource, list must have been empty.
0344: NodeElement subject = (NodeElement) peekStack(1);
0345:
0346: reportStatement(subject.getResource(), predicate
0347: .getURI(), RDF.NIL);
0348:
0349: handleReification(RDF.NIL);
0350: } else {
0351: // Generate the final tail of the list.
0352: reportStatement(lastListResource, RDF.REST, RDF.NIL);
0353: }
0354: }
0355:
0356: }
0357:
0358: elementStack.pop();
0359: }
0360:
0361: void emptyElement(String namespaceURI, String localName,
0362: String qName, Atts atts) throws RDFParseException,
0363: RDFHandlerException {
0364: if (topIsProperty()) {
0365: // this element represents the subject and/or object of a statement
0366: processNodeElt(namespaceURI, localName, qName, atts, true);
0367: } else {
0368: // this element represents a property
0369: processPropertyElt(namespaceURI, localName, qName, atts,
0370: true);
0371: }
0372: }
0373:
0374: void text(String text) throws RDFParseException,
0375: RDFHandlerException {
0376: if (!topIsProperty()) {
0377: reportError("unexpected literal");
0378: return;
0379: }
0380:
0381: PropertyElement propEl = (PropertyElement) peekStack(0);
0382: URI datatype = propEl.getDatatype();
0383:
0384: Literal lit = createLiteral(text, xmlLang, datatype);
0385:
0386: NodeElement subject = (NodeElement) peekStack(1);
0387: PropertyElement predicate = (PropertyElement) peekStack(0);
0388:
0389: reportStatement(subject.getResource(), predicate.getURI(), lit);
0390:
0391: handleReification(lit);
0392: }
0393:
0394: /*------------------------*
0395: * RDF processing methods *
0396: *------------------------*/
0397:
0398: /* Process a node element (can be both subject and object) */
0399: private void processNodeElt(String namespaceURI, String localName,
0400: String qName, Atts atts, boolean isEmptyElt)
0401: throws RDFParseException, RDFHandlerException {
0402: if (verifyData()) {
0403: // Check the element name
0404: checkNodeEltName(namespaceURI, localName, qName);
0405: }
0406:
0407: Resource nodeResource = getNodeResource(atts);
0408: NodeElement nodeElement = new NodeElement(nodeResource);
0409:
0410: if (!elementStack.isEmpty()) {
0411: // node can be object of a statement, or part of an rdf:List
0412: NodeElement subject = (NodeElement) peekStack(1);
0413: PropertyElement predicate = (PropertyElement) peekStack(0);
0414:
0415: if (predicate.parseCollection()) {
0416: Resource lastListRes = predicate.getLastListResource();
0417: BNode newListRes = createBNode();
0418:
0419: if (lastListRes == null) {
0420: // first element in the list
0421: reportStatement(subject.getResource(), predicate
0422: .getURI(), newListRes);
0423:
0424: handleReification(newListRes);
0425: } else {
0426: // not the first element in the list
0427: reportStatement(lastListRes, RDF.REST, newListRes);
0428: }
0429:
0430: reportStatement(newListRes, RDF.FIRST, nodeResource);
0431:
0432: predicate.setLastListResource(newListRes);
0433: } else {
0434: reportStatement(subject.getResource(), predicate
0435: .getURI(), nodeResource);
0436:
0437: handleReification(nodeResource);
0438: }
0439: }
0440:
0441: if (!localName.equals("Description")
0442: || !namespaceURI.equals(RDF.NAMESPACE)) {
0443: // element name is uri's type
0444: URI className = null;
0445: if ("".equals(namespaceURI)) {
0446: // No namespace, use base URI
0447: className = buildResourceFromLocalName(localName);
0448: } else {
0449: className = createURI(namespaceURI + localName);
0450: }
0451: reportStatement(nodeResource, RDF.TYPE, className);
0452: }
0453:
0454: Att type = atts.removeAtt(RDF.NAMESPACE, "type");
0455: if (type != null) {
0456: // rdf:type attribute, value is a URI-reference
0457: URI className = resolveURI(type.getValue());
0458:
0459: reportStatement(nodeResource, RDF.TYPE, className);
0460: }
0461:
0462: if (verifyData()) {
0463: checkRDFAtts(atts);
0464: }
0465:
0466: processSubjectAtts(nodeElement, atts);
0467:
0468: if (!isEmptyElt) {
0469: elementStack.push(nodeElement);
0470: }
0471: }
0472:
0473: /**
0474: * Retrieves the resource of a node element (subject or object) using
0475: * relevant attributes (rdf:ID, rdf:about and rdf:nodeID) from its attributes
0476: * list.
0477: *
0478: * @return a resource or a bNode.
0479: */
0480: private Resource getNodeResource(Atts atts)
0481: throws RDFParseException {
0482: Att id = atts.removeAtt(RDF.NAMESPACE, "ID");
0483: Att about = atts.removeAtt(RDF.NAMESPACE, "about");
0484: Att nodeID = atts.removeAtt(RDF.NAMESPACE, "nodeID");
0485:
0486: if (verifyData()) {
0487: int definedAttsCount = 0;
0488:
0489: if (id != null) {
0490: definedAttsCount++;
0491: }
0492: if (about != null) {
0493: definedAttsCount++;
0494: }
0495: if (nodeID != null) {
0496: definedAttsCount++;
0497: }
0498:
0499: if (definedAttsCount > 1) {
0500: reportError("Only one of the attributes rdf:ID, rdf:about or rdf:nodeID can be used here");
0501: }
0502: }
0503:
0504: Resource result = null;
0505:
0506: if (id != null) {
0507: result = buildURIFromID(id.getValue());
0508: } else if (about != null) {
0509: result = resolveURI(about.getValue());
0510: } else if (nodeID != null) {
0511: result = createBNode(nodeID.getValue());
0512: } else {
0513: // No resource specified, generate a bNode
0514: result = createBNode();
0515: }
0516:
0517: return result;
0518: }
0519:
0520: /** processes subject attributes. */
0521: private void processSubjectAtts(NodeElement nodeElt, Atts atts)
0522: throws RDFParseException, RDFHandlerException {
0523: Resource subject = nodeElt.getResource();
0524:
0525: Iterator<Att> iter = atts.iterator();
0526:
0527: while (iter.hasNext()) {
0528: Att att = iter.next();
0529:
0530: URI predicate = createURI(att.getURI());
0531: Literal lit = createLiteral(att.getValue(), xmlLang, null);
0532:
0533: reportStatement(subject, predicate, lit);
0534: }
0535: }
0536:
0537: private void processPropertyElt(String namespaceURI,
0538: String localName, String qName, Atts atts,
0539: boolean isEmptyElt) throws RDFParseException,
0540: RDFHandlerException {
0541: if (verifyData()) {
0542: checkPropertyEltName(namespaceURI, localName, qName);
0543: }
0544:
0545: // Get the URI of the property
0546: URI propURI = null;
0547: if (namespaceURI.equals("")) {
0548: // no namespace URI
0549: reportError("unqualified property element <" + qName
0550: + "> not allowed");
0551: // Use base URI as namespace:
0552: propURI = buildResourceFromLocalName(localName);
0553: } else {
0554: propURI = createURI(namespaceURI + localName);
0555: }
0556:
0557: // List expansion rule
0558: if (propURI.equals(RDF.LI)) {
0559: NodeElement subject = (NodeElement) peekStack(0);
0560: propURI = createURI(RDF.NAMESPACE + "_"
0561: + subject.getNextLiCounter());
0562: }
0563:
0564: // Push the property on the stack.
0565: PropertyElement predicate = new PropertyElement(propURI);
0566: elementStack.push(predicate);
0567:
0568: // Check if property has a reification ID
0569: Att id = atts.removeAtt(RDF.NAMESPACE, "ID");
0570: if (id != null) {
0571: URI reifURI = buildURIFromID(id.getValue());
0572: predicate.setReificationURI(reifURI);
0573: }
0574:
0575: // Check for presence of rdf:parseType attribute
0576: Att parseType = atts.removeAtt(RDF.NAMESPACE, "parseType");
0577:
0578: if (parseType != null) {
0579: if (verifyData()) {
0580: checkNoMoreAtts(atts);
0581: }
0582:
0583: String parseTypeValue = parseType.getValue();
0584:
0585: if (parseTypeValue.equals("Resource")) {
0586: BNode objectResource = createBNode();
0587: NodeElement subject = (NodeElement) peekStack(1);
0588:
0589: reportStatement(subject.getResource(), propURI,
0590: objectResource);
0591:
0592: if (isEmptyElt) {
0593: handleReification(objectResource);
0594: } else {
0595: NodeElement object = new NodeElement(objectResource);
0596: object.setIsVolatile(true);
0597: elementStack.push(object);
0598: }
0599: } else if (parseTypeValue.equals("Collection")) {
0600: if (isEmptyElt) {
0601: NodeElement subject = (NodeElement) peekStack(1);
0602: reportStatement(subject.getResource(), propURI,
0603: RDF.NIL);
0604: handleReification(RDF.NIL);
0605: } else {
0606: predicate.setParseCollection(true);
0607: }
0608: } else {
0609: // other parseType
0610: if (!parseTypeValue.equals("Literal")) {
0611: reportWarning("unknown parseType: "
0612: + parseType.getValue());
0613: }
0614:
0615: if (isEmptyElt) {
0616: NodeElement subject = (NodeElement) peekStack(1);
0617:
0618: Literal lit = createLiteral("", null,
0619: RDF.XMLLITERAL);
0620:
0621: reportStatement(subject.getResource(), propURI, lit);
0622:
0623: handleReification(lit);
0624: } else {
0625: // The next string is an rdf:XMLLiteral
0626: predicate.setDatatype(RDF.XMLLITERAL);
0627:
0628: saxFilter.setParseLiteralMode();
0629: }
0630: }
0631: }
0632: // parseType == null
0633: else if (isEmptyElt) {
0634: // empty element without an rdf:parseType attribute
0635:
0636: // Note: we handle rdf:datatype attributes here to allow datatyped
0637: // empty strings in documents. The current spec does have a
0638: // production rule that matches this, which is likely to be an
0639: // omission on its part.
0640: Att datatype = atts.getAtt(RDF.NAMESPACE, "datatype");
0641:
0642: if (atts.size() == 0 || atts.size() == 1
0643: && datatype != null) {
0644: // element had no attributes, or only the optional
0645: // rdf:ID and/or rdf:datatype attributes.
0646: NodeElement subject = (NodeElement) peekStack(1);
0647:
0648: URI dtURI = null;
0649: if (datatype != null) {
0650: dtURI = createURI(datatype.getValue());
0651: }
0652:
0653: Literal lit = createLiteral("", xmlLang, dtURI);
0654:
0655: reportStatement(subject.getResource(), propURI, lit);
0656: handleReification(lit);
0657: } else {
0658: // Create resource for the statement's object.
0659: Resource resourceRes = getPropertyResource(atts);
0660:
0661: // All special rdf attributes have been checked/removed.
0662: if (verifyData()) {
0663: checkRDFAtts(atts);
0664: }
0665:
0666: NodeElement resourceElt = new NodeElement(resourceRes);
0667: NodeElement subject = (NodeElement) peekStack(1);
0668:
0669: reportStatement(subject.getResource(), propURI,
0670: resourceRes);
0671: handleReification(resourceRes);
0672:
0673: Att type = atts.removeAtt(RDF.NAMESPACE, "type");
0674: if (type != null) {
0675: // rdf:type attribute, value is a URI-reference
0676: URI className = resolveURI(type.getValue());
0677:
0678: reportStatement(resourceRes, RDF.TYPE, className);
0679: }
0680:
0681: processSubjectAtts(resourceElt, atts);
0682: }
0683: } else {
0684: // Not an empty element, sub elements will follow.
0685:
0686: // Check for rdf:datatype attribute
0687: Att datatype = atts.removeAtt(RDF.NAMESPACE, "datatype");
0688: if (datatype != null) {
0689: URI dtURI = createURI(datatype.getValue());
0690: predicate.setDatatype(dtURI);
0691: }
0692:
0693: // No more attributes are expected.
0694: if (verifyData()) {
0695: checkNoMoreAtts(atts);
0696: }
0697: }
0698:
0699: if (isEmptyElt) {
0700: // Empty element has been pushed on the stack
0701: // at the start of this method, remove it.
0702: elementStack.pop();
0703: }
0704: }
0705:
0706: /**
0707: * Retrieves the object resource of a property element using relevant
0708: * attributes (rdf:resource and rdf:nodeID) from its attributes list.
0709: *
0710: * @return a resource or a bNode.
0711: */
0712: private Resource getPropertyResource(Atts atts)
0713: throws RDFParseException {
0714: Att resource = atts.removeAtt(RDF.NAMESPACE, "resource");
0715: Att nodeID = atts.removeAtt(RDF.NAMESPACE, "nodeID");
0716:
0717: if (verifyData()) {
0718: int definedAttsCount = 0;
0719:
0720: if (resource != null) {
0721: definedAttsCount++;
0722: }
0723: if (nodeID != null) {
0724: definedAttsCount++;
0725: }
0726:
0727: if (definedAttsCount > 1) {
0728: reportError("Only one of the attributes rdf:resource or rdf:nodeID can be used here");
0729: }
0730: }
0731:
0732: Resource result = null;
0733:
0734: if (resource != null) {
0735: result = resolveURI(resource.getValue());
0736: } else if (nodeID != null) {
0737: result = createBNode(nodeID.getValue());
0738: } else {
0739: // No resource specified, generate a bNode
0740: result = createBNode();
0741: }
0742:
0743: return result;
0744: }
0745:
0746: /*
0747: * Processes any rdf:ID attributes that generate reified statements. This
0748: * method assumes that a PropertyElement (which can have an rdf:ID attribute)
0749: * is on top of the stack, and a NodeElement is below that.
0750: */
0751: private void handleReification(Value value)
0752: throws RDFParseException, RDFHandlerException {
0753: PropertyElement predicate = (PropertyElement) peekStack(0);
0754:
0755: if (predicate.isReified()) {
0756: NodeElement subject = (NodeElement) peekStack(1);
0757: URI reifRes = predicate.getReificationURI();
0758: reifyStatement(reifRes, subject.getResource(), predicate
0759: .getURI(), value);
0760: }
0761: }
0762:
0763: private void reifyStatement(Resource reifNode, Resource subj,
0764: URI pred, Value obj) throws RDFParseException,
0765: RDFHandlerException {
0766: reportStatement(reifNode, RDF.TYPE, RDF.STATEMENT);
0767: reportStatement(reifNode, RDF.SUBJECT, subj);
0768: reportStatement(reifNode, RDF.PREDICATE, pred);
0769: reportStatement(reifNode, RDF.OBJECT, obj);
0770: }
0771:
0772: /**
0773: * Builds a Resource from a non-qualified localname.
0774: */
0775: private URI buildResourceFromLocalName(String localName)
0776: throws RDFParseException {
0777: return resolveURI("#" + localName);
0778: }
0779:
0780: /**
0781: * Builds a Resource from the value of an rdf:ID attribute.
0782: */
0783: private URI buildURIFromID(String id) throws RDFParseException {
0784: if (verifyData()) {
0785: // Check if 'id' is a legal NCName
0786: if (!XMLUtil.isNCName(id)) {
0787: reportError("Not an XML Name: " + id);
0788: }
0789: }
0790:
0791: URI uri = resolveURI("#" + id);
0792:
0793: if (verifyData()) {
0794: // ID (URI) should be unique in the current document
0795:
0796: if (!usedIDs.add(uri)) {
0797: // URI was not added because the set already contained an equal
0798: // string
0799: reportError("ID '" + id + "' has already been defined");
0800: }
0801: }
0802:
0803: return uri;
0804: }
0805:
0806: // Overrides RDFParserBase._createBNode(...)
0807: protected BNode createBNode(String nodeID) throws RDFParseException {
0808: if (verifyData()) {
0809: // Check if 'nodeID' is a legal NCName
0810: if (!XMLUtil.isNCName(nodeID)) {
0811: reportError("Not an XML Name: " + nodeID);
0812: }
0813: }
0814:
0815: return super .createBNode(nodeID);
0816: }
0817:
0818: private Object peekStack(int distFromTop) {
0819: return elementStack.get(elementStack.size() - 1 - distFromTop);
0820: }
0821:
0822: private boolean topIsProperty() {
0823: return elementStack.isEmpty()
0824: || peekStack(0) instanceof PropertyElement;
0825: }
0826:
0827: /**
0828: * Checks whether the node element name is from the RDF namespace and, if so,
0829: * if it is allowed to be used in a node element. If the name is equal to one
0830: * of the disallowed names (RDF, ID, about, parseType, resource, nodeID,
0831: * datatype and li), an error is generated. If the name is not defined in the
0832: * RDF namespace, but it claims that it is from this namespace, a warning is
0833: * generated.
0834: */
0835: private void checkNodeEltName(String namespaceURI,
0836: String localName, String qName) throws RDFParseException {
0837: if (RDF.NAMESPACE.equals(namespaceURI)) {
0838:
0839: if (localName.equals("Description")
0840: || localName.equals("Seq")
0841: || localName.equals("Bag")
0842: || localName.equals("Alt")
0843: || localName.equals("Statement")
0844: || localName.equals("Property")
0845: || localName.equals("List")
0846: || localName.equals("subject")
0847: || localName.equals("predicate")
0848: || localName.equals("object")
0849: || localName.equals("type")
0850: || localName.equals("value")
0851: || localName.equals("first")
0852: || localName.equals("rest")
0853: || localName.equals("nil")
0854: || localName.startsWith("_")) {
0855: // These are OK
0856: } else if (localName.equals("li")
0857: || localName.equals("RDF")
0858: || localName.equals("ID")
0859: || localName.equals("about")
0860: || localName.equals("parseType")
0861: || localName.equals("resource")
0862: || localName.equals("nodeID")
0863: || localName.equals("datatype")) {
0864: reportError("<" + qName
0865: + "> not allowed as node element");
0866: } else if (localName.equals("bagID")
0867: || localName.equals("aboutEach")
0868: || localName.equals("aboutEachPrefix")) {
0869: reportError(qName + " is no longer a valid RDF name");
0870: } else {
0871: reportWarning("unknown rdf element <" + qName + ">");
0872: }
0873: }
0874: }
0875:
0876: /**
0877: * Checks whether the property element name is from the RDF namespace and, if
0878: * so, if it is allowed to be used in a property element. If the name is
0879: * equal to one of the disallowed names (RDF, ID, about, parseType, resource
0880: * and li), an error is generated. If the name is not defined in the RDF
0881: * namespace, but it claims that it is from this namespace, a warning is
0882: * generated.
0883: */
0884: private void checkPropertyEltName(String namespaceURI,
0885: String localName, String qName) throws RDFParseException {
0886: if (RDF.NAMESPACE.equals(namespaceURI)) {
0887:
0888: if (localName.equals("li") || localName.equals("Seq")
0889: || localName.equals("Bag")
0890: || localName.equals("Alt")
0891: || localName.equals("Statement")
0892: || localName.equals("Property")
0893: || localName.equals("List")
0894: || localName.equals("subject")
0895: || localName.equals("predicate")
0896: || localName.equals("object")
0897: || localName.equals("type")
0898: || localName.equals("value")
0899: || localName.equals("first")
0900: || localName.equals("rest")
0901: || localName.equals("nil")
0902: || localName.startsWith("_")) {
0903: // These are OK
0904: } else if (localName.equals("Description")
0905: || localName.equals("RDF")
0906: || localName.equals("ID")
0907: || localName.equals("about")
0908: || localName.equals("parseType")
0909: || localName.equals("resource")
0910: || localName.equals("nodeID")
0911: || localName.equals("datatype")) {
0912: reportError("<" + qName
0913: + "> not allowed as property element");
0914: } else if (localName.equals("bagID")
0915: || localName.equals("aboutEach")
0916: || localName.equals("aboutEachPrefix")) {
0917: reportError(qName + " is no longer a valid RDF name");
0918: } else {
0919: reportWarning("unknown rdf element <" + qName + ">");
0920: }
0921: }
0922: }
0923:
0924: /**
0925: * Checks whether 'atts' contains attributes from the RDF namespace that are
0926: * not allowed as attributes. If such an attribute is found, an error is
0927: * generated and the attribute is removed from 'atts'. If the attribute is
0928: * not defined in the RDF namespace, but it claims that it is from this
0929: * namespace, a warning is generated.
0930: */
0931: private void checkRDFAtts(Atts atts) throws RDFParseException {
0932: Iterator<Att> iter = atts.iterator();
0933:
0934: while (iter.hasNext()) {
0935: Att att = iter.next();
0936:
0937: if (RDF.NAMESPACE.equals(att.getNamespace())) {
0938: String localName = att.getLocalName();
0939:
0940: if (localName.equals("Seq") || localName.equals("Bag")
0941: || localName.equals("Alt")
0942: || localName.equals("Statement")
0943: || localName.equals("Property")
0944: || localName.equals("List")
0945: || localName.equals("subject")
0946: || localName.equals("predicate")
0947: || localName.equals("object")
0948: || localName.equals("type")
0949: || localName.equals("value")
0950: || localName.equals("first")
0951: || localName.equals("rest")
0952: || localName.equals("nil")
0953: || localName.startsWith("_")) {
0954: // These are OK
0955: } else if (localName.equals("Description")
0956: || localName.equals("li")
0957: || localName.equals("RDF")
0958: || localName.equals("ID")
0959: || localName.equals("about")
0960: || localName.equals("parseType")
0961: || localName.equals("resource")
0962: || localName.equals("nodeID")
0963: || localName.equals("datatype")) {
0964: reportError("'" + att.getQName()
0965: + "' not allowed as attribute name");
0966: iter.remove();
0967: } else if (localName.equals("bagID")
0968: || localName.equals("aboutEach")
0969: || localName.equals("aboutEachPrefix")) {
0970: reportError(att.getQName()
0971: + " is no longer a valid RDF name");
0972: } else {
0973: reportWarning("unknown rdf attribute '"
0974: + att.getQName() + "'");
0975: }
0976: }
0977: }
0978: }
0979:
0980: /**
0981: * Checks whether 'atts' is empty. If this is not the case, a warning is
0982: * generated for each attribute that is still present.
0983: */
0984: private void checkNoMoreAtts(Atts atts) throws RDFParseException {
0985: if (atts.size() > 0) {
0986: Iterator<Att> iter = atts.iterator();
0987:
0988: while (iter.hasNext()) {
0989: Att att = iter.next();
0990:
0991: reportError("unexpected attribute '" + att.getQName()
0992: + "'");
0993: iter.remove();
0994: }
0995: }
0996: }
0997:
0998: /**
0999: * Reports a stament to the configured RDFHandlerException.
1000: *
1001: * @param subject
1002: * The statement's subject.
1003: * @param predicate
1004: * The statement's predicate.
1005: * @param object
1006: * The statement's object.
1007: * @throws RDFHandlerException
1008: * If the configured RDFHandlerException throws an
1009: * RDFHandlerException.
1010: */
1011: private void reportStatement(Resource subject, URI predicate,
1012: Value object) throws RDFParseException, RDFHandlerException {
1013: Statement st = createStatement(subject, predicate, object);
1014: rdfHandler.handleStatement(st);
1015: }
1016:
1017: /**
1018: * Overrides {@link RDFParserBase#reportWarning(String)}, adding line- and
1019: * column number information to the error.
1020: */
1021: protected void reportWarning(String msg) {
1022: Locator locator = saxFilter.getLocator();
1023: if (locator != null) {
1024: reportWarning(msg, locator.getLineNumber(), locator
1025: .getColumnNumber());
1026: } else {
1027: reportWarning(msg, -1, -1);
1028: }
1029: }
1030:
1031: /**
1032: * Overrides {@link RDFParserBase#reportError(String)}, adding line- and
1033: * column number information to the error.
1034: */
1035: protected void reportError(String msg) throws RDFParseException {
1036: Locator locator = saxFilter.getLocator();
1037: if (locator != null) {
1038: reportError(msg, locator.getLineNumber(), locator
1039: .getColumnNumber());
1040: } else {
1041: reportError(msg, -1, -1);
1042: }
1043: }
1044:
1045: /**
1046: * Overrides {@link RDFParserBase#reportFatalError(String)}, adding line-
1047: * and column number information to the error.
1048: */
1049: protected void reportFatalError(String msg)
1050: throws RDFParseException {
1051: Locator locator = saxFilter.getLocator();
1052: if (locator != null) {
1053: reportFatalError(msg, locator.getLineNumber(), locator
1054: .getColumnNumber());
1055: } else {
1056: reportFatalError(msg, -1, -1);
1057: }
1058: }
1059:
1060: /**
1061: * Overrides {@link RDFParserBase#reportFatalError(Exception)}, adding line-
1062: * and column number information to the error.
1063: */
1064: protected void reportFatalError(Exception e)
1065: throws RDFParseException {
1066: Locator locator = saxFilter.getLocator();
1067: if (locator != null) {
1068: reportFatalError(e, locator.getLineNumber(), locator
1069: .getColumnNumber());
1070: } else {
1071: reportFatalError(e, -1, -1);
1072: }
1073: }
1074:
1075: /*-----------------------------------------------*
1076: * Inner classes NodeElement and PropertyElement *
1077: *-----------------------------------------------*/
1078:
1079: static class NodeElement {
1080:
1081: private Resource resource;
1082:
1083: private boolean isVolatile = false;;
1084:
1085: private int liCounter = 1;
1086:
1087: public NodeElement(Resource resource) {
1088: this .resource = resource;
1089: }
1090:
1091: public Resource getResource() {
1092: return resource;
1093: }
1094:
1095: public void setIsVolatile(boolean isVolatile) {
1096: this .isVolatile = isVolatile;
1097: }
1098:
1099: public boolean isVolatile() {
1100: return isVolatile;
1101: }
1102:
1103: public int getNextLiCounter() {
1104: return liCounter++;
1105: }
1106: }
1107:
1108: static class PropertyElement {
1109:
1110: /** The property URI. */
1111: private URI uri;
1112:
1113: /** An optional reification identifier. */
1114: private URI reificationURI;
1115:
1116: /** An optional datatype. */
1117: private URI datatype;
1118:
1119: /**
1120: * Flag indicating whether this PropertyElement has an attribute
1121: * <tt>rdf:parseType="Collection"</tt>.
1122: */
1123: private boolean parseCollection = false;
1124:
1125: /**
1126: * The resource that was used to append the last part of an rdf:List.
1127: */
1128: private Resource lastListResource;
1129:
1130: public PropertyElement(URI uri) {
1131: this .uri = uri;
1132: }
1133:
1134: public URI getURI() {
1135: return uri;
1136: }
1137:
1138: public boolean isReified() {
1139: return reificationURI != null;
1140: }
1141:
1142: public void setReificationURI(URI reifURI) {
1143: this .reificationURI = reifURI;
1144: }
1145:
1146: public URI getReificationURI() {
1147: return reificationURI;
1148: }
1149:
1150: public void setDatatype(URI datatype) {
1151: this .datatype = datatype;
1152: }
1153:
1154: public URI getDatatype() {
1155: return datatype;
1156: }
1157:
1158: public boolean parseCollection() {
1159: return parseCollection;
1160: }
1161:
1162: public void setParseCollection(boolean parseCollection) {
1163: this .parseCollection = parseCollection;
1164: }
1165:
1166: public Resource getLastListResource() {
1167: return lastListResource;
1168: }
1169:
1170: public void setLastListResource(Resource resource) {
1171: lastListResource = resource;
1172: }
1173: }
1174: }
|