001: /*
002: * Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2007.
003: *
004: * Licensed under the Aduna BSD-style license.
005: */
006: package org.openrdf.rio.trix;
007:
008: import java.io.IOException;
009: import java.io.InputStream;
010: import java.io.Reader;
011: import java.util.ArrayList;
012: import java.util.List;
013: import java.util.Map;
014:
015: import org.xml.sax.SAXException;
016:
017: import info.aduna.xml.SimpleSAXAdapter;
018: import info.aduna.xml.SimpleSAXParser;
019:
020: import org.openrdf.model.Resource;
021: import org.openrdf.model.Statement;
022: import org.openrdf.model.URI;
023: import org.openrdf.model.Value;
024: import org.openrdf.model.ValueFactory;
025: import org.openrdf.model.impl.ValueFactoryImpl;
026:
027: import org.openrdf.rio.RDFParseException;
028: import org.openrdf.rio.RDFFormat;
029: import org.openrdf.rio.RDFHandlerException;
030: import org.openrdf.rio.helpers.RDFParserBase;
031:
032: import static org.openrdf.rio.trix.TriXConstants.*;
033:
034: /**
035: * A parser that can parse RDF files that are in the <a
036: * href="http://www.w3.org/2004/03/trix/">TriX format</a>.
037: *
038: * @author Arjohn Kampman
039: */
040: public class TriXParser extends RDFParserBase {
041:
042: /*--------------*
043: * Constructors *
044: *--------------*/
045:
046: /**
047: * Creates a new TriXParser that will use a {@link ValueFactoryImpl} to
048: * create objects for resources, bNodes, literals and statements.
049: */
050: public TriXParser() {
051: super ();
052: }
053:
054: /**
055: * Creates a new TriXParser that will use the supplied ValueFactory to create
056: * objects for resources, bNodes, literals and statements.
057: *
058: * @param valueFactory
059: * A ValueFactory.
060: */
061: public TriXParser(ValueFactory valueFactory) {
062: super (valueFactory);
063: }
064:
065: /*---------*
066: * Methods *
067: *---------*/
068:
069: public final RDFFormat getRDFFormat() {
070: return RDFFormat.TRIX;
071: }
072:
073: public void parse(InputStream in, String baseURI)
074: throws IOException, RDFParseException, RDFHandlerException {
075: parse(in);
076: }
077:
078: public void parse(Reader reader, String baseURI)
079: throws IOException, RDFParseException, RDFHandlerException {
080: parse(reader);
081: }
082:
083: private void parse(Object inputStreamOrReader) throws IOException,
084: RDFParseException, RDFHandlerException {
085: try {
086: rdfHandler.startRDF();
087:
088: SimpleSAXParser saxParser = new SimpleSAXParser();
089: saxParser.setPreserveWhitespace(true);
090: saxParser.setListener(new TriXSAXHandler());
091:
092: if (inputStreamOrReader instanceof InputStream) {
093: saxParser.parse((InputStream) inputStreamOrReader);
094: } else {
095: saxParser.parse((Reader) inputStreamOrReader);
096: }
097:
098: rdfHandler.endRDF();
099: } catch (SAXException e) {
100: Exception wrappedExc = e.getException();
101:
102: if (wrappedExc instanceof RDFParseException) {
103: throw (RDFParseException) wrappedExc;
104: } else if (wrappedExc instanceof RDFHandlerException) {
105: throw (RDFHandlerException) wrappedExc;
106: } else {
107: reportFatalError(wrappedExc);
108: }
109: }
110: }
111:
112: /*----------------------------*
113: * Inner class TriXSAXHandler *
114: *----------------------------*/
115:
116: private class TriXSAXHandler extends SimpleSAXAdapter {
117:
118: private Resource currentContext;
119:
120: private boolean parsingContext;
121:
122: private List<Value> valueList;
123:
124: public TriXSAXHandler() {
125: currentContext = null;
126: valueList = new ArrayList<Value>(3);
127: }
128:
129: @Override
130: public void startTag(String tagName, Map<String, String> atts,
131: String text) throws SAXException {
132: try {
133: if (tagName.equals(URI_TAG)) {
134: valueList.add(createURI(text));
135: } else if (tagName.equals(BNODE_TAG)) {
136: valueList.add(createBNode(text));
137: } else if (tagName.equals(PLAIN_LITERAL_TAG)) {
138: String lang = atts.get(LANGUAGE_ATT);
139: valueList.add(createLiteral(text, lang, null));
140: } else if (tagName.equals(TYPED_LITERAL_TAG)) {
141: String datatype = atts.get(DATATYPE_ATT);
142:
143: if (datatype == null) {
144: reportError(DATATYPE_ATT
145: + " attribute missing for typed literal");
146: valueList.add(createLiteral(text, null, null));
147: } else {
148: URI dtURI = createURI(datatype);
149: valueList.add(createLiteral(text, null, dtURI));
150: }
151: } else if (tagName.equals(TRIPLE_TAG)) {
152: if (parsingContext) {
153: try {
154: // First triple in a context, valueList can contain
155: // context information
156: if (valueList.size() > 1) {
157: reportError("At most 1 resource can be specified for the context");
158: } else if (valueList.size() == 1) {
159: try {
160: currentContext = (Resource) valueList
161: .get(0);
162: } catch (ClassCastException e) {
163: reportError("Context identifier should be a URI or blank node");
164: }
165: }
166: } finally {
167: parsingContext = false;
168: valueList.clear();
169: }
170: }
171: } else if (tagName.equals(CONTEXT_TAG)) {
172: parsingContext = true;
173: }
174: } catch (RDFParseException e) {
175: throw new SAXException(e);
176: }
177: }
178:
179: @Override
180: public void endTag(String tagName) throws SAXException {
181: try {
182: if (tagName.equals(TRIPLE_TAG)) {
183: reportStatement();
184: } else if (tagName.equals(CONTEXT_TAG)) {
185: currentContext = null;
186: }
187: } catch (RDFParseException e) {
188: throw new SAXException(e);
189: } catch (RDFHandlerException e) {
190: throw new SAXException(e);
191: }
192: }
193:
194: private void reportStatement() throws RDFParseException,
195: RDFHandlerException {
196: try {
197: if (valueList.size() != 3) {
198: reportError("exactly 3 values are required for a triple");
199: return;
200: }
201:
202: Resource subj;
203: URI pred;
204: Value obj;
205:
206: try {
207: subj = (Resource) valueList.get(0);
208: } catch (ClassCastException e) {
209: reportError("First value for a triple should be a URI or blank node");
210: return;
211: }
212:
213: try {
214: pred = (URI) valueList.get(1);
215: } catch (ClassCastException e) {
216: reportError("Second value for a triple should be a URI");
217: return;
218: }
219:
220: obj = valueList.get(2);
221:
222: Statement st = createStatement(subj, pred, obj,
223: currentContext);
224: rdfHandler.handleStatement(st);
225: } finally {
226: valueList.clear();
227: }
228: }
229: } // end inner class TriXSAXHandler
230: }
|