001: /*
002: * Copyright (C) 1999-2004 <a href="mailto:mandarax@jbdietrich.com">Jens Dietrich</a>
003: *
004: * This library is free software; you can redistribute it and/or
005: * modify it under the terms of the GNU Lesser General Public
006: * License as published by the Free Software Foundation; either
007: * version 2 of the License, or (at your option) any later version.
008: *
009: * This library is distributed in the hope that it will be useful,
010: * but WITHOUT ANY WARRANTY; without even the implied warranty of
011: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
012: * Lesser General Public License for more details.
013: *
014: * You should have received a copy of the GNU Lesser General Public
015: * License along with this library; if not, write to the Free Software
016: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
017: */
018: package org.mandarax.rdf;
019:
020: import java.io.InputStream;
021: import java.net.URL;
022: import org.mandarax.kernel.*;
023: import org.mandarax.rdf.lib.RDFLib;
024: import org.mandarax.util.AbstractClauseIterator;
025: import com.hp.hpl.jena.rdf.model.*;
026: import java.util.Iterator;
027: import java.util.*;
028:
029: /**
030: * Clause set iterator implementation.
031: * Each iterator instance has its own jena model which is NOT re-used.
032: * This simply means that the RDF document is read / parsed whenever the application
033: * requests an iterator - caching could be implemented here, but this is currently not implemented.
034: * @author <A HREF="mailto:paschke@in.tum.de">Adrian Paschke</A> <A HREF="mailto:j.b.dietrich@massey.ac.nz">Jens Dietrich</A>
035: * @version 1.1 <01 August 2004>
036: * @since 0.1
037: */
038:
039: class RDFClauseIterator extends AbstractClauseIterator implements
040: RDFLogger, RDFConstants {
041: public static LogicFactory lfactory = LogicFactory
042: .getDefaultFactory();
043: private URL url = null;
044: private String language = RDFClauseSet.RDFXML;
045: private Model jenaModel = null;
046: private Iterator statementIterator = null;
047: private RDFPredicate predicate = null;
048: private Map containerRegistry = null;
049: private Map collectionRegistry = null;
050: // selector to detect contains predicates
051: private static Selector containsSelector = new Selector() {
052: /**
053: * Tests a statement.
054: * @see com.hp.hpl.jena.rdf.model.Selector#test(com.hp.hpl.jena.rdf.model.Statement)
055: */
056: public boolean test(Statement stmnt) {
057: return RDFUtils.isContainsProperty(stmnt.getPredicate());
058: }
059:
060: /**
061: * Indicates whether this is a simpel selector.
062: * @see com.hp.hpl.jena.rdf.model.Selector#isSimple()
063: */
064: public boolean isSimple() {
065: return false;
066: }
067:
068: /** Get the resource subject.
069: * @see com.hp.hpl.jena.rdf.model.Selector#getSubject()
070: */
071: public Resource getSubject() {
072: // nothing to do here
073: return null;
074: }
075:
076: /**
077: * Get the predicate.
078: * @see com.hp.hpl.jena.rdf.model.Selector#getPredicate()
079: */
080: public Property getPredicate() {
081: // nothing to do here
082: return null;
083: }
084:
085: /**
086: * Get the object.
087: * @see com.hp.hpl.jena.rdf.model.Selector#getObject()
088: */
089: public RDFNode getObject() {
090: // nothing to do here
091: return null;
092: }
093: };
094:
095: /**
096: * Constructor.
097: * @param url the url of the RDF source
098: * @param lang the language used to encode RDF (one of the constnst defined in RDFClauseSet)
099: * @param predicate the rdf predicate (all facts returned from the iterator have the same predicate)
100: * the predicate is important to restrict the iterator to clauses with this predicate, if null,
101: * clauses for all predicates are returned.
102: * @exception a ClauseSetException is thrown if the RDF source cannot be loaded
103: */
104: public RDFClauseIterator(URL url, String lang,
105: RDFPredicate predicate) throws ClauseSetException {
106: super ();
107: this .url = url;
108: this .language = lang;
109: this .predicate = predicate;
110:
111: // init model
112: try {
113: jenaModel = ModelFactory.createDefaultModel();
114: InputStream in = url.openStream();
115: jenaModel.read(in, "", language);
116:
117: // select only statements matching the predicate !
118: Property property = null;
119: if (predicate == RDFLib.CONTAINS)
120: statementIterator = jenaModel
121: .listStatements(containsSelector);
122: else {
123: if (predicate != null)
124: property = jenaModel.createProperty(predicate
125: .getNameSpace(), predicate.getLocalName());
126: statementIterator = jenaModel.listStatements(
127: (Resource) null, property, (RDFNode) null);
128: }
129: } catch (Exception x) {
130: throw new ClauseSetException(
131: "Cannot load RDF from URL "
132: + url
133: + ", check whether RDF source exists and the proxy is configured correctly",
134: x);
135: }
136: }
137:
138: /**
139: * Get the URL.
140: * @return Returns the url.
141: */
142: public URL getUrl() {
143: return url;
144: }
145:
146: /**
147: * Indicates whether there is a next clause.
148: * @return a boolean
149: */
150: public boolean hasMoreClauses() throws ClauseSetException {
151: return statementIterator.hasNext();
152: }
153:
154: /**
155: * Get the next clause.
156: * @return the next clause
157: */
158: public Clause nextClause() throws ClauseSetException {
159: try {
160: Statement nextStatement = (Statement) statementIterator
161: .next();
162: return rdf2fact(nextStatement);
163: } catch (Exception x) {
164: throw new ClauseSetException(
165: "Cannot fetch next RDF statement from URL " + url,
166: x);
167: }
168: }
169:
170: /**
171: * Create a fact from a RDF statement.
172: * @param stmnt a RDF statement.
173: * @return a fact.
174: */
175: private Fact rdf2fact(Statement stmnt) throws ClauseSetException {
176: Predicate p = predicate;
177: Property property = stmnt.getPredicate();
178: Resource subject = stmnt.getSubject();
179: RDFNode object = stmnt.getObject();
180:
181: // usually, the predicate is passed from the clause set
182: // if not, built it (this would be the case if RDFClauseSet#clauses() has been called)
183: boolean isContains = RDFUtils.isContainsProperty(property);
184: if (p == null) {
185: p = RDFPredicateRegistry.findOrCreatePredicate(property
186: .getNameSpace(), property.getLocalName(),
187: isContains);
188: }
189:
190: // detect containers/collections
191: if (isContains) {
192:
193: // is container
194: if (RDFUtils.isContainer(stmnt)) {
195: // register the container
196: RDFContainer container = getContainer(subject);
197: return lfactory.createFact(p, new Term[] {
198: lfactory.createConstantTerm(container,
199: RDFContainer.class),
200: lfactory.createConstantTerm(object,
201: RDFNode.class) });
202: } else { // is collection
203:
204: // register the collection
205: RDFCollection collection = getCollection(subject);
206: return lfactory.createFact(p, new Term[] {
207: lfactory.createConstantTerm(collection,
208: RDFCollection.class),
209: lfactory.createConstantTerm(object,
210: RDFNode.class) });
211: }
212: }
213:
214: else {
215: return lfactory.createFact(p,
216: new Term[] {
217: lfactory.createConstantTerm(subject,
218: Resource.class),
219: lfactory.createConstantTerm(object,
220: RDFNode.class) });
221: }
222:
223: }
224:
225: /**
226: * Get the RDF container for a node.
227: * @param an object
228: * @return an RDF Container
229: */
230: private RDFContainer getContainer(RDFNode object)
231: throws ClauseSetException {
232: if (!(object instanceof Resource))
233: throw new ClauseSetException(
234: "Container node must be a resource");
235: if (containerRegistry == null)
236: containerRegistry = new HashMap();
237: RDFContainer container = (RDFContainer) containerRegistry
238: .get(object);
239: if (container == null) {
240: // lazy initialization
241: Container c = jenaModel.getBag((Resource) object);
242: container = new RDFContainer(c);
243: containerRegistry.put(object, container);
244: // container only wraps the Jena container so that iterating over its elements can be delayed
245: }
246: return container;
247: }
248:
249: /**
250: * Get the RDF collection for a node.
251: * @param an object
252: * @return a RDF Collection
253: */
254: private RDFCollection getCollection(RDFNode object)
255: throws ClauseSetException {
256: if (!(object instanceof Resource))
257: throw new ClauseSetException(
258: "Collection node must be a resource");
259: if (collectionRegistry == null)
260: collectionRegistry = new HashMap();
261: RDFCollection collection = (RDFCollection) collectionRegistry
262: .get(object);
263: if (collection == null) {
264: // lazy initialization
265:
266: //find the start of the list
267: Iterator ite = jenaModel.listSubjectsWithProperty(jenaModel
268: .createProperty(RDF_NS + "rest"), object);
269: while (ite.hasNext()) {
270: object = (RDFNode) ite.next();
271: ite = jenaModel.listSubjectsWithProperty(jenaModel
272: .createProperty(RDF_NS + "rest"), object);
273: }
274:
275: RDFList list = (RDFList) object.as(RDFList.class);
276: collection = new RDFCollection(list);
277: collectionRegistry.put(object, collection);
278: // collection only wraps the Jena collection (linked list) so that iterating over its elements can be delayed
279: }
280: return collection;
281: }
282:
283: }
|