001: /*
002: * (c) Copyright 2006, 2007, 2008 Hewlett-Packard Development Company, LP
003: * All rights reserved.
004: * [See end of file]
005: */
006:
007: package arq.examples.propertyfunction;
008:
009: import java.util.List;
010:
011: import com.hp.hpl.jena.graph.Node;
012: import com.hp.hpl.jena.graph.Triple;
013: import com.hp.hpl.jena.rdf.model.Model;
014: import com.hp.hpl.jena.rdf.model.ModelFactory;
015: import com.hp.hpl.jena.rdf.model.Resource;
016:
017: import com.hp.hpl.jena.sparql.algebra.*;
018: import com.hp.hpl.jena.sparql.algebra.op.OpBGP;
019: import com.hp.hpl.jena.sparql.algebra.op.OpFilter;
020: import com.hp.hpl.jena.sparql.algebra.op.OpJoin;
021: import com.hp.hpl.jena.sparql.algebra.op.OpTable;
022: import com.hp.hpl.jena.sparql.core.BasicPattern;
023: import com.hp.hpl.jena.sparql.core.Var;
024: import com.hp.hpl.jena.sparql.engine.ExecutionContext;
025: import com.hp.hpl.jena.sparql.engine.QueryIterator;
026: import com.hp.hpl.jena.sparql.engine.iterator.QueryIterNullIterator;
027: import com.hp.hpl.jena.sparql.engine.main.OpCompiler;
028: import com.hp.hpl.jena.sparql.expr.E_Regex;
029: import com.hp.hpl.jena.sparql.expr.Expr;
030: import com.hp.hpl.jena.sparql.expr.ExprVar;
031: import com.hp.hpl.jena.sparql.pfunction.PropFuncArg;
032: import com.hp.hpl.jena.sparql.pfunction.PropertyFunction;
033: import com.hp.hpl.jena.sparql.pfunction.PropertyFunctionRegistry;
034: import com.hp.hpl.jena.sparql.syntax.ElementFilter;
035: import com.hp.hpl.jena.sparql.syntax.ElementGroup;
036: import com.hp.hpl.jena.sparql.syntax.ElementTriplesBlock;
037: import com.hp.hpl.jena.sparql.util.ALog;
038: import com.hp.hpl.jena.sparql.util.NodeUtils;
039: import com.hp.hpl.jena.vocabulary.RDFS;
040:
041: import com.hp.hpl.jena.query.*;
042:
/** Example extension or property function to show rewriting part of a query.
 *  A simpler, more direct way to implement property functions is to extend
 *  one of the helper classes and have the custom code called on each solution from
 *  the previous query stage.
047: *
 *  See examples {@link localname} for a general predicate that allows for any of
 *  subject or object to be a variable or a bound value, or see {@link uppercase} for a simple
 *  implementation that transforms one graph node into a new node.
051: *
052: * This is a more complicated example which uses the PropertyFunction interface directly.
053: * It takes the QueryIterator from the previous stage and inserts a new processing step.
054: * It then calls that processing step to do the real work.
055: *
056: * The approach here could be used to access an external index (e.g. Lucene) although here
057: * we just show looking for RDFS labels.
058: *
 *  This property function rewrites
 *  <pre>
 *    ?x ext:labelSearch "something"
 *  </pre>
 *  as
063: * <pre>
064: * ?x rdfs:label ?label . FILTER regex(?label, "something", "i")
065: * </pre>
066: *
067: * by simply doing a regex but could be used to add access to some other form of
068: * indexing or external structure.
069: *
070: * @author Andy Seaborne
071: */
072:
073: public class labelSearch implements PropertyFunction {
074: List myArgs = null;
075:
076: public void build(PropFuncArg argSubject, Node predicate,
077: PropFuncArg argObject, ExecutionContext execCxt) {
078: if (argSubject.isList() || argObject.isList())
079: throw new QueryBuildException("List arguments to "
080: + predicate.getURI());
081: }
082:
083: /* This be called once, with unevaluated arguments.
084: * To do a rewrite of part of a query, we must use the fundamental PropertyFunction
085: * interface to be called once with the input iterator.
086: * Must not return null nor throw an exception. Instead, return a QueryIterNullIterator
087: * indicating no matches.
088: */
089:
090: public QueryIterator exec(QueryIterator input,
091: PropFuncArg argSubject, Node predicate,
092: PropFuncArg argObject, ExecutionContext execCxt) {
093: // No real need to check the pattern arguments because
094: // the replacement triple pattern and regex will cope
095: // but we illustrate testing here.
096:
097: Node nodeVar = argSubject.getArg();
098: String pattern = NodeUtils.stringLiteral(argObject.getArg());
099: if (pattern == null) {
100: ALog.warn(this ,
101: "Pattern must be a plain literal or xsd:string: "
102: + argObject.getArg());
103: return new QueryIterNullIterator(execCxt);
104: }
105:
106: if (false)
107: // Old (ARQ 1) way - not recommended.
108: return buildSyntax(input, nodeVar, pattern, execCxt);
109:
110: // Better
111: // Build a SPARQL algebra expression
112: Var var2 = createNewVar(); // Hidden variable
113:
114: BasicPattern bp = new BasicPattern();
115: Triple t = new Triple(nodeVar, RDFS.label.asNode(), var2);
116: bp.add(t);
117: OpBGP op = new OpBGP(bp);
118:
119: Expr regex = new E_Regex(new ExprVar(var2.getName()), pattern,
120: "i");
121: Op filter = OpFilter.filter(regex, op);
122:
123: // ---- Evaluation
124: if (true) {
125: // Use the reference query engine
126: // Create a table for the input stream (so it uses working memory at this point,
127: // which is why this is not the preferred way).
128: // Then join to expression for this stage.
129: Table table = TableFactory.create(input);
130: Op op2 = OpJoin.create(OpTable.create(table), filter);
131: return Algebra.exec(op2, execCxt.getDataset());
132: }
133:
134: // Use the default, optimizing query engine.
135: return OpCompiler.compile(filter, input, execCxt);
136: }
137:
138: // Build SPARQL syntax and compile it.
139: // Not recommended.
140: private QueryIterator buildSyntax(QueryIterator input,
141: Node nodeVar, String pattern, ExecutionContext execCxt) {
142: Var var2 = createNewVar();
143: // Triple patterns for ?x rdfs:label ?hiddenVar
144: ElementTriplesBlock elementBGP = new ElementTriplesBlock();
145: Triple t = new Triple(nodeVar, RDFS.label.asNode(), var2);
146: elementBGP.addTriple(t);
147:
148: // Regular expression for regex(?hiddenVar, "pattern", "i")
149: Expr regex = new E_Regex(new ExprVar(var2.getName()), pattern,
150: "i");
151:
152: ElementGroup elementGroup = new ElementGroup();
153: elementGroup.addElement(elementBGP);
154: elementGroup.addElement(new ElementFilter(regex));
155: // Compile it.
156: // An alternative design is to build the Op structure programmatically,
157: //
158: Op op = Algebra.compile(elementGroup);
159: return OpCompiler.compile(op, input, execCxt);
160: }
161:
162: static int hiddenVariableCount = 0;
163:
164: // Create a new, hidden, variable.
165: private static Var createNewVar() {
166: hiddenVariableCount++;
167: String varName = "-search-" + hiddenVariableCount;
168: return Var.alloc(varName);
169: }
170:
171: // -------- Example usage
172:
173: public static void main(String[] argv) {
174: // Call the function as java:arq.examples.ext.labelSearch or register it.
175: String prologue = "PREFIX ext: <java:arq.examples.propertyfunction.>\n";
176:
177: String qs = prologue + "SELECT * { ?x ext:labelSearch 'EF' }";
178: Query query = QueryFactory.create(qs);
179: Model model = make();
180: QueryExecution qExec = QueryExecutionFactory.create(query,
181: model);
182: try {
183: ResultSet rs = qExec.execSelect();
184: ResultSetFormatter.out(rs);
185: } finally {
186: qExec.close();
187: }
188:
189: // Or register it.
190: PropertyFunctionRegistry.get().put("http://example/f#search",
191: labelSearch.class);
192: prologue = "PREFIX ext: <http://example/f#>\n";
193: qs = prologue + "SELECT * { ?x ext:search 'EF' }";
194: query = QueryFactory.create(qs);
195: qExec = QueryExecutionFactory.create(query, model);
196: try {
197: ResultSet rs = qExec.execSelect();
198: ResultSetFormatter.out(rs);
199: } finally {
200: qExec.close();
201: }
202: }
203:
204: private static Model make() {
205: String BASE = "http://example/";
206: Model model = ModelFactory.createDefaultModel();
207: model.setNsPrefix("", BASE);
208: Resource r1 = model.createResource(BASE + "r1");
209: Resource r2 = model.createResource(BASE + "r2");
210:
211: r1.addProperty(RDFS.label, "abc");
212: r2.addProperty(RDFS.label, "def");
213:
214: return model;
215: }
216: }
217:
218: /*
219: * (c) Copyright 2006, 2007, 2008 Hewlett-Packard Development Company, LP
220: * All rights reserved.
221: *
222: * Redistribution and use in source and binary forms, with or without
223: * modification, are permitted provided that the following conditions
224: * are met:
225: * 1. Redistributions of source code must retain the above copyright
226: * notice, this list of conditions and the following disclaimer.
227: * 2. Redistributions in binary form must reproduce the above copyright
228: * notice, this list of conditions and the following disclaimer in the
229: * documentation and/or other materials provided with the distribution.
230: * 3. The name of the author may not be used to endorse or promote products
231: * derived from this software without specific prior written permission.
232: *
233: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
234: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
235: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
236: * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
237: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
238: * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
239: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
240: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
241: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
242: * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
243: */
|