001: /*
002: * Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2007.
003: * Copyright James Leigh (c) 2006.
004: *
005: * Licensed under the Aduna BSD-style license.
006: */
007: package org.openrdf.query.algebra.evaluation.impl;
008:
009: import java.util.ArrayList;
010: import java.util.HashSet;
011: import java.util.LinkedList;
012: import java.util.List;
013: import java.util.Set;
014:
015: import org.openrdf.query.BindingSet;
016: import org.openrdf.query.Dataset;
017: import org.openrdf.query.algebra.Join;
018: import org.openrdf.query.algebra.TupleExpr;
019: import org.openrdf.query.algebra.evaluation.QueryOptimizer;
020: import org.openrdf.query.algebra.helpers.QueryModelVisitorBase;
021:
022: /**
023: * A query optimizer that re-orders nested Joins.
024: *
025: * @author Arjohn Kampman
026: * @author James Leigh
027: */
028: public class QueryJoinOptimizer implements QueryOptimizer {
029:
030: protected final EvaluationStatistics statistics;
031:
032: public QueryJoinOptimizer() {
033: this (new EvaluationStatistics());
034: }
035:
036: public QueryJoinOptimizer(EvaluationStatistics statistics) {
037: this .statistics = statistics;
038: }
039:
040: /**
041: * Applies generally applicable optimizations: path expressions are sorted
042: * from more to less specific.
043: *
044: * @param tupleExpr
045: */
046: public void optimize(TupleExpr tupleExpr, Dataset dataset,
047: BindingSet bindings) {
048: tupleExpr.visit(new JoinVisitor());
049: }
050:
051: protected class JoinVisitor extends
052: QueryModelVisitorBase<RuntimeException> {
053:
054: @Override
055: public void meet(Join node) {
056: List<TupleExpr> joinArgs = new LinkedList<TupleExpr>();
057: getJoinArgs(node, joinArgs);
058:
059: // Process rest of query model before reordering the joins
060: for (TupleExpr joinArg : joinArgs) {
061: joinArg.visit(this );
062: }
063:
064: joinArgs = sortExpressions(joinArgs, new HashSet<String>());
065:
066: // Build new join hierarchy
067: TupleExpr replacement = joinArgs.get(0);
068: for (int i = 1; i < joinArgs.size(); i++) {
069: replacement = new Join(replacement, joinArgs.get(i));
070: }
071:
072: // Replace old join hierarchy
073: node.replaceWith(replacement);
074: }
075:
076: protected void getJoinArgs(TupleExpr tupleExpr,
077: List<TupleExpr> joinArgs) {
078: if (tupleExpr instanceof Join) {
079: Join join = (Join) tupleExpr;
080: getJoinArgs(join.getLeftArg(), joinArgs);
081: getJoinArgs(join.getRightArg(), joinArgs);
082: } else {
083: joinArgs.add(tupleExpr);
084: }
085: }
086:
087: /**
088: * Merges the boolean constraints and the path expressions in one single
089: * list. Path expressions are heuristically reordered to minimize query
090: * evaluation time and boolean constraints are inserted between them. The
091: * separate boolean constraints are moved to the start of the list as much
092: * as possible, under the condition that all variables that are used in
093: * the constraint are instantiated by the path expressions that are
094: * earlier in the list. An example combined list might be:
095: * <tt>[(A,B,C), A != foo:bar, (B,E,F), C != F, (F,G,H)]</tt>.
096: */
097: protected List<TupleExpr> sortExpressions(
098: List<TupleExpr> expressions, Set<String> boundVars) {
099: List<TupleExpr> orderedExpressions = new ArrayList<TupleExpr>(
100: expressions.size());
101:
102: while (!expressions.isEmpty()) {
103: TupleExpr tupleExpr = selectNextTupleExpr(expressions,
104: boundVars);
105:
106: expressions.remove(tupleExpr);
107: orderedExpressions.add(tupleExpr);
108:
109: boundVars.addAll(tupleExpr.getBindingNames());
110: }
111:
112: return orderedExpressions;
113: }
114:
115: /**
116: * Selects from a list of tuple expressions the next tuple expression that
117: * should be evaluated. This method selects the tuple expression with
118: * highest number of bound variables, preferring variables that have been
119: * bound in other tuple expressions over variables with a fixed value.
120: */
121: protected TupleExpr selectNextTupleExpr(
122: List<TupleExpr> expressions, Set<String> boundVars) {
123: double lowestCardinality = Double.MAX_VALUE;
124: TupleExpr result = null;
125:
126: for (TupleExpr tupleExpr : expressions) {
127: // Calculate a score for this tuple expression
128: double cardinality = statistics.getCardinality(
129: tupleExpr, boundVars);
130:
131: if (cardinality < lowestCardinality) {
132: // More specific path expression found
133: lowestCardinality = cardinality;
134: result = tupleExpr;
135: }
136: }
137:
138: return result;
139: }
140: }
141: }
|