001: /*
002: (c) Copyright 2003, 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP
003: [See end of file]
004: $Id: SimpleTripleSorter.java,v 1.12 2008/01/02 12:07:58 andy_seaborne Exp $
005: */
006:
007: package com.hp.hpl.jena.graph.query;
008:
009: import com.hp.hpl.jena.graph.*;
010:
011: import java.util.*;
012:
013: /**
014: A TripleSorter for "optimising" queries. The triples of the query are permuted by
015: moving the "lightest" triples to earlier positions. Within each region of the same
016: lightness, triples the bind the most variables to their right are preferred. Otherwise
017: the order is preserved.
018: <p>
019: The notion of "lightness" makes more concrete triples lighter than less concrete ones,
020: and variables lighter than ANY. Variables that have been bound by the time their
021: containing triple is processed weigh just a little.
022: <p>
023: The notion of "bind the most" is just the sum of occurances of the variables in the
024: triple in the other triples.
025: <p>
026: No weighting is applied to predicate position, and no knowledge about the graph
027: being queried is required.
028:
029: @author kers
030: */
031: public class SimpleTripleSorter implements TripleSorter {
032: private Triple[] result;
033: private int putIndex;
034: private Set bound;
035: private List remaining;
036:
037: /**
038: A public SimpleTripleSorter needs no arguments (we imagine more sophisticated
039: ones might).
040: */
041: public SimpleTripleSorter() {
042: }
043:
044: /**
045: Sort the triple array so that more-bound triples come before less-bound triples.
046: Preserve the order of the elements unless they <i>have<i> to move. Return
047: a new permuted copy of the original array. The work is done by a new instance
048: of SimpleTripleSorter specialised to this triple array (and with helpful state).
049: */
050: public Triple[] sort(Triple[] ts) {
051: return new SimpleTripleSorter(ts).sort();
052: }
053:
054: /**
055: Initialise a working SimpleTripleSorter from the triple array to sort. The working
056: copy has an empty set of bound variables and a mutable (and mutated) list of the
057: original triple array, in the same order.
058: */
059: protected SimpleTripleSorter(Triple[] triples) {
060: this ();
061: this .bound = new HashSet();
062: this .result = new Triple[triples.length];
063: this .remaining = new ArrayList(Arrays.asList(triples));
064: }
065:
066: /**
067: Sort the triple array so that more-bound triples come before less-bound triples.
068: Preserve the order of the elements unless they <i>have<i> to move.
069: <p>
070: The algorithm just repeatedly looks for a lightest triple, moves it into the result
071: array, and re-weighs triples in the light of the new bindings that makes. Of several
072: lightest triples, the first is picked [mostly so that it's easier to write the tests].
073: */
074: protected Triple[] sort() {
075: while (remaining.size() > 0)
076: accept(findMostBinding(findLightest(remaining)));
077: return result;
078: }
079:
080: /**
081: Accept a triple as the next element in the result array, note that all its variables are
082: now bound, and remove it from the list of remaining triples.
083: */
084: protected void accept(Triple t) {
085: result[putIndex++] = t;
086: bind(t);
087: remaining.remove(t);
088: }
089:
090: /**
091: Answer a list of the lightest triples in the candidate list; takes one pass over the
092: candidates.
093:
094: @param candidates the list of triples to select from
095: @return the light of lightest triples [by <code>weight</code>], preserving order
096: */
097: protected List findLightest(List candidates) {
098: List lightest = new ArrayList();
099: int minWeight = 100;
100: for (int i = 0; i < candidates.size(); i += 1) {
101: Triple t = (Triple) candidates.get(i);
102: int w = weight(t);
103: if (w < minWeight) {
104: lightest.clear();
105: lightest.add(t);
106: minWeight = w;
107: } else if (w == minWeight)
108: lightest.add(t);
109: }
110: return lightest;
111: }
112:
113: /**
114: Answer the first most-binding triple in the list of candidates.
115: */
116: protected Triple findMostBinding(List candidates) {
117: int maxBinding = -1;
118: Triple mostBinding = null;
119: for (int i = 0; i < candidates.size(); i += 1) {
120: Triple t = (Triple) candidates.get(i);
121: int count = bindingCount(t);
122: if (count > maxBinding) {
123: mostBinding = t;
124: maxBinding = count;
125: }
126: }
127: return mostBinding;
128: }
129:
130: /**
131: The binding count of a triple is the number of instances of variables in other triples
132: it would capture if it were to be bound.
133:
134: @param t the triple to compute the binding count for
135: @return the total binding count of t with respect to all the triples in remaining
136: */
137: protected int bindingCount(Triple t) {
138: int count = 0;
139: for (int i = 0; i < remaining.size(); i += 1) {
140: Triple other = (Triple) remaining.get(i);
141: if (other != t)
142: count += bindingCount(t, other);
143: }
144: return count;
145: }
146:
147: /**
148: Answer the binding count of t with respect to some other triple
149: */
150: protected int bindingCount(Triple t, Triple other) {
151: return bindingCount(t.getSubject(), other)
152: + bindingCount(t.getPredicate(), other)
153: + bindingCount(t.getObject(), other);
154: }
155:
156: protected int bindingCount(Node n, Triple o) {
157: return n.isVariable() ? bc(n, o.getSubject())
158: + bc(n, o.getPredicate()) + bc(n, o.getObject()) : 0;
159: }
160:
161: /**
162: Answer 1 if nodes are .equals, 0 otherwise.
163: */
164: protected int bc(Node n, Node other) {
165: return n.equals(other) ? 1 : 0;
166: }
167:
168: /**
169: Bind a triple by binding each of its nodes.
170: */
171: protected void bind(Triple t) {
172: bind(t.getSubject());
173: bind(t.getPredicate());
174: bind(t.getObject());
175: }
176:
177: protected void bind(Node n) {
178: if (n.isVariable())
179: bound.add(n);
180: }
181:
182: /**
183: In this simple sorter, the weight of a triple is the sum of the weights of its nodes.
184: None of the positions get weighted differently. One might choose to weigh
185: positions that were more search-intensive more heavily.
186:
187: @param t the triple to be weighed [with respect to the bound variables]
188: @return the weight of the triple, rising as the triple is more variable
189: */
190: protected int weight(Triple t) {
191: return weight(t.getSubject()) + weight(t.getPredicate())
192: + weight(t.getObject());
193: }
194:
195: /**
196: In this simple sorter, concrete nodes weigh nothing. [This is, after all, computing
197: rather than building.] ANYs cost the most, because they cannot be bound, and
198: variable nodes cost a little if they are bound and a lot if they are not.
199: <p>
200: The rules are
201: <ul>
202: <li>any concrete node weighs nothing
203: <li>a bound variable node weighs something, but a triple which is three bound
204: variables must weigh less than a triple with an unbound variable
205: <li>an ANY node weighs more than an unbound variable node but less than
206: two unbound variable nodes
207: </ul>
208:
209: @param n the node to be weighed [with respect to the bound variables]
210: @return the weight of the node
211: */
212: protected int weight(Node n) {
213: return n.isConcrete() ? 0 : n.equals(Node.ANY) ? 5 : bound
214: .contains(n) ? 1 : 4;
215: }
216: }
217:
218: /*
219: (c) Copyright 2003, 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP
220: All rights reserved.
221:
222: Redistribution and use in source and binary forms, with or without
223: modification, are permitted provided that the following conditions
224: are met:
225:
226: 1. Redistributions of source code must retain the above copyright
227: notice, this list of conditions and the following disclaimer.
228:
229: 2. Redistributions in binary form must reproduce the above copyright
230: notice, this list of conditions and the following disclaimer in the
231: documentation and/or other materials provided with the distribution.
232:
233: 3. The name of the author may not be used to endorse or promote products
234: derived from this software without specific prior written permission.
235:
236: THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
237: IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
238: OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
239: IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
240: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
241: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
242: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
243: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
244: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
245: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
246: */
|