001: /*****************************************************************************
002: * Source code metadata
003: *
004: * Author ijd
005: * Package Jena2
006: * Created Nov 30, 2007
007: * Filename OntTools.java
008: *
009: * (c) Copyright 2007, 2008 Hewlett-Packard Development Company, LP
010: * (see footer for full conditions)
011: *****************************************************************************/package com.hp.hpl.jena.ontology;
012:
013: // Imports
014: ///////////////
015: import java.util.*;
016:
017: import org.apache.commons.logging.Log;
018: import org.apache.commons.logging.LogFactory;
019:
020: import com.hp.hpl.jena.ontology.impl.test.TestOntTools;
021: import com.hp.hpl.jena.rdf.model.*;
022: import com.hp.hpl.jena.shared.JenaException;
023: import com.hp.hpl.jena.util.iterator.Filter;
024:
025: /**
026: * <p>
027: * Some general utilities and algorithms to support developers working with the
028: * general classes in the Jena ontology API. <strong>Warning</strong> these
029: * utilities are <strong>experimental</strong>. Extensive testing has not yet
030: * occurred (see {@link TestOntTools} for basic unit tests), and in particular
031: * performance testing has not been carried out yet. Users are advised to exercise
032: * caution before relying on these utilities in production code. Please send
033: * any comments or suggestions to the
034: * <a href="http://tech.groups.yahoo.com/group/jena-dev">Jena support email list</a>.
035: * </p>
036: *
037: * @author Ian Dickinson, HP Labs (<a href="mailto:Ian.Dickinson@hp.com">email</a>)
038: */
039: public class OntTools {
040: // Constants
041: //////////////////////////////////
042:
043: // Static variables
044: //////////////////////////////////
045:
046: static private Log log = LogFactory.getLog(OntTools.class);
047:
048: // Instance variables
049: //////////////////////////////////
050:
051: // Constructors
052: //////////////////////////////////
053:
054: // External signature methods
055: //////////////////////////////////
056:
057: /**
058: * <p>Answer the lowest common ancestor of two classes in a given ontology. This
059: * is the class that is farthest from the root concept (defaulting to
060: * <code>owl:Thing</code> which is a super-class of both <code>u</code>
061: * and <code>v</code>. The algorithm is based on
062: * <a href="http://en.wikipedia.org/wiki/Tarjan's_off-line_least_common_ancestors_algorithm">Tarjan's
063: * off-line LCA</a>. The current implementation expects that the given model:
064: * </p>
065: * <ul>
066: * <li>is transitively closed over the <code>subClassOf</code> relation</li>
067: * <li>can cheaply determine <em>direct sub-class</em> relations</li>
068: * </ul>
069: * <p>Both of these conditions are true of the built-in Jena OWL reasoners,
070: * such as {@link OntModelSpec#OWL_MEM_MICRO_RULE_INF}, and external DL
071: * reasoners such as Pellet.</p>
072: *
073: * @param m The ontology model being queried to find the LCA, which should conform
074: * to the reasoner capabilities described above
075: * @param u An ontology class
076: * @param v An ontology class
077: * @return The LCA of <code>u</code> and <code>v</code>
078: * @exception JenaException if the language profile of the given model does not
079: * define a top concept (e.g. <code>owl:Thing</code>)
080: */
081: public static OntClass getLCA(OntModel m, OntClass u, OntClass v) {
082: Resource root = m.getProfile().THING();
083: if (root == null) {
084: throw new JenaException(
085: "The given OntModel has a language profile that does not define a generic root class (such as owl:Thing)");
086: }
087:
088: root = (Resource) root.inModel(m);
089: return getLCA(m, (OntClass) root.as(OntClass.class), u, v);
090: }
091:
092: /**
093: * Answer the lowest common ancestor of two classes, assuming that the given
094: * class is the root concept to start searching from. See {@link #getLCA(OntModel, OntClass, OntClass)}
095: * for details.
096: *
097: * @param m The ontology model being queried to find the LCA, which should conform
098: * to the reasoner capabilities described above
099: * @param root The root concept, which will be the starting point for the algorithm
100: * @param u An ontology class
101: * @param v An ontology class
102: * @return The LCA of <code>u</code> and <code>v</code>
103: * @exception JenaException if the language profile of the given model does not
104: * define a top concept (e.g. <code>owl:Thing</code>)
105: */
106: public static OntClass getLCA(OntModel m, OntClass root,
107: OntClass u, OntClass v) {
108: // check some common cases first
109: if (u.equals(root) || v.equals(root)) {
110: return root;
111: }
112:
113: if (u.hasSubClass(v)) {
114: return u;
115: }
116:
117: if (v.hasSubClass(u)) {
118: return v;
119: }
120:
121: // not a common case, so apply Tarjan's LCA algorithm
122: LCAIndex index = new LCAIndex();
123: lca(root, u, v, index);
124: return (OntClass) index.getLCA(u, v);
125: }
126:
127: /**
128: * <p>Answer the shortest path from the <code>start</code> resource to the <code>end</code> RDF node,
129: * such that every step on the path is accepted by the given filter. A path is a {@link List}
130: * of RDF {@link Statement}s. The subject of the first statement in the list is <code>start</code>,
131: * and the object of the last statement in the list is <code>end</code>.</p>
132: * <p>The <code>onPath</code> argument is a {@link Filter}, which accepts a statement and returns
133: * true if the statement should be considered to be on the path. To search for an unconstrained
134: * path, pass {@link Filter#any} as an argument. To search for a path whose predicates match a
135: * fixed restricted set of property names, pass an instance of {@link PredicatesFilter}.</p>
136: * <p>If there is more than one path of minimal length from <code>start</code> to <code>end</code>,
137: * this method returns an arbitrary one. The algorithm is blind breadth-first search,
138: * with loop detection.</p>
139: *
140: * @param m The model in which we are seeking a path
141: * @param start The starting resource
142: * @param end The end, or goal, node
143: * @param onPath A filter which determines whether a given statement can be considered part
144: * of the path
145: * @return A path, consisting of a list of statements whose first subject is <code>start</code>,
146: * and whose last object is <code>end</code>, or null if no such path exists.
147: */
148: public static Path findShortestPath(Model m, Resource start,
149: RDFNode end, Filter onPath) {
150: List bfs = new LinkedList();
151: Set seen = new HashSet();
152:
153: // initialise the paths
154: for (Iterator i = m.listStatements(start, null, (RDFNode) null)
155: .filterKeep(onPath); i.hasNext();) {
156: bfs.add(new Path().append((Statement) i.next()));
157: }
158:
159: // search
160: Path solution = null;
161: while (solution == null && !bfs.isEmpty()) {
162: Path candidate = (Path) bfs.remove(0);
163:
164: if (candidate.hasTerminus(end)) {
165: solution = candidate;
166: } else {
167: Resource terminus = candidate.getTerminalResource();
168: if (terminus != null) {
169: seen.add(terminus);
170:
171: // breadth-first expansion
172: for (Iterator i = terminus.listProperties()
173: .filterKeep(onPath); i.hasNext();) {
174: Statement link = (Statement) i.next();
175:
176: // no looping allowed, so we skip this link if it takes us to a node we've seen
177: if (!seen.contains(link.getObject())) {
178: bfs.add(candidate.append(link));
179: }
180: }
181: }
182: }
183: }
184:
185: return solution;
186: }
187:
188: /**
189: * Answer a list of the named hierarchy roots of a given {@link OntModel}. This
190: * will be similar to the results of {@link OntModel#listHierarchyRootClasses()},
191: * with the added constraint that every member of the returned iterator will be a
192: * named class, not an anonymous class expression. The named root classes are
193: * calculated from the root classes, by recursively replacing every anonymous class
194: * with its direct sub-classes. Thus it can be seen that the values in the list
195: * consists of the shallowest fringe of named classes in the hierarchy.
196: * @param m An ontology model
197: * @return A list of classes whose members are the named root classes of the
198: * class hierarchy in <code>m</code>
199: */
200: public static List namedHierarchyRoots(OntModel m) {
201: List nhr = new ArrayList(); // named roots
202: List ahr = new ArrayList(); // anon roots
203:
204: // do the initial partition of the root classes
205: partitionByNamed(m.listHierarchyRootClasses(), nhr, ahr);
206:
207: // now push the fringe down until we have only named classes
208: while (!ahr.isEmpty()) {
209: OntClass c = (OntClass) ahr.remove(0);
210: partitionByNamed(c.listSubClasses(true), nhr, ahr);
211: }
212:
213: return nhr;
214: }
215:
216: // Internal implementation methods
217: //////////////////////////////////
218:
219: /**
220: * Compute the LCA disjoint set at <code>cls</code>, noting that we are
221: * searching for the LCA of <code>uCls</code> and <code>vCls</code>.
222: * @param cls The class we are testing (this is 'u' in the Wiki article)
223: * @param uCls One of the two classes we are searching for the LCA of. We
224: * have simplified the set P of pairs to the unity set {uCls,vCls}
225: * @param vCls One of the two classes we are searching for the LCA of. We
226: * have simplified the set P of pairs to the unity set {uCls,vCls}
227: * @param index A data structure mapping resources to disjoint sets (since
228: * we can't side-effect Jena resources), and which is used to record the
229: * LCA pairs
230: */
231: protected static DisjointSet lca(OntClass cls, OntClass uCls,
232: OntClass vCls, LCAIndex index) {
233: log.debug("Entering lca(), cls = " + cls);
234: DisjointSet clsSet = index.getSet(cls);
235: if (clsSet.isBlack()) {
236: // already visited
237: return clsSet;
238: }
239:
240: // not visited yet
241: clsSet.setAncestor(clsSet);
242:
243: // for each child of cls
244: for (Iterator i = cls.listSubClasses(true); i.hasNext();) {
245: OntClass child = (OntClass) i.next();
246:
247: if (child.equals(cls)
248: || child.equals(cls.getProfile().NOTHING())) {
249: // we ignore the reflexive case and bottom
250: continue;
251: }
252:
253: // compute the LCA of the sub-tree
254: DisjointSet v = lca(child, uCls, vCls, index);
255:
256: // union the two disjoint sets together
257: clsSet.union(v);
258:
259: // propagate the distinguished member
260: clsSet.find().setAncestor(clsSet);
261: }
262:
263: // this node is done
264: clsSet.setBlack();
265:
266: // are we inspecting one of the elements we're interested in?
267: if (cls.equals(uCls)) {
268: checkSolution(uCls, vCls, index);
269: } else if (cls.equals(vCls)) {
270: checkSolution(vCls, uCls, index);
271: }
272:
273: return clsSet;
274: }
275:
276: /**
277: * Check to see if we have found a solution to the problem.
278: * TODO: we could throw an exception to simulate a non-local exit
279: * here, since we've assumed that P is the unity set.
280: * @param uCls
281: * @param vCls
282: * @param index
283: */
284: protected static void checkSolution(OntClass uCls, OntClass vCls,
285: LCAIndex index) {
286: DisjointSet vSet = index.getSet(vCls);
287: DisjointSet uSet = index.getSet(uCls);
288:
289: if (vSet != null && vSet.isBlack() && !vSet.used()
290: && uSet != null && uSet.isBlack() && !uSet.used()) {
291: vSet.setUsed();
292: uSet.setUsed();
293: log.debug("Found LCA: u = " + uCls + ", v = " + vCls);
294: OntClass lca = (OntClass) vSet.find().getAncestor()
295: .getNode();
296: log.debug("Found LCA: lca = " + lca);
297: index.setLCA(uCls, vCls, lca);
298: }
299:
300: }
301:
302: /**
303: * Partition the members of an iterator into two lists, according to whether
304: * they are named or anonymous classes
305: * @param i An iterator to partition
306: * @param named A list of named classes
307: * @param anon A list of anonymous classes
308: */
309: protected static void partitionByNamed(Iterator i, List named,
310: List anon) {
311: while (i.hasNext()) {
312: OntClass c = (OntClass) i.next();
313: (c.isAnon() ? anon : named).add(c);
314: }
315: }
316:
317: //==============================================================================
318: // Inner class definitions
319: //==============================================================================
320:
321: /**
322: * A simple representation of disjoint sets
323: */
324: public static class DisjointSet {
325: /** The resource this set represents */
326: private Resource m_node;
327:
328: /** The parent set in a union */
329: private DisjointSet m_parent;
330:
331: /** Heuristic used to build balanced unions */
332: private int m_rank;
333:
334: /** The link to the distinguished member set */
335: private DisjointSet m_ancestor;
336:
337: /** Set to true when the node has been processed */
338: private boolean m_black = false;
339:
340: /** Set to true when we've inspected a black set, since the result is only
341: * correct just after both of the sets for u and v have been marked black */
342: private boolean m_used = false;
343:
344: public DisjointSet(Resource node) {
345: m_node = node;
346: m_rank = 0;
347: m_parent = this ;
348: }
349:
350: public Resource getNode() {
351: return m_node;
352: }
353:
354: public DisjointSet getParent() {
355: return m_parent;
356: }
357:
358: public void setParent(DisjointSet parent) {
359: m_parent = parent;
360: }
361:
362: public int getRank() {
363: return m_rank;
364: }
365:
366: public void incrementRank() {
367: m_rank++;
368: }
369:
370: public DisjointSet getAncestor() {
371: return m_ancestor;
372: }
373:
374: public void setAncestor(DisjointSet anc) {
375: m_ancestor = anc;
376: }
377:
378: public void setBlack() {
379: m_black = true;
380: }
381:
382: public boolean isBlack() {
383: return m_black;
384: }
385:
386: public boolean used() {
387: return m_used;
388: }
389:
390: public void setUsed() {
391: m_used = true;
392: }
393:
394: /**
395: * The find operation collapses the pointer to the root parent, which is
396: * one of Tarjan's standard optimisations.
397: * @return The representative of the union containing this set
398: */
399: public DisjointSet find() {
400: DisjointSet root;
401: if (getParent() == this ) {
402: // the representative of the set
403: root = this ;
404: } else {
405: // otherwise, seek the representative of my parent and save it
406: root = getParent().find();
407: setParent(root);
408: }
409:
410: return root;
411: }
412:
413: /**
414: * The union of two sets
415: * @param y
416: */
417: public void union(DisjointSet y) {
418: DisjointSet xRoot = find();
419: DisjointSet yRoot = y.find();
420:
421: if (xRoot.getRank() > yRoot.getRank()) {
422: yRoot.setParent(xRoot);
423: } else if (yRoot.getRank() > xRoot.getRank()) {
424: xRoot.setParent(yRoot);
425: } else if (xRoot != yRoot) {
426: yRoot.setParent(xRoot);
427: xRoot.incrementRank();
428: }
429: }
430:
431: /**
432: * @see java.lang.Object#toString()
433: * @return A string representation of this set for debugging
434: */
435: public String toString() {
436: StringBuffer buf = new StringBuffer();
437: buf.append("DisjointSet{node=");
438: buf.append(m_node);
439: buf.append(",anc=");
440: buf.append((getAncestor() == this ) ? "self"
441: : (getAncestor() == null ? "null" : getAncestor()
442: .toShortString()));
443: buf.append(",parent=");
444: buf.append((getParent() == this ) ? "self"
445: : (getParent() == null ? "null" : getParent()
446: .toShortString()));
447: buf.append(",rank=");
448: buf.append(getRank());
449: buf.append(m_black ? ",black" : ",white");
450: buf.append("}");
451:
452: return buf.toString();
453: }
454:
455: public String toShortString() {
456: StringBuffer buf = new StringBuffer();
457: buf.append("DisjointSet{node=");
458: buf.append(m_node);
459: buf.append(",parent=");
460: buf.append((getParent() == this ) ? "self"
461: : (getParent() == null ? "null" : getParent()
462: .toShortString()));
463: buf.append("...}");
464:
465: return buf.toString();
466: }
467: }
468:
469: /**
470: * Simple data structure mapping RDF nodes to disjoint sets, and
471: * pairs of resources to their LCA.
472: */
473: public static class LCAIndex {
474: private Map m_setIndex = new HashMap();
475: private Map m_lcaIndex = new HashMap();
476:
477: public Resource getLCA(Resource u, Resource v) {
478: Map map = (Map) m_lcaIndex.get(u);
479: Resource lca = (map == null) ? null : (Resource) map.get(v);
480:
481: if (lca == null) {
482: map = (Map) m_lcaIndex.get(v);
483: lca = (map == null) ? null : (Resource) map.get(u);
484: }
485:
486: return lca;
487: }
488:
489: public void setLCA(Resource u, Resource v, Resource lca) {
490: Map uMap = (Map) m_lcaIndex.get(u);
491: if (uMap == null) {
492: uMap = new HashMap();
493: m_lcaIndex.put(u, uMap);
494: }
495: uMap.put(v, lca);
496: }
497:
498: public DisjointSet getSet(Resource r) {
499: DisjointSet s = (DisjointSet) m_setIndex.get(r);
500: if (s == null) {
501: log.debug("Generating new set for " + r);
502: s = new DisjointSet(r);
503: m_setIndex.put(r, s);
504: } else {
505: log.debug("Retrieving old set for " + r);
506:
507: }
508: return s;
509: }
510: }
511:
512: /**
513: * A path is an application of {@link java.util.List} containing only {@link Statement}
514: * objects, and in which for all adjacent elements <code>S<sub>i-1</sub></code>
515: * and <code>S<sub>i</sub></code>, where <code>i > 0</code>, it is true that:
516: * <code><pre>S<sub>i-1</sub>.getObject().equals( S<sub>i</sub>.getSubject() )</pre></code>
517: */
518: public static class Path extends ArrayList {
519: public Path() {
520: super ();
521: }
522:
523: public Path(Path basePath) {
524: super (basePath);
525: }
526:
527: public Statement getStatement(int i) {
528: return (Statement) get(i);
529: }
530:
531: /** Answer a new Path whose elements are this Path with <code>s</code> added at the end */
532: public Path append(Statement s) {
533: Path newPath = new Path(this );
534: newPath.add(s);
535: return newPath;
536: }
537:
538: /** Answer true if the last link on the path has object equal to <code>n</code> */
539: public boolean hasTerminus(RDFNode n) {
540: return n != null && n.equals(getTerminal());
541: }
542:
543: /** Answer the RDF node at the end of the path, if defined, or null */
544: public RDFNode getTerminal() {
545: return size() > 0 ? ((Statement) get(size() - 1))
546: .getObject() : null;
547: }
548:
549: /** Answer the resource at the end of the path, if defined, or null */
550: public Resource getTerminalResource() {
551: RDFNode n = getTerminal();
552: return (n != null && n.isResource()) ? (Resource) n : null;
553: }
554: }
555:
556: /**
557: * A filter which accepts statements whose predicate matches one of a collection
558: * of predicates held by the filter object.
559: */
560: public static class PredicatesFilter extends Filter {
561: public Collection m_preds;
562:
563: /** Accept statements with any predicate from <code>preds</code> */
564: public PredicatesFilter(Collection preds) {
565: m_preds = preds;
566: }
567:
568: /** Accept statements with any predicate from <code>preds</code> */
569: public PredicatesFilter(Property[] preds) {
570: m_preds = new HashSet();
571: for (int i = 0; i < preds.length; i++) {
572: m_preds.add(preds[i]);
573: }
574: }
575:
576: /** Accept statements with predicate <code>pred</code> */
577: public PredicatesFilter(Property pred) {
578: m_preds = new HashSet();
579: m_preds.add(pred);
580: }
581:
582: public boolean accept(Object s) {
583: return m_preds.contains(((Statement) s).getPredicate());
584: }
585: }
586: }
587:
588: /*
589: (c) Copyright 2007, 2008 Hewlett-Packard Development Company, LP
590: All rights reserved.
591:
592: Redistribution and use in source and binary forms, with or without
593: modification, are permitted provided that the following conditions
594: are met:
595:
596: 1. Redistributions of source code must retain the above copyright
597: notice, this list of conditions and the following disclaimer.
598:
599: 2. Redistributions in binary form must reproduce the above copyright
600: notice, this list of conditions and the following disclaimer in the
601: documentation and/or other materials provided with the distribution.
602:
603: 3. The name of the author may not be used to endorse or promote products
604: derived from this software without specific prior written permission.
605:
606: THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
607: IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
608: OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
609: IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
610: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
611: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
612: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
613: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
614: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
615: THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
616: */
|