001: /*
002: * Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2006.
003: *
004: * Licensed under the Aduna BSD-style license.
005: */
006: package org.openrdf.repository.util;
007:
008: import java.util.ArrayList;
009: import java.util.Collection;
010: import java.util.HashMap;
011: import java.util.HashSet;
012: import java.util.Iterator;
013: import java.util.LinkedList;
014: import java.util.Map;
015: import java.util.Set;
016:
017: import info.aduna.iteration.Iterations;
018:
019: import org.openrdf.model.BNode;
020: import org.openrdf.model.Resource;
021: import org.openrdf.model.Statement;
022: import org.openrdf.model.URI;
023: import org.openrdf.model.Value;
024: import org.openrdf.model.util.ModelUtil;
025: import org.openrdf.repository.Repository;
026: import org.openrdf.repository.RepositoryConnection;
027: import org.openrdf.repository.RepositoryException;
028:
029: /**
030: * Utility methods for comparing sets of statements (graphs) with each other.
031: * The supplied comparison operations map bnodes in the two supplied models on
032: * to each other and thus define a graph isomorphism.
033: *
034: * @author jeen
035: * @author Arjohn Kampman
036: */
037: public class RepositoryUtil {
038:
039: /**
040: * Compares the models in the default contexts of the two supplied
041: * repositories and returns true if they are equal. Models are equal if they
042: * contain the same set of statements. bNodes IDs are not relevant for model
043: * equality, they are mapped from one model to the other by using the
044: * attached properties. Note that the method pulls the entire default context
045: * of both repositories into main memory. Use with caution.
046: */
047: public static boolean equals(Repository rep1, Repository rep2)
048: throws RepositoryException {
049: // Fetch statements from rep1 and rep2
050: Set<Statement> model1, model2;
051:
052: RepositoryConnection con1 = rep1.getConnection();
053: try {
054: model1 = Iterations.asSet(con1.getStatements(null, null,
055: null, true));
056: } finally {
057: con1.close();
058: }
059:
060: RepositoryConnection con2 = rep2.getConnection();
061: try {
062: model2 = Iterations.asSet(con2.getStatements(null, null,
063: null, true));
064: } finally {
065: con2.close();
066: }
067:
068: return ModelUtil.equals(model1, model2);
069: }
070:
071: /**
072: * Compares the models of the default context of two repositories and returns
073: * true if rep1 is a subset of rep2. Note that the method pulls the entire
074: * default context of both repositories into main memory. Use with caution.
075: */
076: public static boolean isSubset(Repository rep1, Repository rep2)
077: throws RepositoryException {
078: Set<Statement> model1, model2;
079:
080: RepositoryConnection con1 = rep1.getConnection();
081: try {
082: model1 = Iterations.asSet(con1.getStatements(null, null,
083: null, true));
084: } finally {
085: con1.close();
086: }
087:
088: RepositoryConnection con2 = rep2.getConnection();
089: try {
090: model2 = Iterations.asSet(con2.getStatements(null, null,
091: null, true));
092: } finally {
093: con2.close();
094: }
095:
096: return ModelUtil.isSubset(model1, model2);
097: }
098:
099: /**
100: * Compares two models defined by the default context of two repositories and
101: * returns the difference between the first and the second model (that is,
102: * all statements that are present in rep1 but not in rep2). Blank node IDs
103: * are not relevant for model equality, they are mapped from one model to the
104: * other by using the attached properties. Note that the method pulls the
105: * entire default context of both repositories into main memory. Use with
106: * caution.
107: * <p>
108: * <b>NOTE: this algorithm is currently broken; it doesn't actually map blank
109: * nodes between the two models.</b>
110: *
111: * @return The collection of statements that is the difference between rep1
112: * and rep2.
113: */
114: public static Collection<? extends Statement> difference(
115: Repository rep1, Repository rep2)
116: throws RepositoryException {
117: Collection<Statement> model1 = new HashSet<Statement>();
118: Collection<Statement> model2 = new HashSet<Statement>();
119:
120: RepositoryConnection con1 = rep1.getConnection();
121: try {
122: Iterations.addAll(con1.getStatements(null, null, null,
123: false), model1);
124: } finally {
125: con1.close();
126: }
127:
128: RepositoryConnection con2 = rep2.getConnection();
129: try {
130: Iterations.addAll(con2.getStatements(null, null, null,
131: false), model2);
132: } finally {
133: con2.close();
134: }
135:
136: return difference(model1, model2);
137: }
138:
139: /**
140: * Compares two models, defined by two statement collections, and returns the
141: * difference between the first and the second model (that is, all statements
142: * that are present in model1 but not in model2). Blank node IDs are not
143: * relevant for model equality, they are mapped from one model to the other
144: * by using the attached properties. *
145: * <p>
146: * <b>NOTE: this algorithm is currently broken; it doesn't actually map blank
147: * nodes between the two models.</b>
148: *
149: * @return The collection of statements that is the difference between model1
150: * and model2.
151: */
152: public static Collection<? extends Statement> difference(
153: Collection<? extends Statement> model1,
154: Collection<? extends Statement> model2) {
155: // Create working copies
156: LinkedList<Statement> copy1 = new LinkedList<Statement>(model1);
157: LinkedList<Statement> copy2 = new LinkedList<Statement>(model2);
158:
159: Collection<Statement> result = new ArrayList<Statement>();
160:
161: // Compare statements that don't contain bNodes
162: Iterator<Statement> iter1 = copy1.iterator();
163: while (iter1.hasNext()) {
164: Statement st = iter1.next();
165:
166: if (st.getSubject() instanceof BNode
167: || st.getObject() instanceof BNode) {
168: // One or more of the statement's components is a bNode,
169: // these statements are handled later
170: continue;
171: }
172:
173: // Try to remove the statement from model2
174: boolean removed = copy2.remove(st);
175: if (!removed) {
176: // statement was not present in model2 and is part of the difference
177: result.add(st);
178: }
179: iter1.remove();
180: }
181:
182: // FIXME: this algorithm is broken: bNodeMapping is assumed to contain a
183: // bnode mapping while in reallity it is an empty map
184:
185: HashMap<BNode, BNode> bNodeMapping = new HashMap<BNode, BNode>();
186: // mapBlankNodes(copy1, copy2, bNodeMapping, 0);
187:
188: for (Statement st1 : copy1) {
189: boolean foundMatch = false;
190:
191: for (Statement st2 : copy2) {
192: if (statementsMatch(st1, st2, bNodeMapping)) {
193: // Found a matching statement
194: foundMatch = true;
195: break;
196: }
197: }
198:
199: if (!foundMatch) {
200: // No statement matching st1 was found in model2, st1 is part of
201: // the difference.
202: result.add(st1);
203: }
204: }
205:
206: return result;
207: }
208:
209: private static boolean statementsMatch(Statement st1,
210: Statement st2, Map<BNode, BNode> bNodeMapping) {
211: URI pred1 = st1.getPredicate();
212: URI pred2 = st2.getPredicate();
213:
214: if (!pred1.equals(pred2)) {
215: // predicates don't match
216: return false;
217: }
218:
219: Resource subj1 = st1.getSubject();
220: Resource subj2 = st2.getSubject();
221:
222: if (!(subj1 instanceof BNode)) {
223: if (!subj1.equals(subj2)) {
224: // subjects are not bNodes and don't match
225: return false;
226: }
227: } else { // subj1 instanceof BNode
228: BNode mappedBNode = bNodeMapping.get(subj1);
229:
230: if (mappedBNode != null) {
231: // bNode 'subj1' was already mapped to some other bNode
232: if (!subj2.equals(mappedBNode)) {
233: // 'subj1' and 'subj2' do not match
234: return false;
235: }
236: } else {
237: // 'subj1' was not yet mapped. we need to check if 'subj2' is a
238: // possible mapping candidate
239: if (bNodeMapping.containsValue(subj2)) {
240: // 'subj2' is already mapped to some other value.
241: return false;
242: }
243: }
244: }
245:
246: Value obj1 = st1.getObject();
247: Value obj2 = st2.getObject();
248:
249: if (!(obj1 instanceof BNode)) {
250: if (!obj1.equals(obj2)) {
251: // objects are not bNodes and don't match
252: return false;
253: }
254: } else { // obj1 instanceof BNode
255: BNode mappedBNode = bNodeMapping.get(obj1);
256:
257: if (mappedBNode != null) {
258: // bNode 'obj1' was already mapped to some other bNode
259: if (!obj2.equals(mappedBNode)) {
260: // 'obj1' and 'obj2' do not match
261: return false;
262: }
263: } else {
264: // 'obj1' was not yet mapped. we need to check if 'obj2' is a
265: // possible mapping candidate
266: if (bNodeMapping.containsValue(obj2)) {
267: // 'obj2' is already mapped to some other value.
268: return false;
269: }
270: }
271: }
272:
273: return true;
274: }
275: }
|