001: //
002: // Copyright (C) 2005 United States Government as represented by the
003: // Administrator of the National Aeronautics and Space Administration
004: // (NASA). All Rights Reserved.
005: //
006: // This software is distributed under the NASA Open Source Agreement
007: // (NOSA), version 1.3. The NOSA has been approved by the Open Source
008: // Initiative. See the file NOSA-1.3-JPF at the top of the distribution
009: // directory tree for the complete NOSA document.
010: //
011: // THE SUBJECT SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY OF ANY
012: // KIND, EITHER EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT
013: // LIMITED TO, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL CONFORM TO
014: // SPECIFICATIONS, ANY IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR
015: // A PARTICULAR PURPOSE, OR FREEDOM FROM INFRINGEMENT, ANY WARRANTY THAT
016: // THE SUBJECT SOFTWARE WILL BE ERROR FREE, OR ANY WARRANTY THAT
017: // DOCUMENTATION, IF PROVIDED, WILL CONFORM TO THE SUBJECT SOFTWARE.
018: //
019:
020: package gov.nasa.jpf.util;
021:
022: /**
023: * A specialized set that can store Md5 checksums efficiently. It will also
024: * assign each element of the set a unique and sequential integer Id (0 to the
025: * size of the set - 1).
026: *
027: * This implementation does not allocate an individual object for each element
028: * of the set.
029: *
030: * We balance the tree using red-black balancing after each add operation.
031: * The tree will stay roughly balanced.
032: *
033: * @author Owen O'Malley
034: */
035: public class Md5Set {
036: public static final int NULL = -1;
037: private static final int DEFAULT_INITIAL_CAPACITY = 10000;
038:
039: // Various values controlling the offset of the data within the array.
040: private static final int VALUE_OFFSET = 0;
041: private static final int VALUE_SIZE = 4;
042: private static final int LEFT_OFFSET = VALUE_OFFSET + VALUE_SIZE;
043: private static final int RIGHT_OFFSET = LEFT_OFFSET + 1;
044: private static final int ELEMENT_SIZE = RIGHT_OFFSET + 1;
045:
046: private static final int BYTES_PER_INT = 4;
047:
048: private int size = 0;
049: private DynamicIntArray data;
050: private int root = 0;
051:
052: /**
053: * Create a set with a default initial capacity.
054: */
055: public Md5Set() {
056: data = new DynamicIntArray(DEFAULT_INITIAL_CAPACITY
057: * ELEMENT_SIZE);
058: }
059:
060: /**
061: * Create a set with the given initial capacity.
062: */
063: public Md5Set(int initial_capacity) {
064: data = new DynamicIntArray(initial_capacity * ELEMENT_SIZE);
065: }
066:
067: /**
068: * Insert a new node into the data array, growing the array as necessary.
069: *
070: * @return Returns the position of the new node.
071: */
072: private int insert(int[] val, int left, int right) {
073: int position = size;
074: size += 1;
075: int offset = position * ELEMENT_SIZE + VALUE_OFFSET;
076: data.grow((position + 1) * ELEMENT_SIZE);
077: for (int i = 0; i < VALUE_SIZE; ++i) {
078: data.set(offset + i, val[i]);
079: }
080: setLeft(position, left);
081: setRight(position, right);
082: setRed(position, true);
083: return position;
084: }
085:
086: /**
087: * Convert bytes into a single integer.
088: */
089: private int formInt(byte[] val, int offset) {
090: int result = 0;
091: for (int i = 0; i < VALUE_SIZE; ++i) {
092: result = (result << 8) | (val[offset + i] & 255);
093: }
094: return result;
095: }
096:
097: /**
098: * I keep this array around so that I can reuse it with each addition.
099: * The only access should be through convertValueToInts.
100: */
101: private int[] new_val = new int[VALUE_SIZE];
102:
103: /**
104: * Convert the array of bytes to an array of ints.
105: */
106: private int[] convertValueToInts(byte[] val) {
107: assert val.length == VALUE_SIZE * BYTES_PER_INT;
108: for (int i = 0; i < VALUE_SIZE; ++i) {
109: new_val[i] = formInt(val, i * BYTES_PER_INT);
110: }
111: return new_val;
112: }
113:
114: /**
115: * Compare the value at the given position to the new value.
116: * @return 0 if the values are the same, -1 if the new value is smaller and
117: * 1 if the new value is larger.
118: */
119: private int compareValue(int[] val, int position) {
120: int offset = position * ELEMENT_SIZE + VALUE_OFFSET;
121: for (int i = 0; i < VALUE_SIZE; ++i) {
122: int array_val = data.get(offset + i);
123: if (array_val > val[i]) {
124: return -1;
125: } else if (array_val < val[i]) {
126: return 1;
127: }
128: }
129: return 0;
130: }
131:
132: /**
133: * Is the given node red as opposed to black? To prevent having an extra word
134: * in the data array, we just the low bit on the left child index.
135: */
136: private boolean isRed(int position) {
137: if (position == NULL) {
138: return false;
139: }
140: return (data.get(position * ELEMENT_SIZE + LEFT_OFFSET) & 1) == 1;
141: }
142:
143: /**
144: * Set the red bit true or false.
145: */
146: private void setRed(int position, boolean is_red) {
147: int offset = position * ELEMENT_SIZE + LEFT_OFFSET;
148: if (is_red) {
149: data.set(offset, data.get(offset) | 1);
150: } else {
151: data.set(offset, data.get(offset) & ~1);
152: }
153: }
154:
155: /**
156: * Get the left field of the given position.
157: */
158: private int getLeft(int position) {
159: return data.get(position * ELEMENT_SIZE + LEFT_OFFSET) >> 1;
160: }
161:
162: /**
163: * Get the right field of the given position.
164: */
165: private int getRight(int position) {
166: return data.get(position * ELEMENT_SIZE + RIGHT_OFFSET);
167: }
168:
169: /**
170: * Set the left field of the given position.
171: * Note that we are storing the node color in the low bit of the left pointer.
172: */
173: private void setLeft(int position, int left) {
174: int offset = position * ELEMENT_SIZE + LEFT_OFFSET;
175: data.set(offset, (left << 1) | (data.get(offset) & 1));
176: }
177:
178: /**
179: * Set the right field of the given position.
180: */
181: private void setRight(int position, int right) {
182: data.set(position * ELEMENT_SIZE + RIGHT_OFFSET, right);
183: }
184:
185: /**
186: * Rebalance the red-black aspect of the tree to restore the invariants.
187: * Invariants:
188: * 1. If a node is red, both of its children are black.
189: * 2. Each child of a node has the same black height (the number of black
190: * nodes between it and the leaves of the tree).
191: *
192: * Inserted nodes are at the leaves and are red, therefore there is at most a
193: * violation of rule 1 at the node we just put in. Instead of always keeping
194: * the parents, this routine passing down the context.
195: *
196: * The fix is broken down into 6 cases (1.{1,2,3} and 2.{1,2,3} that are
197: * left-right mirror images of each other). See Algorighms by Cormen,
198: * Leiserson, and Rivest for the explaination of the subcases.
199: *
200: * @param goal The value of the node that we added.
201: * @param node The node that we are fixing right now.
202: * @param parent Nodes' parent
203: * @param grandparent Parent's parent
204: * @param great_grandparent Grandparent's parent
205: * @return Does parent also need to be checked and/or fixed?
206: */
207: private boolean fixAfterInsert(int[] goal, int node, int parent,
208: int grandparent, int great_grandparent) {
209: int compare = compareValue(goal, node);
210: boolean keep_going = true;
211:
212: // Recurse down to find the added node. We need to recurse down to
213: // build the context that lets us fix the tree.
214: if (compare < 0) {
215: keep_going = fixAfterInsert(goal, getLeft(node), node,
216: parent, grandparent);
217: } else if (compare > 0) {
218: keep_going = fixAfterInsert(goal, getRight(node), node,
219: parent, grandparent);
220: }
221:
222: // we don't need to fix the root (because it is always set to black)
223: if (node == root) {
224: keep_going = false;
225: }
226:
227: // Do we need to fix this node? Only if there are two reds right under each
228: // other.
229: if (keep_going && isRed(node) && isRed(parent)) {
230: if (parent == getLeft(grandparent)) {
231: int uncle = getRight(grandparent);
232: if (isRed(uncle)) {
233: // case 1.1
234: setRed(parent, false);
235: setRed(uncle, false);
236: setRed(grandparent, true);
237: keep_going = true;
238: } else {
239: if (node == getRight(parent)) {
240: // case 1.2
241: // swap node and parent
242: int tmp = node;
243: node = parent;
244: parent = tmp;
245: // left-rotate on node
246: if (node == getLeft(grandparent)) {
247: setLeft(grandparent, parent);
248: } else {
249: setRight(grandparent, parent);
250: }
251: setRight(node, getLeft(parent));
252: setLeft(parent, node);
253: }
254:
255: // case 1.2 and 1.3
256: setRed(parent, false);
257: setRed(grandparent, true);
258: // right-rotate on grandparent
259: if (great_grandparent == NULL) {
260: root = parent;
261: } else if (getLeft(great_grandparent) == grandparent) {
262: setLeft(great_grandparent, parent);
263: } else {
264: setRight(great_grandparent, parent);
265: }
266: setLeft(grandparent, getRight(parent));
267: setRight(parent, grandparent);
268: keep_going = false;
269: }
270: } else {
271: int uncle = getLeft(grandparent);
272: if (isRed(uncle)) {
273: // case 2.1
274: setRed(parent, false);
275: setRed(uncle, false);
276: setRed(grandparent, true);
277: keep_going = true;
278: } else {
279: if (node == getLeft(parent)) {
280: // case 2.2
281: // swap node and parent
282: int tmp = node;
283: node = parent;
284: parent = tmp;
285: // right-rotate on node
286: if (node == getLeft(grandparent)) {
287: setLeft(grandparent, parent);
288: } else {
289: setRight(grandparent, parent);
290: }
291: setLeft(node, getRight(parent));
292: setRight(parent, node);
293: }
294: // case 2.2 and 2.3
295: setRed(parent, false);
296: setRed(grandparent, true);
297: // left-rotate on grandparent
298: if (great_grandparent == NULL) {
299: root = parent;
300: } else if (getRight(great_grandparent) == grandparent) {
301: setRight(great_grandparent, parent);
302: } else {
303: setLeft(great_grandparent, parent);
304: }
305: setRight(grandparent, getLeft(parent));
306: setLeft(parent, grandparent);
307: keep_going = false;
308: }
309: }
310: }
311: return keep_going;
312: }
313:
314: /**
315: * Add the given value to the set, if it is not already present.
316: *
317: * @return true if the value was not already in the set.
318: */
319: public boolean add(byte[] val) {
320: int[] int_val = convertValueToInts(val);
321: if (size == 0) {
322: insert(int_val, NULL, NULL);
323: setRed(root, false);
324: return true;
325: }
326: int current = root;
327: int prev = NULL;
328: boolean on_left = true;
329: while (current != NULL) {
330: int compare = compareValue(int_val, current);
331: if (compare == 0) {
332: return false;
333: }
334: prev = current;
335: if (compare < 0) {
336: current = getLeft(current);
337: on_left = true;
338: } else {
339: current = getRight(current);
340: on_left = false;
341: }
342: }
343: int position = insert(int_val, NULL, NULL);
344: if (on_left) {
345: setLeft(prev, position);
346: } else {
347: setRight(prev, position);
348: }
349: fixAfterInsert(int_val, root, NULL, NULL, NULL);
350: setRed(root, false);
351: return true;
352: }
353:
354: public int getId(byte[] val) {
355: if (size == 0) {
356: return NULL;
357: }
358: int[] int_val = convertValueToInts(val);
359:
360: // Check the one that was just added, because most of the time that will be
361: // the one that they want.
362: if (compareValue(int_val, size - 1) == 0) {
363: return size - 1;
364: }
365:
366: // Check the "hard" case by traversing the tree.
367: int current = root;
368: while (current != NULL) {
369: int compare = compareValue(int_val, current);
370: if (compare < 0) {
371: current = getLeft(current);
372: } else if (compare > 0) {
373: current = getRight(current);
374: } else {
375: return current;
376: }
377: }
378: return NULL;
379: }
380:
381: /**
382: * Get the number of elements in the set.
383: */
384: public int size() {
385: return size;
386: }
387:
388: /**
389: * A counter for testing that counts how many times checkSubtree was called.
390: */
391: private int check_count = 0;
392:
393: /**
394: * Checks the red-black tree rules to make sure that we have correctly built
395: * a valid tree.
396: *
397: * Properties:
398: * 1. Red nodes must have black children
399: * 2. Each node must have the same black height on both sides.
400: *
401: * @param node The id of the root of the subtree to check for the red-black
402: * tree properties.
403: * @return The black-height of the subtree.
404: */
405: private int checkSubtree(int node) {
406: if (node == NULL) {
407: return 1;
408: }
409: check_count += 1;
410: boolean is_red = isRed(node);
411: int left = getLeft(node);
412: int right = getRight(node);
413: if (is_red) {
414: assert !isRed(left) : "Left node of " + node + " is "
415: + left + " and both are red.";
416: assert !isRed(right) : "Right node of " + node + " is "
417: + right + " and both are red.";
418: }
419: int left_depth = checkSubtree(left);
420: int right_depth = checkSubtree(right);
421: assert left_depth == right_depth : "Lopsided tree at node "
422: + node + " with depths " + left_depth + " and "
423: + right_depth;
424: if (is_red) {
425: return left_depth;
426: } else {
427: return left_depth + 1;
428: }
429: }
430:
431: /**
432: * Checks the validity of the entire tree. Also ensures that the number of
433: * nodes visited is the same as the size of the set.
434: */
435: private void checkTree() {
436: check_count = 0;
437: checkSubtree(root);
438: assert check_count == size : "Traverse count " + check_count
439: + " disagrees with size " + size;
440: }
441:
442: /**
443: * Build up a test md5 value from a string. The data is filled with the
444: * character values and then 0 filled.
445: */
446: private static void buildTestData(byte[] data, String val) {
447: int val_len = val.length();
448: for (int i = 0; i < 16; i++) {
449: if (i < val_len) {
450: data[i] = (byte) val.charAt(i);
451: } else {
452: data[i] = 0;
453: }
454: }
455: }
456:
457: /**
458: * A test driver that takes each command line argument and puts it into the
459: * set. The checkTree function, which checks for a valid red-black tree, is
460: * called after each addition.
461: */
462: public static void main(String[] arg) {
463: Md5Set my_set = new Md5Set(2);
464: byte[] bytes = new byte[16];
465: for (int i = 0; i < arg.length; ++i) {
466: buildTestData(bytes, arg[i]);
467: System.out.println("add(" + arg[i] + ")= "
468: + my_set.add(bytes));
469: my_set.checkTree();
470: }
471: System.out.println("size = " + my_set.size());
472: for (int i = 0; i < arg.length; ++i) {
473: buildTestData(bytes, arg[i]);
474: int id = my_set.getId(bytes);
475: assert id != -1 : "Can not find element " + arg[i];
476: System.out.println("getId(" + arg[i] + ")= " + id);
477: }
478: }
479: }
|