001: /* BdbMultipleWorkQueuesTest
002: *
003: * $Id: BdbMultipleWorkQueuesTest.java 4161 2006-01-30 23:10:35Z gojomo $
004: *
005: * Created on Jul 21, 2005
006: *
007: * Copyright (C) 2005 Internet Archive.
008: *
009: * This file is part of the Heritrix web crawler (crawler.archive.org).
010: *
011: * Heritrix is free software; you can redistribute it and/or modify
012: * it under the terms of the GNU Lesser Public License as published by
013: * the Free Software Foundation; either version 2.1 of the License, or
014: * any later version.
015: *
016: * Heritrix is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: * GNU Lesser Public License for more details.
020: *
021: * You should have received a copy of the GNU Lesser Public License
022: * along with Heritrix; if not, write to the Free Software
023: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024: */
025: package org.archive.crawler.frontier;
026:
027: import org.apache.commons.httpclient.URIException;
028: import org.archive.crawler.datamodel.CandidateURI;
029: import org.archive.crawler.datamodel.CrawlURI;
030: import org.archive.net.UURIFactory;
031:
032: import com.sleepycat.je.tree.Key;
033:
034: import junit.framework.TestCase;
035:
036: /**
037: * Unit tests for BdbMultipleWorkQueues functionality.
038: *
039: * @author gojomo
040: */
041: public class BdbMultipleWorkQueuesTest extends TestCase {
042:
043: /**
044: * Basic sanity checks for calculateInsertKey() -- ensure ordinal, cost,
045: * and schedulingDirective have the intended effects, for ordinal values
046: * up through 1/4th of the maximum (about 2^61).
047: *
048: * @throws URIException
049: */
050: public void testCalculateInsertKey() throws URIException {
051: for (long ordinalOrigin = 1; ordinalOrigin < Long.MAX_VALUE / 4; ordinalOrigin <<= 1) {
052: CandidateURI cauri1 = new CandidateURI(UURIFactory
053: .getInstance("http://archive.org/foo"));
054: CrawlURI curi1 = new CrawlURI(cauri1, ordinalOrigin);
055: curi1.setClassKey("foo");
056: byte[] key1 = BdbMultipleWorkQueues.calculateInsertKey(
057: curi1).getData();
058: CandidateURI cauri2 = new CandidateURI(UURIFactory
059: .getInstance("http://archive.org/bar"));
060: CrawlURI curi2 = new CrawlURI(cauri2, ordinalOrigin + 1);
061: curi2.setClassKey("foo");
062: byte[] key2 = BdbMultipleWorkQueues.calculateInsertKey(
063: curi2).getData();
064: CandidateURI cauri3 = new CandidateURI(UURIFactory
065: .getInstance("http://archive.org/baz"));
066: CrawlURI curi3 = new CrawlURI(cauri3, ordinalOrigin + 2);
067: curi3.setClassKey("foo");
068: curi3.setSchedulingDirective(CandidateURI.HIGH);
069: byte[] key3 = BdbMultipleWorkQueues.calculateInsertKey(
070: curi3).getData();
071: CandidateURI cauri4 = new CandidateURI(UURIFactory
072: .getInstance("http://archive.org/zle"));
073: CrawlURI curi4 = new CrawlURI(cauri4, ordinalOrigin + 3);
074: curi4.setClassKey("foo");
075: curi4.setHolderCost(2);
076: byte[] key4 = BdbMultipleWorkQueues.calculateInsertKey(
077: curi4).getData();
078: CandidateURI cauri5 = new CandidateURI(UURIFactory
079: .getInstance("http://archive.org/gru"));
080: CrawlURI curi5 = new CrawlURI(cauri5, ordinalOrigin + 4);
081: curi5.setClassKey("foo");
082: curi5.setHolderCost(1);
083: byte[] key5 = BdbMultipleWorkQueues.calculateInsertKey(
084: curi5).getData();
085: // ensure that key1 (with lower ordinal) sorts before key2 (higher
086: // ordinal)
087: assertTrue("lower ordinal sorting first (" + ordinalOrigin
088: + ")", Key.compareKeys(key1, key2, null) < 0);
089: // ensure that key3 (with HIGH scheduling) sorts before key2 (even
090: // though
091: // it has lower ordinal)
092: assertTrue("lower directive sorting first ("
093: + ordinalOrigin + ")", Key.compareKeys(key3, key2,
094: null) < 0);
095: // ensure that key5 (with lower cost) sorts before key4 (even though
096: // key4 has lower ordinal and same default NORMAL scheduling directive)
097: assertTrue("lower cost sorting first (" + ordinalOrigin
098: + ")", Key.compareKeys(key5, key4, null) < 0);
099: }
100: }
101: }
|