001: /*
002: * XMLSettingsHandlerTest
003: *
004: * $Id: XMLSettingsHandlerTest.java 3704 2005-07-18 17:30:21Z stack-sf $
005: *
006: * Created on Jan 28, 2004
007: *
008: * Copyright (C) 2004 Internet Archive.
009: *
010: * This file is part of the Heritrix web crawler (crawler.archive.org).
011: *
012: * Heritrix is free software; you can redistribute it and/or modify it under the
013: * terms of the GNU Lesser Public License as published by the Free Software
014: * Foundation; either version 2.1 of the License, or any later version.
015: *
016: * Heritrix is distributed in the hope that it will be useful, but WITHOUT ANY
017: * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
018: * A PARTICULAR PURPOSE. See the GNU Lesser Public License for more details.
019: *
020: * You should have received a copy of the GNU Lesser Public License along with
021: * Heritrix; if not, write to the Free Software Foundation, Inc., 59 Temple
022: * Place, Suite 330, Boston, MA 02111-1307 USA
023: */
024: package org.archive.crawler.settings;
025:
026: import java.io.File;
027: import java.io.IOException;
028: import java.text.ParseException;
029:
030: import javax.management.Attribute;
031: import javax.management.AttributeNotFoundException;
032: import javax.management.InvalidAttributeValueException;
033: import javax.management.MBeanException;
034: import javax.management.ReflectionException;
035:
036: import org.apache.commons.httpclient.URIException;
037: import org.archive.crawler.datamodel.CrawlOrder;
038: import org.archive.crawler.datamodel.CrawlURI;
039: import org.archive.crawler.framework.CrawlScope;
040: import org.archive.crawler.scope.ClassicScope;
041: import org.archive.crawler.settings.refinements.Criteria;
042: import org.archive.crawler.settings.refinements.PortnumberCriteria;
043: import org.archive.crawler.settings.refinements.Refinement;
044: import org.archive.crawler.settings.refinements.RegularExpressionCriteria;
045: import org.archive.crawler.settings.refinements.TimespanCriteria;
046: import org.archive.net.UURIFactory;
047:
048: /**
049: * Tests the handling of settings files.
050: *
051: * @author John Erik Halse
052: *
053: */
054: public class XMLSettingsHandlerTest extends SettingsFrameworkTestCase {
055:
056: /*
057: * @see TestCase#setUp()
058: */
059: protected void setUp() throws Exception {
060: super .setUp();
061: }
062:
063: /*
064: * @see TestCase#tearDown()
065: */
066: protected void tearDown() throws Exception {
067: super .tearDown();
068: }
069:
070: /*
071: * Test for void writeSettingsObject(CrawlerSettings)
072: */
073: public void testWriteSettingsObjectCrawlerSettings()
074: throws AttributeNotFoundException,
075: InvalidAttributeValueException, MBeanException,
076: ReflectionException {
077:
078: // Write a crawl order file
079: CrawlerSettings settings = getGlobalSettings();
080: XMLSettingsHandler handler = getSettingsHandler();
081: handler.registerValueErrorHandler(this );
082: handler.getOrder().setAttribute(new ClassicScope());
083: handler.writeSettingsObject(settings);
084: assertTrue("Order file was not written", getOrderFile()
085: .exists());
086:
087: // Get a module to alter a setting on
088: ComplexType scope = settings.getModule(CrawlScope.ATTR_NAME);
089: assertNotNull("Could not get module scope", scope);
090:
091: // Alter two settings in a per host file
092: CrawlerSettings perHost = getPerHostSettings();
093: Integer newHops = new Integer(500);
094: String newFrom = "newfrom";
095: scope.setAttribute(perHost, new Attribute(
096: ClassicScope.ATTR_MAX_LINK_HOPS, newHops));
097: CrawlOrder order = handler.getOrder();
098: ComplexType httpHeaders = (ComplexType) order
099: .getAttribute(CrawlOrder.ATTR_HTTP_HEADERS);
100: httpHeaders.setAttribute(perHost, new Attribute(
101: CrawlOrder.ATTR_FROM, newFrom));
102:
103: // Write the per host file
104: handler.writeSettingsObject(perHost);
105: assertTrue("Per host file was not written", handler
106: .settingsToFilename(perHost).exists());
107:
108: // Create a new handler for testing that changes was written to disk
109: XMLSettingsHandler newHandler = new XMLSettingsHandler(
110: getOrderFile());
111: newHandler.initialize();
112:
113: // Read perHost
114: CrawlerSettings newPerHost = newHandler
115: .getSettingsObject(perHost.getScope());
116: assertNotNull("Per host scope could not be read", newPerHost);
117:
118: ComplexType newScope = newHandler
119: .getModule(CrawlScope.ATTR_NAME);
120: assertNotNull(newScope);
121: Integer r1 = (Integer) newScope.getAttribute(newPerHost,
122: ClassicScope.ATTR_MAX_LINK_HOPS);
123: assertEquals(newHops, r1);
124:
125: ComplexType newHttpHeaders = (ComplexType) newHandler
126: .getOrder().getAttribute(newPerHost,
127: CrawlOrder.ATTR_HTTP_HEADERS);
128: assertNotNull(newHttpHeaders);
129:
130: String r2 = (String) newHttpHeaders.getAttribute(newPerHost,
131: CrawlOrder.ATTR_FROM);
132: assertEquals(newFrom, r2);
133: }
134:
135: /**
136: * Test the copying of the entire settings directory.
137: *
138: * @throws IOException
139: */
140: public void testCopySettings() throws IOException {
141: //String testScope = "www.archive.org";
142:
143: // Write the files
144: XMLSettingsHandler handler = getSettingsHandler();
145: handler.writeSettingsObject(getGlobalSettings());
146: handler.writeSettingsObject(getPerHostSettings());
147:
148: // Copy to new location
149: File newOrderFile = new File(getTmpDir(),
150: "SETTINGS_new_order.xml");
151: String newSettingsDir = "SETTINGS_new_per_host_settings";
152: handler.copySettings(newOrderFile, newSettingsDir);
153:
154: // Check if new files where created.
155: assertTrue("Order file was not written", newOrderFile.exists());
156:
157: assertTrue("New settings dir not set", handler
158: .settingsToFilename(getPerHostSettings())
159: .getAbsolutePath()
160: .matches(".*" + newSettingsDir + ".*"));
161: assertTrue("Per host file was not written", handler
162: .settingsToFilename(getPerHostSettings()).exists());
163: }
164:
165: public void testGetSettings() {
166: XMLSettingsHandler handler = getSettingsHandler();
167: CrawlerSettings order = handler.getSettingsObject(null);
168: CrawlerSettings perHost = handler
169: .getSettings("localhost.localdomain");
170: assertNotNull("Didn't get any file", perHost);
171: assertSame("Did not get same file", order, perHost);
172: }
173:
174: public void testGetSettingsObject() {
175: String testScope = "audio.archive.org";
176:
177: XMLSettingsHandler handler = getSettingsHandler();
178: assertNotNull("Couldn't get orderfile", handler
179: .getSettingsObject(null));
180: assertNull("Got nonexisting per host file", handler
181: .getSettingsObject(testScope));
182: assertNotNull("Couldn't create per host file", handler
183: .getOrCreateSettingsObject(testScope));
184: assertNotNull("Couldn't get per host file", handler
185: .getSettingsObject(testScope));
186: }
187:
188: public void testDeleteSettingsObject() {
189: XMLSettingsHandler handler = getSettingsHandler();
190: File file = handler.settingsToFilename(getPerHostSettings());
191: handler.writeSettingsObject(getPerHostSettings());
192: assertTrue("Per host file was not written", file.exists());
193: handler.deleteSettingsObject(getPerHostSettings());
194: assertFalse("Per host file was not deleted", file.exists());
195: }
196:
197: public void testReadWriteRefinements() throws ParseException,
198: InvalidAttributeValueException, AttributeNotFoundException,
199: MBeanException, ReflectionException, URIException {
200: XMLSettingsHandler handler = getSettingsHandler();
201: CrawlerSettings global = getGlobalSettings();
202: CrawlerSettings per = getPerHostSettings();
203: ComplexType headers = (ComplexType) handler.getOrder()
204: .getAttribute(CrawlOrder.ATTR_HTTP_HEADERS);
205:
206: String globalFrom = (String) headers
207: .getAttribute(CrawlOrder.ATTR_FROM);
208: String refinedGlobalFrom = "refined@global.address";
209: String refinedPerFrom = "refined@per.address";
210:
211: // Create a refinement on the global level
212: Refinement globalRefinement = new Refinement(global, "test",
213: "Refinement test");
214: Criteria timespanCriteria = new TimespanCriteria("2300", "2300");
215: globalRefinement.addCriteria(timespanCriteria);
216: Criteria regexpCriteria = new RegularExpressionCriteria(
217: ".*www.*");
218: globalRefinement.addCriteria(regexpCriteria);
219: handler.writeSettingsObject(global);
220:
221: // Override an attribute on the global refinement
222: CrawlerSettings globalRefinementSetting = globalRefinement
223: .getSettings();
224: headers.setAttribute(globalRefinementSetting, new Attribute(
225: CrawlOrder.ATTR_FROM, refinedGlobalFrom));
226: handler.writeSettingsObject(globalRefinementSetting);
227:
228: // Create a refinement on a per level
229: Refinement perRefinement = new Refinement(per, "test2",
230: "Refinement test2");
231: Criteria portCriteria = new PortnumberCriteria("10");
232: perRefinement.addCriteria(portCriteria);
233: handler.writeSettingsObject(per);
234:
235: // Override an attribute on the per refinement
236: CrawlerSettings perRefinementSetting = perRefinement
237: .getSettings();
238: headers.setAttribute(perRefinementSetting, new Attribute(
239: CrawlOrder.ATTR_FROM, refinedPerFrom));
240: handler.writeSettingsObject(perRefinementSetting);
241:
242: // Create a new handler for testing that changes was written to disk
243: XMLSettingsHandler newHandler = new XMLSettingsHandler(
244: getOrderFile());
245: newHandler.initialize();
246: CrawlerSettings newGlobal = newHandler.getSettingsObject(null);
247: assertNotNull("Global scope could not be read", newGlobal);
248: CrawlerSettings newPer = newHandler.getSettingsObject(per
249: .getScope());
250: assertNotNull("Per host scope could not be read", newPer);
251:
252: ComplexType newHeaders = (ComplexType) newHandler.getOrder()
253: .getAttribute(CrawlOrder.ATTR_HTTP_HEADERS);
254: assertNotNull(newHeaders);
255:
256: String newFrom1 = (String) newHeaders.getAttribute(
257: CrawlOrder.ATTR_FROM, getMatchDomainURI());
258: String newFrom2 = (String) newHeaders.getAttribute(
259: CrawlOrder.ATTR_FROM, getMatchHostURI());
260: CrawlURI matchHostAndPortURI = new CrawlURI(UURIFactory
261: .getInstance("http://www.archive.org:10/index.html"));
262: String newFrom3 = (String) newHeaders.getAttribute(
263: CrawlOrder.ATTR_FROM, matchHostAndPortURI);
264:
265: //Check that we got what we expected
266: assertEquals(globalFrom, newFrom1);
267: assertEquals(refinedGlobalFrom, newFrom2);
268: assertEquals(refinedPerFrom, newFrom3);
269: }
270: }
|