001: /*
002: * Heritrix
003: *
004: * $Id: JobConfigureUtils.java 4401 2006-07-31 19:39:04Z gojomo $
005: *
006: * Created on Aug 30, 2004
007: *
008: * Copyright (C) 2003 Internet Archive.
009: *
010: * This file is part of the Heritrix web crawler (crawler.archive.org).
011: *
012: * Heritrix is free software; you can redistribute it and/or modify
013: * it under the terms of the GNU Lesser Public License as published by
014: * the Free Software Foundation; either version 2.1 of the License, or
015: * any later version.
016: *
017: * Heritrix is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU Lesser Public License for more details.
021: *
022: * You should have received a copy of the GNU Lesser Public License
023: * along with Heritrix; if not, write to the Free Software
024: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
025: */
026: package org.archive.crawler.admin.ui;
028: import java.io.BufferedReader;
029: import java.io.BufferedWriter;
030: import java.io.File;
031: import java.io.FileInputStream;
032: import java.io.FileWriter;
033: import java.io.IOException;
034: import java.io.InputStream;
035: import java.io.InputStreamReader;
036: import java.io.Reader;
037: import java.io.StringReader;
038: import java.io.Writer;
039: import java.lang.reflect.Constructor;
040: import java.lang.reflect.InvocationTargetException;
041: import java.util.logging.Level;
042: import java.util.logging.Logger;
044: import javax.management.Attribute;
045: import javax.management.AttributeNotFoundException;
046: import javax.management.InvalidAttributeValueException;
047: import javax.management.MBeanAttributeInfo;
048: import javax.management.MBeanException;
049: import javax.management.ReflectionException;
050: import javax.servlet.http.HttpServletRequest;
051: import javax.servlet.http.HttpServletResponse;
053: import org.archive.crawler.admin.CrawlJob;
054: import org.archive.crawler.admin.CrawlJobHandler;
055: import org.archive.crawler.settings.ComplexType;
056: import org.archive.crawler.settings.CrawlerSettings;
057: import org.archive.crawler.settings.ListType;
058: import org.archive.crawler.settings.MapType;
059: import org.archive.crawler.settings.ModuleAttributeInfo;
060: import org.archive.crawler.settings.ModuleType;
061: import org.archive.crawler.settings.SettingsHandler;
062: import org.archive.crawler.settings.XMLSettingsHandler;
063: import org.archive.crawler.settings.refinements.Refinement;
064: import org.archive.util.IoUtils;
066: /**
067: * Utility methods used configuring jobs in the admin UI.
068: *
069: * Methods are mostly called by the admin UI jsp.
070: *
071: * @author stack
072: * @version $Date: 2006-07-31 19:39:04 +0000 (Mon, 31 Jul 2006) $, $Revision: 4401 $
073: */
074: public class JobConfigureUtils {
075: private static Logger logger = Logger
076: .getLogger(JobConfigureUtils.class.getName());
077: public static final String ACTION = "action";
078: public static final String SUBACTION = "subaction";
079: public static final String FILTERS = "filters";
080: private static final String MAP = "map";
081: private static final String FILTER = "filter";
082: private static final Object ADD = "add";
083: private static final Object MOVEUP = "moveup";
084: private static final Object MOVEDOWN = "movedown";
085: private static final Object REMOVE = "remove";
086: private static final Object GOTO = "goto";
087: private static final Object DONE = "done";
088: private static final Object CONTINUE = "continue"; // keep editting
090: /**
091: * Check passed crawljob CrawlJob setting. Call this method at start of
092: * page.
093: *
094: * @param job
095: * Current CrawlJobHandler.
096: * @param request
097: * Http request.
098: * @param response
099: * Http response.
100: * @return Crawljob.
101: */
102: protected static CrawlJob getAndCheckJob(CrawlJob job,
103: HttpServletRequest request, HttpServletResponse response) {
104: return job;
105: }
107: /**
108: * This methods updates a ComplexType with information passed to it by a
109: * HttpServletRequest. It assumes that for every 'simple' type there is a
110: * corresponding parameter in the request. A recursive call will be made for
111: * any nested ComplexTypes. For each attribute it will check if the relevant
112: * override is set (name.override parameter equals 'true'). If so the
113: * attribute setting on the specified domain level (settings) will be
114: * rewritten. If it is not we well ensure that it isn't being overridden.
115: *
116: * @param mbean
117: * The ComplexType to update
118: * @param settings
119: * CrawlerSettings for the domain to override setting for. null
120: * denotes the global settings.
121: * @param request
122: * The HttpServletRequest to use to update the ComplexType
123: * @param expert
124: * if true expert settings will be updated, otherwise they will
125: * be ignored.
126: */
127: public static void writeNewOrderFile(ComplexType mbean,
128: CrawlerSettings settings, HttpServletRequest request,
129: boolean expert) {
130: // If mbean is transient or a hidden expert setting.
131: if (mbean.isTransient()
132: || (mbean.isExpertSetting() && expert == false)) {
133: return;
134: }
136: MBeanAttributeInfo a[] = mbean.getMBeanInfo(settings)
137: .getAttributes();
138: for (int n = 0; n < a.length; n++) {
139: checkAttribute((ModuleAttributeInfo) a[n], mbean, settings,
140: request, expert);
141: }
142: }
144: /**
145: * Process passed attribute. Check if needs to be written and if so, write
146: * it.
147: *
148: * @param att
149: * Attribute to process.
150: * @param mbean
151: * The ComplexType to update
152: * @param settings
153: * CrawlerSettings for the domain to override setting for. null
154: * denotes the global settings.
155: * @param request
156: * The HttpServletRequest to use to update the ComplexType
157: * @param expert
158: * if true expert settings will be updated, otherwise they will
159: * be ignored.
160: */
161: protected static void checkAttribute(ModuleAttributeInfo att,
162: ComplexType mbean, CrawlerSettings settings,
163: HttpServletRequest request, boolean expert) {
164: // The attributes of the current attribute.
165: Object currentAttribute = null;
166: try {
167: currentAttribute = mbean.getAttribute(settings, att
168: .getName());
169: } catch (Exception e) {
170: logger.severe("Failed getting " + mbean.getAbsoluteName()
171: + " attribute " + att.getName() + ": "
172: + e.getMessage());
173: return;
174: }
176: if (logger.isLoggable(Level.FINE)) {
177: logger.fine("MBEAN: " + mbean.getAbsoluteName() + " "
178: + att.getName() + " TRANSIENT " + att.isTransient()
179: + " " + att.isExpertSetting() + " " + expert);
180: }
182: if (att.isTransient() == false
183: && (att.isExpertSetting() == false || expert)) {
184: if (currentAttribute instanceof ComplexType) {
185: writeNewOrderFile((ComplexType) currentAttribute,
186: settings, request, expert);
187: } else {
188: String attName = att.getName();
189: // Have a 'setting'. Let's see if we need to update it (if
190: // settings == null update all, otherwise only if override
191: // is set.
192: String attAbsoluteName = mbean.getAbsoluteName() + "/"
193: + attName;
194: boolean override = (request
195: .getParameter(attAbsoluteName + ".override") != null)
196: && (request.getParameter(attAbsoluteName
197: + ".override").equals("true"));
198: if (settings == null || override) {
199: if (currentAttribute instanceof ListType) {
200: try {
201: ListType list = (ListType) currentAttribute;
202: Class cls = list.getClass();
203: Constructor constructor = cls
204: .getConstructor(String.class,
205: String.class);
206: list = (ListType) constructor.newInstance(
207: list.getName(), list
208: .getDescription());
209: String[] elems = request
210: .getParameterValues(attAbsoluteName);
211: for (int i = 0; elems != null
212: && i < elems.length; i++) {
213: list.add(elems[i]);
214: }
215: writeAttribute(attName, attAbsoluteName,
216: mbean, settings, list);
217: } catch (Exception e) {
218: e.printStackTrace();
219: logger.severe("Setting new list values on "
220: + attAbsoluteName + ": "
221: + e.getMessage());
222: return;
223: }
224: } else {
225: writeAttribute(attName, attAbsoluteName, mbean,
226: settings, request
227: .getParameter(attAbsoluteName));
228: }
230: } else if (settings != null && override == false) {
231: // Is not being overridden. Need to remove possible
232: // previous overrides.
233: try {
234: mbean.unsetAttribute(settings, attName);
235: } catch (Exception e) {
236: e.printStackTrace();
237: logger.severe("Unsetting attribute on "
238: + attAbsoluteName + ": "
239: + e.getMessage());
240: return;
241: }
242: }
243: }
244: }
245: }
247: /**
248: * Write out attribute.
249: *
250: * @param attName
251: * Attribute short name.
252: * @param attAbsoluteName
253: * Attribute full name.
254: * @param mbean
255: * The ComplexType to update
256: * @param settings
257: * CrawlerSettings for the domain to override setting for. null
258: * denotes the global settings.
259: * @param value
260: * Value to set into the attribute.
261: */
262: protected static void writeAttribute(String attName,
263: String attAbsoluteName, ComplexType mbean,
264: CrawlerSettings settings, Object value) {
265: try {
266: if (logger.isLoggable(Level.FINE)) {
267: logger.fine("MBEAN SET: " + attAbsoluteName + " "
268: + value);
269: }
270: mbean.setAttribute(settings, new Attribute(attName, value));
271: } catch (Exception e) {
272: e.printStackTrace();
273: logger.severe("Setting attribute value " + value + " on "
274: + attAbsoluteName + ": " + e.getMessage());
275: return;
276: }
277: }
279: /**
280: * Check passed job is not null and not readonly.
281: * @param job Job to check.
282: * @param response Http response.
283: * @param redirectBasePath Full path for where to go next if an error.
284: * @param currDomain May be null.
285: * E.g. "/admin/jobs/per/overview.jsp".
286: * @return A job else we've redirected if no job or readonly.
287: * @throws IOException
288: */
289: public static CrawlJob checkCrawlJob(CrawlJob job,
290: HttpServletResponse response, String redirectBasePath,
291: String currDomain) throws IOException {
292: if (job == null) {
293: // Didn't find any job with the given UID or no UID given.
294: response.sendRedirect(redirectBasePath
295: + "?message=No job selected");
296: } else if (job.isReadOnly()) {
297: // Can't edit this job.
298: response
299: .sendRedirect(redirectBasePath
300: + "?job="
301: + job.getUID()
302: + ((currDomain != null && currDomain
303: .length() > 0) ? "&currDomain="
304: + currDomain : "")
305: + "&message=Can't edit a read only job");
306: }
307: return job;
308: }
310: /**
311: * Handle job action.
312: * @param handler CrawlJobHandler to operate on.
313: * @param request Http request.
314: * @param response Http response.
315: * @param redirectBasePath Full path for where to go next if an error.
316: * E.g. "/admin/jobs/per/overview.jsp".
317: * @param currDomain Current domain. Pass null for global domain.
318: * @param reference
319: * @return The crawljob configured.
320: * @throws IOException
321: * @throws AttributeNotFoundException
322: * @throws InvocationTargetException
323: * @throws InvalidAttributeValueException
324: */
325: public static CrawlJob handleJobAction(CrawlJobHandler handler,
326: HttpServletRequest request, HttpServletResponse response,
327: String redirectBasePath, String currDomain, String reference)
328: throws IOException, AttributeNotFoundException,
329: InvocationTargetException, InvalidAttributeValueException {
331: // Load the job to manipulate
332: CrawlJob theJob = checkCrawlJob(handler.getJob(request
333: .getParameter("job")), response, redirectBasePath,
334: currDomain);
336: XMLSettingsHandler settingsHandler = theJob
337: .getSettingsHandler();
338: // If currDomain is null, then we're at top-level.
339: CrawlerSettings settings = settingsHandler
340: .getSettingsObject(currDomain);
342: if (reference != null) {
343: // refinement
344: Refinement refinement = settings.getRefinement(reference);
345: settings = refinement.getSettings();
346: }
348: // See if we need to take any action
349: if (request.getParameter(ACTION) != null) {
350: // Need to take some action.
351: String action = request.getParameter(ACTION);
352: String subaction = request.getParameter(SUBACTION);
353: if (action.equals(FILTERS)) {
354: // Doing something with the filters.
355: String map = request.getParameter(MAP);
356: if (map != null && map.length() > 0) {
357: String filter = request.getParameter(FILTER);
358: MapType filterMap = (MapType) settingsHandler
359: .getComplexTypeByAbsoluteName(settings, map);
360: if (subaction.equals(ADD)) {
361: // Add filter
362: String className = request.getParameter(map
363: + ".class");
364: String typeName = request.getParameter(map
365: + ".name");
366: if (typeName != null && typeName.length() > 0
367: && className != null
368: && className.length() > 0) {
369: ModuleType tmp = SettingsHandler
370: .instantiateModuleTypeFromClassName(
371: typeName, className);
372: filterMap.addElement(settings, tmp);
373: }
374: } else if (subaction.equals(MOVEUP)) {
375: // Move a filter down in a map
376: if (filter != null && filter.length() > 0) {
377: filterMap.moveElementUp(settings, filter);
378: }
379: } else if (subaction.equals(MOVEDOWN)) {
380: // Move a filter up in a map
381: if (filter != null && filter.length() > 0) {
382: filterMap.moveElementDown(settings, filter);
383: }
384: } else if (subaction.equals(REMOVE)) {
385: // Remove a filter from a map
386: if (filter != null && filter.length() > 0) {
387: filterMap.removeElement(settings, filter);
388: }
389: }
390: }
391: // Finally save the changes to disk
392: settingsHandler.writeSettingsObject(settings);
393: } else if (action.equals(DONE)) {
394: // Ok, done editing.
395: if (subaction.equals(CONTINUE)) {
396: // was editting an override/refinement, simply continue
397: if (theJob.isRunning()) {
398: handler.kickUpdate(); //Just to make sure.
399: }
400: String overParam = ((currDomain != null && currDomain
401: .length() > 0) ? "&currDomain="
402: + currDomain : "");
403: String refParam = ((reference != null && reference
404: .length() > 0) ? "&reference=" + reference
405: : "");
406: String messageParam = (refParam.length() > 0) ? "&message=Refinement changes saved"
407: : "&message=Override changes saved";
408: response.sendRedirect(redirectBasePath + "?job="
409: + theJob.getUID() + overParam + refParam
410: + messageParam);
411: } else {
412: // on main, truly 'done'
413: if (theJob.isNew()) {
414: handler.addJob(theJob);
415: response.sendRedirect(redirectBasePath
416: + "?message=Job created");
417: } else {
418: if (theJob.isRunning()) {
419: handler.kickUpdate();
420: }
421: if (theJob.isProfile()) {
422: response.sendRedirect(redirectBasePath
423: + "?message=Profile modified");
424: } else {
425: response.sendRedirect(redirectBasePath
426: + "?message=Job modified");
427: }
428: }
429: }
430: } else if (action.equals(GOTO)) {
431: // Goto another page of the job/profile settings
432: String overParam = ((currDomain != null && currDomain
433: .length() > 0) ? "&currDomain=" + currDomain
434: : "");
435: String refParam = ((reference != null && reference
436: .length() > 0) ? "&reference=" + reference : "");
437: response.sendRedirect(request.getParameter(SUBACTION)
438: + overParam + refParam);
439: }
440: }
441: return theJob;
442: }
444: /**
445: * Print complete seeds list on passed in PrintWriter.
446: * @param hndlr Current handler.
447: * @param payload What to write out.
448: * @throws AttributeNotFoundException
449: * @throws MBeanException
450: * @throws ReflectionException
451: * @throws IOException
452: * @throws IOException
453: */
454: public static void printOutSeeds(SettingsHandler hndlr,
455: String payload) throws AttributeNotFoundException,
456: MBeanException, ReflectionException, IOException {
457: File seedfile = getSeedFile(hndlr);
458: writeReader(new StringReader(payload), new BufferedWriter(
459: new FileWriter(seedfile)));
460: }
462: /**
463: * Print complete seeds list on passed in PrintWriter.
464: * @param hndlr Current handler.
465: * @param out Writer to write out all seeds to.
466: * @throws ReflectionException
467: * @throws MBeanException
468: * @throws AttributeNotFoundException
469: * @throws IOException
470: */
471: public static void printOutSeeds(SettingsHandler hndlr, Writer out)
472: throws AttributeNotFoundException, MBeanException,
473: ReflectionException, IOException {
474: // getSeedStream looks for seeds on disk and on classpath.
475: InputStream is = getSeedStream(hndlr);
476: writeReader(new BufferedReader(new InputStreamReader(is)), out);
477: }
479: /**
480: * Test whether seeds file is of a size that's reasonable
481: * to edit in an HTML textarea.
482: * @param h current settingsHandler
483: * @return true if seeds size is manageable, false otherwise
484: * @throws AttributeNotFoundException
485: * @throws MBeanException
486: * @throws ReflectionException
487: *
488: */
489: public static boolean seedsEdittableSize(SettingsHandler h)
490: throws AttributeNotFoundException, MBeanException,
491: ReflectionException {
492: return getSeedFile(h).length() <= (32 * 1024); // 32K
493: }
495: /**
496: * @param hndlr Settings handler.
497: * @return Seeds file.
498: * @throws ReflectionException
499: * @throws MBeanException
500: * @throws AttributeNotFoundException
501: */
502: protected static File getSeedFile(SettingsHandler hndlr)
503: throws AttributeNotFoundException, MBeanException,
504: ReflectionException {
505: String seedsFileStr = (String) ((ComplexType) hndlr.getOrder()
506: .getAttribute("scope")).getAttribute("seedsfile");
507: return hndlr.getPathRelativeToWorkingDirectory(seedsFileStr);
508: }
510: /**
511: * Return seeds as a stream.
512: * This method will work for case where seeds are on disk or on classpath.
513: * @param hndlr SettingsHandler. Used to find seeds.txt file.
514: * @return InputStream on current seeds file.
515: * @throws IOException
516: * @throws ReflectionException
517: * @throws MBeanException
518: * @throws AttributeNotFoundException
519: */
520: protected static InputStream getSeedStream(SettingsHandler hndlr)
521: throws IOException, AttributeNotFoundException,
522: MBeanException, ReflectionException {
523: InputStream is = null;
524: File seedFile = getSeedFile(hndlr);
525: if (!seedFile.exists()) {
526: // Is the file on the CLASSPATH?
527: is = SettingsHandler.class.getResourceAsStream(IoUtils
528: .getClasspathPath(seedFile));
529: } else if (seedFile.canRead()) {
530: is = new FileInputStream(seedFile);
531: }
532: if (is == null) {
533: throw new IOException(
534: seedFile
535: + " does not"
536: + " exist -- neither on disk nor on CLASSPATH -- or is not"
537: + " readable.");
538: }
539: return is;
540: }
542: /**
543: * Print complete seeds list on passed in PrintWriter.
544: * @param reader File to read seeds from.
545: * @param out Writer to write out all seeds to.
546: * @throws IOException
547: */
548: protected static void writeReader(Reader reader, Writer out)
549: throws IOException {
550: final int bufferSize = 1024 * 4;
551: char[] buffer = new char[bufferSize];
552: int read = -1;
553: while ((read = reader.read(buffer, 0, bufferSize)) != -1) {
554: out.write(buffer, 0, read);
555: }
556: out.flush();
557: }
558: }