001: /*
002: * Copyright 2007 Hippo.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: package nl.hippo.cms.brokenlinkchecker.avalon;
017:
018: import java.util.HashSet;
019: import java.util.NoSuchElementException;
020: import java.util.Set;
021: import java.util.StringTokenizer;
022: import nl.hippo.cms.brokenlinkchecker.BrokenLinkCheckerRun;
023: import nl.hippo.cms.brokenlinkchecker.BrokenLinkCheckerRunConfiguration;
024: import nl.hippo.cms.brokenlinkchecker.BrokenLinkCheckerRunConfigurationBean;
025: import nl.hippo.cms.brokenlinkchecker.log.AvalonLog;
026: import org.apache.avalon.framework.CascadingException;
027: import org.apache.avalon.framework.activity.Initializable;
028: import org.apache.avalon.framework.logger.AbstractLogEnabled;
029: import org.apache.avalon.framework.parameters.ParameterException;
030: import org.apache.avalon.framework.parameters.Parameterizable;
031: import org.apache.avalon.framework.parameters.Parameters;
032: import org.apache.avalon.framework.service.ServiceException;
033: import org.apache.avalon.framework.service.ServiceManager;
034: import org.apache.avalon.framework.service.Serviceable;
035: import org.apache.avalon.framework.thread.ThreadSafe;
036: import org.apache.cocoon.components.cron.CronJob;
037: import org.apache.cocoon.components.cron.JobScheduler;
038:
039: /**
040: * <p>
041: * This Avalon component runs the broken link checker inside an Avalon container
042: * at specified times.
043: * </p>
044: *
045: * <p>
046: * The following parameters are used:
047: * </p>
048: * <table>
049: * <tr>
050: * <th>Parameter name</th>
051: * <th>Requiredness</th>
052: * <th>Description</th>
053: * </tr>
054: * <tr>
055: * <td><code>enabled</code></td>
056: * <td>Required</td>
057: * <td>Whether or not the broken link checker is enabled. If <code>true</code>
058: * the broken link checks will be performed.</td>
059: * </tr>
060: * <tr>
061: * <td><code>role-of-this-component</code></td>
062: * <td>Required if enabled</td>
063: * <td>To be able to schedule itself this component must know its role.
064: * Unfortunately this component cannot obtain its role from the Avalon
065: * environment, so it must be specified by the user.</td>
066: * </tr>
067: * <tr>
068: * <td><code>job-name</code></td>
069: * <td>Optional</td>
070: * <td>The name under which to schedule this component in the job scheduler.
071: * This name must be different from other jobs and names used for other
072: * instances of this component. If omitted the fully-qualified class name of
073: * this component,
074: * <code>nl.hippo.cms.brokenlinkchecker.avalon.BrokenLinkCheckerAvalonWrapper</code>,
075: * will be used.</td>
076: * </tr>
077: * <tr>
078: * <td><code>cron-expression</code></td>
079: * <td>Required if enabled</td>
080: * <td>A scheduling expression specifying when the broken link checker should
081: * run. See the <a
082: * href="http://wiki.opensymphony.com/display/QRTZ1/CronTriggers+Tutorial">Cron
083: * Triggers Tutorial</a> of Quartz for more information about the scheduling
084: * expression.</td>
085: * </tr>
086: * <tr>
087: * <td><code>document-tree-to-check-root-url</code></td>
088: * <td>Required if enabled</td>
089: * <td>The absolute URL of the root of the tree that should be checked for
090: * documents containing broken links. For example:
091: * <code>http://localhost:60000/default/files/default.www/content/bulk</code>.</td>
092: * </tr>
093: * <tr>
094: * <td><code>documents-base-url</code></td>
095: * <td>Required if enabled</td>
096: * <td>The document URLs returned by the repository contain the complete path
097: * from the root of the webserver. This (relative) URL is the base URL that will
098: * be removed from the returned document URLs. For example:
099: * <code>/default/files/default.www</code>.</td>
100: * </tr>
101: * <tr>
102: * <td><code>internal-url-prefixes-to-ignore</code></td>
103: * <td>Optional</td>
104: * <td>Not all internal links point to documents. To prevent these links from
105: * incorrectly being reported as broken, a set of URL prefixes can be specified.
106: * If an internal URL starts with one of these prefixes, it is not checked for
107: * brokenness. The format of this parameter is a set of space-separated relative
108: * URLs. For example: <code>/assets/binaries/ /binaries/</code>.</td>
109: * </tr>
110: * <tr>
111: * <td><code>internal-links-base-url</code></td>
112: * <td>Required if enabled</td>
113: * <td>Internal links are specified using relative URLs. To be able to check if
114: * an internal link is broken an absolute URL is needed. This absolute URL will
115: * be prefixed to the URL of internal links so they can be checked. Note: this
116: * URL must point to a location that is part of the same repository as the tree
117: * containing the documents to check. For example:
118: * <code>http://localhost:60000/default/files/default.www</code>.</td>
119: * </tr>
120: * <tr>
121: * <td><code>repository-username</code></td>
122: * <td>Required if enabled</td>
123: * <td>The username to use to login to the repository containing the documents
124: * to check, the documents pointed to by internal links and the result document.</td>
125: * </tr>
126: * <tr>
127: * <td><code>repository-password</code></td>
128: * <td>Required if enabled</td>
129: * <td>The password to use to login to the repository containing the documents
130: * to check, the documents pointed to by internal links and the result document.</td>
131: * </tr>
132: * <tr>
133: * <td><code>result-document-url</code></td>
134: * <td>Required if enabled</td>
135: * <td>The absolute URL of the document to which the results must be written.
136: * The folder in which the document must be stored must already exist. Note:
137: * this URL must point to a location that is part of the same repository as the
138: * tree containing the documents to check. For example:
139: * <code>http://localhost:60000/default/files/default.www/broken-links.xml</code>.</td>
140: * </tr>
141: * <tr>
142: * <td><code>document-batch-size</code></td>
143: * <td>Optional</td>
144: * <td>For technical and performance reasons it is not possible to retrieve all
145: * documents containing links using one search request. This parameter specifies
146: * how many documents must be processed per search request. Valid values: [1,
147: * 1000]. If this parameter is omitted 100 documents will be processed per
148: * request.</td>
149: * </tr>
150: * <tr>
151: * <td><code>number-of-link-checking-threads</code></td>
152: * <td>Optional</td>
153: * <td>Checking links is a time-consuming process because external servers have
154: * to be contacted. The checking does not use a lot of resources so it is no
155: * problem to check multiple links simultaneously. This parameter specifies how
156: * many links must be checked simultaneously. Valid values: [1, ∞>
157: * (practically the maximum value is 2<sup>31</sup> - 1 = 2147483647), but
158: * values greater than 25 are discouraged because of performance reasons. If
159: * this parameter is omitted 10 links will be checked simultaneously.</td>
160: * </tr>
161: * <tr>
162: * <td><code>link-check-timeout-seconds</code></td>
163: * <td>Optional</td>
164: * <td>It is possible that a server to which a link points is unresponsive. The
165: * thread checking the link will wait a while before deciding that the link is
166: * broken. This timeout (in seconds) specifies how long a thread should wait for
167: * an unresponsive server before marking the link as broken. Valid values: [1,
168: * 2147483]. If this parameter is omitted the threads will wait 10 seconds for
169: * unresponsive servers.</td>
170: * </tr>
171: * </table>
172: */
173: public class BrokenLinkCheckerAvalonWrapper extends AbstractLogEnabled
174: implements Serviceable, Parameterizable, Initializable,
175: CronJob, ThreadSafe {
176: /**
177: * <p>
178: * The name of the parameter used to specify whether or not the broken
179: * links checks are enabled.
180: * </p>
181: */
182: private static final String ENABLED_PARAMETER_NAME = "enabled";
183:
184: /**
185: * <p>
186: * The name of the parameter used to specify the Avalon role of this
187: * component.
188: * </p>
189: */
190: private static final String ROLE_OF_THIS_COMPONENT_PARAMETER_NAME = "role-of-this-component";
191:
192: /**
193: * <p>
194: * The name of the parameter used to specify the name to use for the job
195: * that gets scheduled to run the broken link checks.
196: * </p>
197: */
198: private static final String JOB_NAME_PARAMETER_NAME = "job-name";
199:
200: /**
201: * <p>
202: * The name of the parameter used to specify cron expression specifying
203: * the schedule of the job.
204: * </p>
205: */
206: private static final String CRON_EXPRESSION_PARAMETER_NAME = "cron-expression";
207:
208: /**
209: * <p>
210: * The name of the parameter used to specify the URL of the root of the
211: * tree to check for documents containing broken links.
212: * </p>
213: */
214: private static final String DOCUMENT_TREE_TO_CHECK_ROOT_URL_PARAMETER_NAME = "document-tree-to-check-root-url";
215:
216: /**
217: * <p>
218: * The name of the parameter used to specify the base URL which should
219: * be removed from the URLs of found documents.
220: * </p>
221: */
222: private static final String DOCUMENTS_BASE_URL_PARAMETER_NAME = "documents-base-url";
223:
224: /**
225: * <p>
226: * The name of the parameter used to specify the prefixes of URLs of
227: * internal links that should be ignored.
228: * </p>
229: */
230: private static final String INTERNAL_URL_PREFIXES_TO_IGNORE_PARAMETER_NAME = "internal-url-prefixes-to-ignore";
231:
232: /**
233: * <p>
234: * The name of the parameter used to specify the base URL to which to
235: * append the relative URL of internal links to obtain an absolute URL.
236: * </p>
237: */
238: private static final String INTERNAL_LINKS_BASE_URL_PARAMETER_NAME = "internal-links-base-url";
239:
240: /**
241: * <p>
242: * The name of the parameter used to specify the username to use to
243: * login to the repository.
244: * </p>
245: */
246: private static final String REPOSITORY_USERNAME_PARAMETER_NAME = "repository-username";
247:
248: /**
249: * <p>
250: * The name of the parameter used to specify the password to use to
251: * login to the repository.
252: * </p>
253: */
254: private static final String REPOSITORY_PASSWORD_PARAMETER_NAME = "repository-password";
255:
256: /**
257: * <p>
258: * The name of the parameter used to specify the URL of the document to
259: * which to write the results of checking the links.
260: * </p>
261: */
262: private static final String RESULT_DOCUMENT_URL_PARAMETER_NAME = "result-document-url";
263:
264: /**
265: * <p>
266: * The name of the parameter used to specify the number of documents
267: * that must be requested at once.
268: * </p>
269: */
270: private static final String DOCUMENT_BATCH_SIZE_PARAMETER_NAME = "document-batch-size";
271:
272: /**
273: * <p>
274: * The name of the parameter used to specify the number of threads that
275: * should be used to check the links.
276: * </p>
277: */
278: private static final String NUMBER_OF_LINK_CHECKING_THREADS_PARAMETER_NAME = "number-of-link-checking-threads";
279:
280: /**
281: * <p>
282: * The name of the parameter used to specify the timeout to use when
283: * checking links.
284: * </p>
285: */
286: private static final String LINK_CHECK_TIMEOUT_SECONDS_PARAMETER_NAME = "link-check-timeout-seconds";
287:
288: /**
289: * <p>
290: * The value that will be used to indicate that an integer value was not
291: * specified.
292: * </p>
293: */
294: private static final int UNSPECIFIED_INTEGER_PARAMETER_VALUE = Integer.MIN_VALUE;
295:
296: /**
297: * <p>
298: * The separators (only a space) used to separate URLs in the set of URL
299: * prefixes to ignore.
300: * </p>
301: */
302: private static final String INTERNAL_URL_PREFIXES_TO_IGNORE_SEPARATORS = " ";
303:
304: /**
305: * <p>
306: * The Avalon service manager to use for obtaining Avalon components.
307: * </p>
308: */
309: private ServiceManager serviceManager;
310:
311: /**
312: * <p>
313: * Whether or not the checking of broken links is enabled. If
314: * <code>true</code> checks for broken links are performed.
315: * </p>
316: */
317: private boolean isEnabled;
318:
319: /**
320: * <p>
321: * The Avalon role under which this component is known by the Avalon
322: * container.
323: * </p>
324: */
325: private String roleOfThisComponent;
326:
327: /**
328: * <p>
329: * The name under which to schedule the job for checking links.
330: * </p>
331: */
332: private String jobName;
333:
334: /**
335: * <p>
336: * The cron expression specifying when the job must be run.
337: * </p>
338: */
339: private String cronExpression;
340:
341: /**
342: * <p>
343: * The URL of the root of the tree containing the documents that must be
344: * checked for broken links.
345: * </p>
346: */
347: private String documentTreeToCheckRootUrl;
348:
349: /**
350: * <p>
351: * The (relative) base URL which must be removed from the document URLs
352: * returned by the repository.
353: * </p>
354: */
355: private String documentsBaseUrl;
356:
357: /**
358: * <p>
359: * The set of prefixes of URLs of internal links that should not be
360: * checked for brokenness.
361: * </p>
362: */
363: private Set internalUrlPrefixesToIgnore = new HashSet();
364:
365: /**
366: * <p>
367: * The (absolute) base URL to prepend to the URLs of internal links to
368: * obtain the absolute URL.
369: * </p>
370: */
371: private String internalLinksBaseUrl;
372:
373: /**
374: * <p>
375: * The username to use for logging in to the repository.
376: * </p>
377: */
378: private String repositoryUsername;
379:
380: /**
381: * <p>
382: * The password to use for logging in to the repository.
383: * </p>
384: */
385: private String repositoryPassword;
386:
387: /**
388: * <p>
389: * The URL of the document in which to store the result.
390: * </p>
391: */
392: private String resultDocumentUrl;
393:
394: /**
395: * <p>
396: * The number of documents to request in one batch.
397: * </p>
398: */
399: private int documentBatchSize;
400:
401: /**
402: * <p>
403: * The number of threads to use for checking the links.
404: * </p>
405: */
406: private int numberOfLinkCheckingThreads;
407:
408: /**
409: * <p>
410: * The timeout to use when checking links.
411: * </p>
412: */
413: private int linkCheckTimeoutSeconds;
414:
415: /**
416: * <p>
417: * The configuration for the broken link checker runs. A single instance
418: * of the configuration is used for all runs.
419: * </p>
420: */
421: private BrokenLinkCheckerRunConfiguration brokenLinkCheckerRunConfiguration;
422:
423: /**
424: * <p>
425: * Create an instance of this component. The instance is unusable after
426: * construction. The Avalon lifecycle methods have to be invoked (in the
427: * correct order) on the instance for it to start working.
428: * </p>
429: */
430: public BrokenLinkCheckerAvalonWrapper() {
431: super ();
432:
433: // No action needed. There is nothing to initialize in the constructor.
434: }
435:
436: /**
437: * <p>
438: * Inject the Avalon service manager into this component.
439: * </p>
440: *
441: * @param serviceManager
442: * the Avalon service manager.
443: */
444: public void service(ServiceManager serviceManager) {
445: this .serviceManager = serviceManager;
446: }
447:
448: /**
449: * <p>
450: * Read the configuration from the Avalon parameters.
451: * </p>
452: *
453: * @param parameters
454: * the Avalon parameters to read the configuration from.
455: * @throws ParameterException
456: * if a parameter is missing or has an invalid value.
457: */
458: public void parameterize(Parameters parameters)
459: throws ParameterException {
460: readEnabled(parameters);
461: if (isEnabled) {
462: readRoleOfThisComponent(parameters);
463: readJobName(parameters);
464: readCronExpression(parameters);
465:
466: readDocumentTreeToCheckRootUrl(parameters);
467: readDocumentsBaseUrl(parameters);
468: readInternalUrlPrefixesToIgnore(parameters);
469: readInternalLinksBaseUrl(parameters);
470: readRepositoryUsername(parameters);
471: readRepositoryPassword(parameters);
472: readResultDocumentUrl(parameters);
473: readDocumentBatchSize(parameters);
474: readNumberOfLinkCheckingThreads(parameters);
475: readLinkCheckTimeoutSeconds(parameters);
476: }
477: }
478:
479: /**
480: * <p>
481: * Read and validate the <code>enabled</code> parameter.
482: * </p>
483: *
484: * @param parameters
485: * the Avalon parameters to read the value from.
486: * @throws ParameterException
487: * if a parameter is missing or has an invalid value.
488: */
489: private void readEnabled(Parameters parameters)
490: throws ParameterException {
491: isEnabled = parameters
492: .getParameterAsBoolean(ENABLED_PARAMETER_NAME);
493: }
494:
495: /**
496: * <p>
497: * Read and validate the <code>role-of-this-component</code>
498: * parameter.
499: * </p>
500: *
501: * @param parameters
502: * the Avalon parameters to read the value from.
503: * @throws ParameterException
504: * if a parameter is missing or has an invalid value.
505: */
506: private void readRoleOfThisComponent(Parameters parameters)
507: throws ParameterException {
508: roleOfThisComponent = parameters
509: .getParameter(ROLE_OF_THIS_COMPONENT_PARAMETER_NAME);
510: }
511:
512: /**
513: * <p>
514: * Read and validate the <code>job-name</code> parameter.
515: * </p>
516: *
517: * @param parameters
518: * the Avalon parameters to read the value from.
519: */
520: private void readJobName(Parameters parameters) {
521: jobName = parameters.getParameter(JOB_NAME_PARAMETER_NAME,
522: getClass().getName());
523: }
524:
525: /**
526: * <p>
527: * Read and validate the <code>cron-expression</code> parameter.
528: * </p>
529: *
530: * @param parameters
531: * the Avalon parameters to read the value from.
532: * @throws ParameterException
533: * if a parameter is missing or has an invalid value.
534: */
535: private void readCronExpression(Parameters parameters)
536: throws ParameterException {
537: cronExpression = parameters
538: .getParameter(CRON_EXPRESSION_PARAMETER_NAME);
539: }
540:
541: /**
542: * <p>
543: * Read and validate the <code>document-tree-to-check-root-url</code>
544: * parameter.
545: * </p>
546: *
547: * @param parameters
548: * the Avalon parameters to read the value from.
549: * @throws ParameterException
550: * if a parameter is missing or has an invalid value.
551: */
552: private void readDocumentTreeToCheckRootUrl(Parameters parameters)
553: throws ParameterException {
554: documentTreeToCheckRootUrl = parameters
555: .getParameter(DOCUMENT_TREE_TO_CHECK_ROOT_URL_PARAMETER_NAME);
556: }
557:
558: /**
559: * <p>
560: * Read and validate the <code>documents-base-url</code> parameter.
561: * </p>
562: *
563: * @param parameters
564: * the Avalon parameters to read the value from.
565: * @throws ParameterException
566: * if a parameter is missing or has an invalid value.
567: */
568: private void readDocumentsBaseUrl(Parameters parameters)
569: throws ParameterException {
570: documentsBaseUrl = parameters
571: .getParameter(DOCUMENTS_BASE_URL_PARAMETER_NAME);
572: }
573:
574: /**
575: * <p>
576: * Read the prefixes of URLs of internal links to ignore from the Avalon
577: * parameters, split them and store them in
578: * {@link #internalUrlPrefixesToIgnore}.
579: * </p>
580: *
581: * @param parameters
582: * the Avalon parameters to read the prefixes of URLs of
583: * internal links to ignore from.
584: */
585: private void readInternalUrlPrefixesToIgnore(Parameters parameters) {
586: String internalUrlPrefixesToIgnoreAsString = parameters
587: .getParameter(
588: INTERNAL_URL_PREFIXES_TO_IGNORE_PARAMETER_NAME,
589: null);
590: if (internalUrlPrefixesToIgnoreAsString != null) {
591: StringTokenizer internalUrlPrefixesToIgnoreTokenizer = new StringTokenizer(
592: internalUrlPrefixesToIgnoreAsString,
593: INTERNAL_URL_PREFIXES_TO_IGNORE_SEPARATORS);
594: while (internalUrlPrefixesToIgnoreTokenizer.hasMoreTokens()) {
595: String internalUrlPrefixToIgnore = internalUrlPrefixesToIgnoreTokenizer
596: .nextToken();
597:
598: internalUrlPrefixesToIgnore
599: .add(internalUrlPrefixToIgnore);
600: }
601: }
602: }
603:
604: /**
605: * <p>
606: * Read and validate the <code>internal-links-base-url</code>
607: * parameter.
608: * </p>
609: *
610: * @param parameters
611: * the Avalon parameters to read the value from.
612: * @throws ParameterException
613: * if a parameter is missing or has an invalid value.
614: */
615: private void readInternalLinksBaseUrl(Parameters parameters)
616: throws ParameterException {
617: internalLinksBaseUrl = parameters
618: .getParameter(INTERNAL_LINKS_BASE_URL_PARAMETER_NAME);
619: }
620:
621: /**
622: * <p>
623: * Read and validate the <code>repository-username</code> parameter.
624: * </p>
625: *
626: * @param parameters
627: * the Avalon parameters to read the value from.
628: * @throws ParameterException
629: * if a parameter is missing or has an invalid value.
630: */
631: private void readRepositoryUsername(Parameters parameters)
632: throws ParameterException {
633: repositoryUsername = parameters
634: .getParameter(REPOSITORY_USERNAME_PARAMETER_NAME);
635: }
636:
637: /**
638: * <p>
639: * Read and validate the <code>repository-password</code> parameter.
640: * </p>
641: *
642: * @param parameters
643: * the Avalon parameters to read the value from.
644: * @throws ParameterException
645: * if a parameter is missing or has an invalid value.
646: */
647: private void readRepositoryPassword(Parameters parameters)
648: throws ParameterException {
649: repositoryPassword = parameters
650: .getParameter(REPOSITORY_PASSWORD_PARAMETER_NAME);
651: }
652:
653: /**
654: * <p>
655: * Read and validate the <code>result-document-url</code> parameter.
656: * </p>
657: *
658: * @param parameters
659: * the Avalon parameters to read the value from.
660: * @throws ParameterException
661: * if a parameter is missing or has an invalid value.
662: */
663: private void readResultDocumentUrl(Parameters parameters)
664: throws ParameterException {
665: resultDocumentUrl = parameters
666: .getParameter(RESULT_DOCUMENT_URL_PARAMETER_NAME);
667: }
668:
669: /**
670: * <p>
671: * Read and validate the <code>document-batch-size</code> parameter.
672: * </p>
673: *
674: * @param parameters
675: * the Avalon parameters to read the value from.
676: * @throws ParameterException
677: * if a parameter is missing or has an invalid value.
678: */
679: private void readDocumentBatchSize(Parameters parameters)
680: throws ParameterException {
681: documentBatchSize = parameters.getParameterAsInteger(
682: DOCUMENT_BATCH_SIZE_PARAMETER_NAME,
683: UNSPECIFIED_INTEGER_PARAMETER_VALUE);
684: if (!isUnspecifiedIntegerValue(documentBatchSize)) {
685: if (documentBatchSize < BrokenLinkCheckerRun.MINIMUM_DOCUMENT_BATCH_SIZE
686: || BrokenLinkCheckerRun.MAXIMUM_DOCUMENT_BATCH_SIZE < documentBatchSize) {
687: throw new ParameterException(
688: "'"
689: + DOCUMENT_BATCH_SIZE_PARAMETER_NAME
690: + "' must be greater than or equal to "
691: + BrokenLinkCheckerRun.MINIMUM_DOCUMENT_BATCH_SIZE
692: + " and less than or equal to "
693: + BrokenLinkCheckerRun.MAXIMUM_DOCUMENT_BATCH_SIZE
694: + ".");
695: }
696: }
697: }
698:
699: /**
700: * <p>
701: * Read and validate the <code>number-of-link-checking-threads</code>
702: * parameter.
703: * </p>
704: *
705: * @param parameters
706: * the Avalon parameters to read the value from.
707: * @throws ParameterException
708: * if a parameter is missing or has an invalid value.
709: */
710: private void readNumberOfLinkCheckingThreads(Parameters parameters)
711: throws ParameterException {
712: numberOfLinkCheckingThreads = parameters.getParameterAsInteger(
713: NUMBER_OF_LINK_CHECKING_THREADS_PARAMETER_NAME,
714: UNSPECIFIED_INTEGER_PARAMETER_VALUE);
715: if (!isUnspecifiedIntegerValue(numberOfLinkCheckingThreads)) {
716: if (numberOfLinkCheckingThreads < BrokenLinkCheckerRun.MINIMUM_NUMBER_OF_LINK_CHECKING_THREADS) {
717: throw new ParameterException(
718: "'"
719: + NUMBER_OF_LINK_CHECKING_THREADS_PARAMETER_NAME
720: + "' must be greater than or equal to "
721: + BrokenLinkCheckerRun.MINIMUM_NUMBER_OF_LINK_CHECKING_THREADS
722: + ".");
723: }
724: }
725: }
726:
727: /**
728: * <p>
729: * Read and validate the <code>link-check-timeout-seconds</code>
730: * parameter.
731: * </p>
732: *
733: * @param parameters
734: * the Avalon parameters to read the value from.
735: * @throws ParameterException
736: * if a parameter is missing or has an invalid value.
737: */
738: private void readLinkCheckTimeoutSeconds(Parameters parameters)
739: throws ParameterException {
740: linkCheckTimeoutSeconds = parameters.getParameterAsInteger(
741: LINK_CHECK_TIMEOUT_SECONDS_PARAMETER_NAME,
742: UNSPECIFIED_INTEGER_PARAMETER_VALUE);
743: if (!isUnspecifiedIntegerValue(linkCheckTimeoutSeconds)) {
744: if (linkCheckTimeoutSeconds < BrokenLinkCheckerRun.MINIMUM_LINK_CHECK_TIMEOUT_SECONDS
745: || BrokenLinkCheckerRun.MAXIMUM_LINK_CHECK_TIMEOUT_SECONDS < linkCheckTimeoutSeconds) {
746: throw new ParameterException(
747: "'"
748: + LINK_CHECK_TIMEOUT_SECONDS_PARAMETER_NAME
749: + "' must be greater than or equal to "
750: + BrokenLinkCheckerRun.MINIMUM_LINK_CHECK_TIMEOUT_SECONDS
751: + " and less than or equal to "
752: + BrokenLinkCheckerRun.MAXIMUM_LINK_CHECK_TIMEOUT_SECONDS
753: + ".");
754: }
755: }
756: }
757:
758: /**
759: * <p>
760: * Create the configuration for the broken link checker so it will be
761: * ready when this component is executed as a job, and schedule this
762: * component as a job in the job scheduler.
763: * </p>
764: *
765: * @throws CascadingException
766: * if an error occurs during intialization.
767: */
768: public void initialize() throws CascadingException {
769: if (isEnabled) {
770: createBrokenLinkCheckerRunConfiguration();
771:
772: rescheduleJob();
773: }
774: }
775:
776: /**
777: * <p>
778: * Create the broken link checker run configuration that will be used by
779: * all jobs based on the configuration read from the Avalon parameters.
780: * </p>
781: */
782: private void createBrokenLinkCheckerRunConfiguration() {
783: BrokenLinkCheckerRunConfigurationBean configuration = new BrokenLinkCheckerRunConfigurationBean();
784:
785: configuration
786: .setDocumentTreeToCheckRootUrl(documentTreeToCheckRootUrl);
787:
788: configuration.setDocumentsBaseUrl(documentsBaseUrl);
789:
790: configuration
791: .addInternalUrlPrefixesToIgnore(internalUrlPrefixesToIgnore);
792:
793: configuration.setInternalLinksBaseUrl(internalLinksBaseUrl);
794:
795: configuration.setRepositoryUsername(repositoryUsername);
796: configuration.setRepositoryPassword(repositoryPassword);
797:
798: configuration.setResultDocumentUrl(resultDocumentUrl);
799:
800: if (!isUnspecifiedIntegerValue(documentBatchSize)) {
801: configuration.setDocumentBatchSize(documentBatchSize);
802: }
803:
804: if (!isUnspecifiedIntegerValue(numberOfLinkCheckingThreads)) {
805: configuration
806: .setNumberOfLinkCheckingThreads(numberOfLinkCheckingThreads);
807: }
808:
809: if (!isUnspecifiedIntegerValue(linkCheckTimeoutSeconds)) {
810: configuration
811: .setLinkCheckTimeoutSeconds(linkCheckTimeoutSeconds);
812: }
813:
814: AvalonLog log = new AvalonLog(getLogger());
815: configuration.setLog(log);
816:
817: setBrokenLinkCheckerRunConfiguration(configuration);
818: }
819:
820: /**
821: * <p>
822: * Determine if an integer has the value that indicates that it was not
823: * specified in the Avalon parameters.
824: * </p>
825: *
826: * @param value
827: * the value to check.
828: * @return <code>true</code> if the value was not specified in the
829: * Avalon parameters, <code>false</code> otherwise.
830: */
831: private boolean isUnspecifiedIntegerValue(int value) {
832: return value == UNSPECIFIED_INTEGER_PARAMETER_VALUE;
833: }
834:
835: /**
836: * <p>
837: * Reschedule the job for this broken link checker. This is necessary
838: * because the scheduling parameters might have changed since the last
839: * time this component was used.
840: * </p>
841: *
842: * @throws ServiceException
843: * if the job scheduler cannot be obtained.
844: * @throws CascadingException
845: * if an error occurs while scheduling the job.
846: */
847: private void rescheduleJob() throws ServiceException,
848: CascadingException {
849: JobScheduler scheduler = (JobScheduler) serviceManager
850: .lookup(JobScheduler.ROLE);
851: try {
852: removeJobForThisComponent(scheduler);
853:
854: scheduleJob(scheduler);
855: } finally {
856: ComponentCleanup.release(serviceManager, scheduler,
857: "job scheduler", getLogger());
858: }
859: }
860:
861: /**
862: * <p>
863: * Remove the job of this component from the scheduler. No exception
864: * will be thrown if the job does not exist.
865: * </p>
866: *
867: * @param scheduler
868: * the scheduler to remove the job from.
869: */
870: private void removeJobForThisComponent(JobScheduler scheduler) {
871: try {
872: scheduler.removeJob(jobName);
873: } catch (NoSuchElementException e) {
874: // No action needed. The job is not present and that is the
875: // desired state.
876: }
877: }
878:
879: /**
880: * <p>
881: * Scheduler the job for this component.
882: * </p>
883: *
884: * @param scheduler
885: * the scheduler to schedule the job with.
886: * @throws CascadingException
887: * if an error occurs while scheduling the job.
888: */
889: private void scheduleJob(JobScheduler scheduler)
890: throws CascadingException {
891: scheduler.addJob(jobName, roleOfThisComponent, cronExpression,
892: false);
893: }
894:
895: /**
896: * <p>
897: * Run the broken links checker.
898: * </p>
899: */
900: public void execute(String jobName) {
901: /*
902: * In case a persistent job database is used it is possible that this
903: * component is still scheduled. The job scheduler might start and run
904: * this component when this component is disabled, or before this
905: * component is fully initialized. If not fully initialized the broken
906: * link checker run configuration will not have been created yet.
907: *
908: * If the component is disabled or the broken link checker run
909: * configuration has not been created yet, do not check for broken
910: * links.
911: */
912: if (isEnabled) {
913: BrokenLinkCheckerRunConfiguration brokenLinkCheckerRunConfiguration = getBrokenLinkCheckerRunConfiguration();
914:
915: if (brokenLinkCheckerRunConfiguration != null) {
916: BrokenLinkCheckerRun run = new BrokenLinkCheckerRun(
917: brokenLinkCheckerRunConfiguration);
918: run.execute();
919: }
920: }
921: }
922:
923: /**
924: * <p>
925: * Get the broken link checker run configuration.
926: * </p>
927: *
928: * @return the broken link checker run configuration.
929: */
930: private synchronized BrokenLinkCheckerRunConfiguration getBrokenLinkCheckerRunConfiguration() {
931: return brokenLinkCheckerRunConfiguration;
932: }
933:
934: /**
935: * <p>
936: * Set the broken link checker run configuration.
937: * </p>
938: *
939: * @param configuration
940: * the broken link checker run configuration.
941: */
942: private synchronized void setBrokenLinkCheckerRunConfiguration(
943: BrokenLinkCheckerRunConfiguration configuration) {
944: brokenLinkCheckerRunConfiguration = configuration;
945: }
946: }
|