001: /*
002: * Copyright 2007 Hippo.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: package nl.hippo.cms.brokenlinkchecker;
017:
018: import java.io.IOException;
019: import java.util.Vector;
020: import nl.hippo.cms.brokenlinkchecker.log.BrokenLinkCheckerLog;
021: import nl.hippo.cms.brokenlinkchecker.threading.Task;
022: import nl.hippo.cms.brokenlinkchecker.util.MethodCleanup;
023: import nl.hippo.cms.brokenlinkchecker.util.Validation;
024: import org.apache.commons.httpclient.HttpClient;
025: import org.apache.commons.httpclient.methods.GetMethod;
026: import org.apache.webdav.lib.PropertyName;
027: import org.apache.webdav.lib.methods.PropFindMethod;
028:
029: /**
030: * <p>
031: * This task will retrieve links from the broken link checker run and check if
032: * they are broken. If a link is broken it will be reported to the broken link
033: * checker. The task will stop if the links to check have been exhausted.
034: * </p>
035: */
036: public class BrokenLinkCheckerTask extends Task {
037: /**
038: * <p>
039: * The namespace of WebDAV.
040: * </p>
041: */
042: private static final String DAV_NAMESPACE_URI = "DAV:";
043:
044: /**
045: * <p>
046: * The name of the property that is requested to check if an internal
047: * link is broken.
048: * </p>
049: */
050: private static final String GETCONTENTTYPE_PROPERTY_NAME = "getcontenttype";
051:
052: /**
053: * <p>
054: * The set of properties that is requested to check if an internal link
055: * is broken.
056: * </p>
057: */
058: private static final Vector INTERNAL_LINKS_PROPERTIES_TO_RETRIEVE = new Vector();
059:
060: /**
061: * <p>
062: * Initialize the set of properties that is requested to check if an
063: * internal link is broken.
064: * </p>
065: */
066: static {
067: INTERNAL_LINKS_PROPERTIES_TO_RETRIEVE.add(new PropertyName(
068: DAV_NAMESPACE_URI, GETCONTENTTYPE_PROPERTY_NAME));
069: }
070:
071: /**
072: * <p>
073: * The HTTP result code returned if a PROPFIND method executes
074: * successfully.
075: * </p>
076: */
077: private static final int PROPFIND_SUCCESS_RESULT_CODE = 207;
078:
079: /**
080: * <p>
081: * The prefix of the localization keys used to report broken links.
082: * </p>
083: */
084: private static final String BROKEN_LINKS_LOCALIZATION_KEY_PREFIX = "cms.brokenlinks.";
085:
086: /**
087: * <p>
088: * The localization key used to report broken internal links.
089: * </p>
090: */
091: private static final String INTERNAL_LINK_INVALID_LOCALIZATION_KEY = BROKEN_LINKS_LOCALIZATION_KEY_PREFIX
092: + "internal-link-invalid";
093:
094: /**
095: * <p>
096: * The minimum HTTP status code that is considered to be an error when
097: * checking external links. Requests for links that return this or a
098: * higher value will cause the link to be marked as broken.
099: * </p>
100: */
101: private static final int MINIMUM_HTTP_STATUS_CODE_TO_INTERPRET_AS_ERROR = 300;
102:
103: /**
104: * <p>
105: * The prefix of the localization keys used tor report HTTP errors that
106: * occur while checking for external links.
107: * </p>
108: */
109: private static final String HTTP_ERROR_LOCALIZATION_KEY_PREFIX = "message.http-error.";
110:
111: /**
112: * <p>
113: * The object holding the information and resources needed by this
114: * object.
115: * </p>
116: */
117: private BrokenLinkCheckerTaskConfiguration configuration;
118:
119: /**
120: * <p>
121: * Check that the configuration is valid. If it is not an
122: * {@link IllegalArgumentException} is thrown.
123: * </p>
124: * <p>
125: * A configuration is valid if the following rules are followed:
126: * </p>
127: * <table>
128: * <tr>
129: * <th>Attribute</th>
130: * <th>Rules</th>
131: * </tr>
132: * <tr>
133: * <td><code>brokenLinkCheckerRun</code></td>
134: * <td>Cannot be <code>null</code>.</td>
135: * </tr>
136: * <tr>
137: * <td><code>httpClient</code></td>
138: * <td>Cannot be <code>null</code>.</td>
139: * </tr>
140: * <tr>
141: * <td><code>internalLinksBaseUrl</code></td>
142: * <td>Cannot be <code>null</code>.</td>
143: * </tr>
144: * <tr>
145: * <td><code>log</code></td>
146: * <td>Cannot be <code>null</code>.</td>
147: * </tr>
148: * </table>
149: *
150: * @param configuration
151: * the configuration to check.
152: */
153: public static void assertConfigurationIsValid(
154: BrokenLinkCheckerTaskConfiguration configuration) {
155: boolean isValid = true;
156: StringBuffer validationErrorMessage = new StringBuffer(1000);
157:
158: isValid &= Validation.assertTrue(configuration
159: .getBrokenLinkCheckerRun() != null,
160: validationErrorMessage,
161: "The broken link checker run cannot be 'null'.");
162:
163: isValid &= Validation.assertTrue(
164: configuration.getHttpClient() != null,
165: validationErrorMessage,
166: "The HTTP client cannot be 'null'.");
167:
168: isValid &= Validation.assertTrue(configuration
169: .getInternalLinksBaseUrl() != null,
170: validationErrorMessage,
171: "The base URL for internal links cannot be 'null'.");
172:
173: isValid &= Validation.assertTrue(
174: configuration.getLog() != null, validationErrorMessage,
175: "The log cannot be 'null'.");
176:
177: if (!isValid) {
178: throw new IllegalArgumentException(validationErrorMessage
179: .toString());
180: }
181: }
182:
183: /**
184: * <p>
185: * Create an instance of a broken link checker task.
186: * </p>
187: */
188: public BrokenLinkCheckerTask(
189: BrokenLinkCheckerTaskConfiguration configuration) {
190: super ();
191:
192: assertConfigurationIsValid(configuration);
193:
194: this .configuration = configuration;
195: }
196:
197: /**
198: * <p>
199: * Retrieve links from the broken link checker run until they are
200: * exhausted, and check if the links are broken. Report broken links
201: * back to the broken link checker run.
202: * </p>
203: */
204: protected void performWork() {
205: String link = getBrokenLinkCheckerRun().getLinkToCheck();
206: while (link != null) {
207: if (LinkClassifier.isInternalLink(link)) {
208: checkInternalLink(link);
209: } else {
210: checkExternalLink(link);
211: }
212:
213: link = getBrokenLinkCheckerRun().getLinkToCheck();
214: }
215: }
216:
217: /**
218: * <p>
219: * Check if an internal link is broken. An internal link is considered
220: * broken if a <code>PROPFIND</code> for the property
221: * <code>getcontenttype</code> fails.
222: * </p>
223: *
224: * <p>
225: * Redirects will be followed and authentication is enabled.
226: * </p>
227: *
228: * @param link
229: * the internal link to check.
230: */
231: private void checkInternalLink(String link) {
232: String absoluteLink = getInternalLinksBaseUrl() + link;
233: PropFindMethod propFindMethod = new PropFindMethod(absoluteLink);
234: try {
235: propFindMethod.setDoAuthentication(true);
236: propFindMethod.setFollowRedirects(true);
237:
238: int propFindResultCode = getHttpClient().executeMethod(
239: propFindMethod);
240:
241: if (propFindResultCode != PROPFIND_SUCCESS_RESULT_CODE) {
242: reportBrokenLink(link,
243: INTERNAL_LINK_INVALID_LOCALIZATION_KEY, true);
244: }
245: } catch (IOException e) {
246: reportBrokenLink(link, e.toString(), false);
247:
248: /*
249: * No other action is needed. Especially no exception should be
250: * thrown so this thread does not terminate and can continue
251: * checking the remaining links.
252: */
253: } finally {
254: MethodCleanup.releaseConnection(propFindMethod,
255: "propfind for internal link check", getLog());
256: }
257: }
258:
259: /**
260: * <p>
261: * Check if an external link is broken. An external link is considered
262: * broken if a <code>GET</code> of the document fails.
263: * </p>
264: *
265: * <p>
266: * Redirects will be followed and authentication is disabled.
267: * </p>
268: *
269: * @param link
270: * the external link to check.
271: */
272: private void checkExternalLink(String link) {
273: GetMethod getMethod = new GetMethod(link);
274: try {
275: getMethod.setDoAuthentication(false);
276: getMethod.setFollowRedirects(true);
277:
278: int getResultCode = getHttpClient()
279: .executeMethod(getMethod);
280:
281: if (getResultCode >= MINIMUM_HTTP_STATUS_CODE_TO_INTERPRET_AS_ERROR) {
282: reportBrokenLink(link,
283: HTTP_ERROR_LOCALIZATION_KEY_PREFIX
284: + getResultCode, true);
285: }
286: } catch (IOException e) {
287: reportBrokenLink(link, e.toString(), false);
288:
289: /*
290: * No other action is needed. Especially no exception should be
291: * thrown so this thread does not terminate and can continue
292: * checking the remaining links.
293: */
294: } finally {
295: MethodCleanup.releaseConnection(getMethod,
296: "get for external link check", getLog());
297: }
298: }
299:
300: /**
301: * <p>
302: * Report a broken link to the broken link checker run.
303: * </p>
304: *
305: * @param link
306: * the broken link.
307: * @param messageOrKey
308: * the error message or localization key of the error
309: * message.
310: * @param isLocalizationKey
311: * indicates <code>messageOrKey</code> should be used
312: * as is (<code>false</code>), or as a localization
313: * key (<code>true</code>).
314: */
315: private void reportBrokenLink(String link, String messageOrKey,
316: boolean isLocalizationKey) {
317: BrokenLinkErrorMessage errorMessage = new BrokenLinkErrorMessage(
318: messageOrKey, isLocalizationKey);
319: getBrokenLinkCheckerRun().addBrokenLink(link, errorMessage);
320: }
321:
322: /**
323: * <p>
324: * Determine whether or not the links to check have been exhausted.
325: * </p>
326: *
327: * @return <code>true</code> if this task should stop because the
328: * links to check have been exhausted, <code>false</code>
329: * otherwise.
330: */
331: protected boolean hasStopConditionBeenMet() {
332: return getBrokenLinkCheckerRun()
333: .haveLinksToCheckBeenExhausted();
334: }
335:
336: /**
337: * <p>
338: * Inform the broken link checker run that this task has finished.
339: * </p>
340: */
341: protected void cleanUp() {
342: getBrokenLinkCheckerRun()
343: .handleBrokenLinksCheckingTaskFinished(this );
344:
345: super .cleanUp();
346: }
347:
348: /**
349: * <p>
350: * Get the broken link checker run owning this task from the
351: * configuration.
352: * </p>
353: *
354: * @return the owning broken link checker run.
355: */
356: private BrokenLinkCheckerRun getBrokenLinkCheckerRun() {
357: return configuration.getBrokenLinkCheckerRun();
358: }
359:
360: /**
361: * <p>
362: * Get the HTTP client to use for executing HTTP methods from the
363: * configuration.
364: * </p>
365: *
366: * @return the HTTP client to use for executing HTTP methods.
367: */
368: private HttpClient getHttpClient() {
369: return configuration.getHttpClient();
370: }
371:
372: /**
373: * <p>
374: * Get the base URL for the internal links from the configuration.
375: * </p>
376: *
377: * @return the base URL for the internal links.
378: */
379: private String getInternalLinksBaseUrl() {
380: return configuration.getInternalLinksBaseUrl();
381: }
382:
383: /**
384: * <p>
385: * Get the log from the configuration.
386: * </p>
387: *
388: * @return the log.
389: */
390: private BrokenLinkCheckerLog getLog() {
391: return configuration.getLog();
392: }
393: }
|