001: /*
002: * Copyright 2007 Hippo.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: package nl.hippo.cms.brokenlinkchecker;
017:
018: import java.io.ByteArrayInputStream;
019: import java.io.ByteArrayOutputStream;
020: import java.io.IOException;
021: import java.util.Date;
022: import java.util.Iterator;
023: import java.util.Map;
024: import java.util.Set;
025: import javax.xml.transform.TransformerConfigurationException;
026: import javax.xml.transform.TransformerFactory;
027: import javax.xml.transform.TransformerFactoryConfigurationError;
028: import javax.xml.transform.sax.SAXTransformerFactory;
029: import javax.xml.transform.sax.TransformerHandler;
030: import javax.xml.transform.stream.StreamResult;
031: import nl.hippo.cms.brokenlinkchecker.log.BrokenLinkCheckerLog;
032: import nl.hippo.cms.brokenlinkchecker.util.MethodCleanup;
033: import nl.hippo.cms.brokenlinkchecker.util.StreamCleanup;
034: import org.apache.commons.httpclient.HttpClient;
035: import org.apache.commons.httpclient.methods.PutMethod;
036: import org.xml.sax.SAXException;
037: import org.xml.sax.helpers.AttributesImpl;
038:
039: /**
040: * <p>
041: * This class writes the result of the broken link checking to an XML document
042: * in the repository.
043: * </p>
044: */
045: class BrokenLinksToXmlDocumentInRepositoryWriter {
046: /**
047: * <p>
048: * The value to use for the namespace URI if an element does not have a
049: * namespace.
050: * </p>
051: */
052: private static final String NO_NAMESPACE_URI_VALUE = "";
053:
054: /**
055: * <p>
056: * The namespace URI of the elements in Cocoon that are used for
057: * internationalization.
058: * </p>
059: */
060: private static final String COCOON_INTERNATIONALIZATION_NAMESPACE_URI = "http://apache.org/cocoon/i18n/2.1";
061:
062: /**
063: * </p>
064: * The name of the element containing the broken links.
065: * </p>
066: */
067: private static final String BROKEN_LINKS_ELEMENT_NAME = "broken-links";
068:
069: /**
070: * <p>
071: * The name of the element containing the broken links of a single
072: * document.
073: * </p>
074: */
075: private static final String PAGE_ELEMENT_NAME = "page";
076:
077: /**
078: * <p>
079: * The name of the element describing a broken link.
080: * </p>
081: */
082: private static final String LINK_ELEMENT_NAME = "link";
083:
084: /**
085: * <p>
086: * The name of the element that contains a localization key for the
087: * error message of the broken link.
088: * </p>
089: */
090: private static final String TEXT_ELEMENT_NAME = "text";
091:
092: /**
093: * <p>
094: * The name of the attribute that contains the date when the result
095: * document was generated.
096: * </p>
097: */
098: private static final String DATE_ATTRIBUTE_NAME = "date";
099:
100: /**
101: * <p>
102: * The name of the attribute containing the URL of the document
103: * containing the broken links, or the URL of the broken link.
104: * </p>
105: */
106: private static final String URL_ATTRIBUTE_NAME = "url";
107:
108: /**
109: * <p>
110: * The name of the attribute containing the namespace of the element.
111: * </p>
112: */
113: private static final String XMLNS_ATTRIBUTE_NAME = "xmlns";
114:
115: /**
116: * <p>
117: * The ID for attribute type <code>CDATA</code>.
118: * </p>
119: */
120: private static final String CDATA_ATTRIBUTE_TYPE_ID = "CDATA";
121:
122: /**
123: * <p>
124: * The divisor to use to derive the HTTP status code group from an HTTP
125: * status code.
126: * </p>
127: */
128: private static final int HTTP_STATUS_CODE_GROUP_DIVISOR = 100;
129:
130: /**
131: * <p>
132: * The ID of the HTTP status code group that contains successful HTTP
133: * status codes.
134: * </p>
135: */
136: private static final int SUCCESSFUL_HTTP_STATUS_CODE_GROUP_ID = 2;
137:
138: /**
139: * <p>
140: * The map containing the URLs of the documents that have broken links.
141: * Each document maps to the set of links that are broken.
142: * </p>
143: *
144: * <p>
145: * <code>Map<String, Set<String>></code>
146: * </p>
147: */
148: private Map documentsWithBrokenLinks;
149:
150: /**
151: * <p>
152: * The map containing the broken links. A broken link maps to the error
153: * message returned during the brokenness check.
154: * </p>
155: *
156: * <p>
157: * <code>Map<String, BrokenLinkErrorMessage></code>
158: * </p>
159: */
160: private Map brokenLinks;
161:
162: /**
163: * <p>
164: * The URL of the document to which the result must be written.
165: * </p>
166: */
167: private String resultDocumentUrl;
168:
169: /**
170: * <p>
171: * The HTTP client to use to store the document in the repository.
172: * </p>
173: */
174: private HttpClient httpClient;
175:
176: /**
177: * <p>
178: * The log to use for messages.
179: * </p>
180: */
181: private BrokenLinkCheckerLog log;
182:
183: /**
184: * <p>
185: * Create an instance of this result writer and initialize all
186: * attributes.
187: * </p>
188: *
189: * <p>
190: * The constructor does not write the result, use {@link #writeResult()}
191: * for this.
192: * </p>
193: */
194: BrokenLinksToXmlDocumentInRepositoryWriter(
195: Map documentsWithBrokenLinks, Map brokenLinks,
196: String resultDocumentUrl, HttpClient httpClient,
197: BrokenLinkCheckerLog log) {
198: super ();
199:
200: this .documentsWithBrokenLinks = documentsWithBrokenLinks;
201: this .brokenLinks = brokenLinks;
202: this .resultDocumentUrl = resultDocumentUrl;
203: this .httpClient = httpClient;
204: this .log = log;
205: }
206:
207: /**
208: * <p>
209: * Actually write the result to the repository. The result is converted
210: * to XML and then stored in the repository at the specified location.
211: * </p>
212: */
213: void writeResult() {
214: byte[] resultXmlBytes = generateResultXmlAsBytes();
215:
216: storeResultDocument(resultXmlBytes);
217: }
218:
219: /**
220: * <p>
221: * Generate a byte array representation of the XML representation of the
222: * result of the checking for broken links.
223: * </p>
224: *
225: * @return the XML representation of the result.
226: */
227: private byte[] generateResultXmlAsBytes() {
228: SAXTransformerFactory saxTransformerFactory = createSaxTransformerFactory();
229:
230: ByteArrayOutputStream xmlBytesOutput = new ByteArrayOutputStream();
231: try {
232: TransformerHandler handler = saxTransformerFactory
233: .newTransformerHandler();
234:
235: handler.setResult(new StreamResult(xmlBytesOutput));
236:
237: generateResultDocument(handler);
238: } catch (TransformerConfigurationException e) {
239: log
240: .error(
241: "Unable to obtain a transformer handler for writing the result.",
242: e);
243: } catch (SAXException e) {
244: log
245: .error(
246: "SAX error occurred while writing the result.",
247: e);
248: } finally {
249: StreamCleanup
250: .close(xmlBytesOutput, "result xml bytes", log);
251: }
252:
253: return xmlBytesOutput.toByteArray();
254: }
255:
256: /**
257: * <p>
258: * Create a SAX transformer factory.
259: * </p>
260: *
261: * @return a SAX transformer factory.
262: */
263: private SAXTransformerFactory createSaxTransformerFactory()
264: throws TransformerFactoryConfigurationError {
265: SAXTransformerFactory result;
266:
267: TransformerFactory transformerFactory = TransformerFactory
268: .newInstance();
269: if (!(transformerFactory instanceof SAXTransformerFactory)) {
270: throw new IllegalStateException(
271: "The transformer factory is not a SAX transformer factory.");
272: }
273:
274: result = (SAXTransformerFactory) transformerFactory;
275:
276: return result;
277: }
278:
279: /**
280: * <p>
281: * Write the result document to the SAX transformer handler.
282: * </p>
283: *
284: * @param handler
285: * the SAX transformer handler to which to write the
286: * result document.
287: */
288: private void generateResultDocument(TransformerHandler handler)
289: throws SAXException {
290: handler.startDocument();
291:
292: AttributesImpl rootAttributes = new AttributesImpl();
293: String dateAsString = new Date().toString();
294: rootAttributes.addAttribute(NO_NAMESPACE_URI_VALUE,
295: DATE_ATTRIBUTE_NAME, DATE_ATTRIBUTE_NAME,
296: CDATA_ATTRIBUTE_TYPE_ID, dateAsString);
297: handler.startElement(NO_NAMESPACE_URI_VALUE,
298: BROKEN_LINKS_ELEMENT_NAME, BROKEN_LINKS_ELEMENT_NAME,
299: rootAttributes);
300:
301: generateDocumentsWithBrokenLinks(handler);
302:
303: handler.endElement(NO_NAMESPACE_URI_VALUE,
304: BROKEN_LINKS_ELEMENT_NAME, BROKEN_LINKS_ELEMENT_NAME);
305: handler.endDocument();
306: }
307:
308: /**
309: * <p>
310: * Write the set of documents with broken links to the SAX transformer
311: * handler.
312: * </p>
313: *
314: * @param handler
315: * the SAX transformer handler to which to write the
316: * documents.
317: */
318: private void generateDocumentsWithBrokenLinks(
319: TransformerHandler handler) throws SAXException {
320: Iterator documentsWithBrokenLinksIterator = documentsWithBrokenLinks
321: .keySet().iterator();
322: while (documentsWithBrokenLinksIterator.hasNext()) {
323: String documentUrl = (String) documentsWithBrokenLinksIterator
324: .next();
325:
326: generateDocumentWithBrokenLinks(documentUrl, handler);
327: }
328: }
329:
330: /**
331: * <p>
332: * Write a document with broken links to the SAX transformer handler.
333: * </p>
334: *
335: * @param handler
336: * the SAX transformer handler to which to write the
337: * document.
338: */
339: private void generateDocumentWithBrokenLinks(String documentUrl,
340: TransformerHandler handler) throws SAXException {
341: AttributesImpl documentAttributes = new AttributesImpl();
342: documentAttributes.addAttribute(NO_NAMESPACE_URI_VALUE,
343: URL_ATTRIBUTE_NAME, URL_ATTRIBUTE_NAME,
344: CDATA_ATTRIBUTE_TYPE_ID, documentUrl);
345: handler.startElement(NO_NAMESPACE_URI_VALUE, PAGE_ELEMENT_NAME,
346: PAGE_ELEMENT_NAME, documentAttributes);
347:
348: generateBrokenLinks(documentUrl, handler);
349:
350: handler.endElement(NO_NAMESPACE_URI_VALUE, PAGE_ELEMENT_NAME,
351: PAGE_ELEMENT_NAME);
352: }
353:
354: /**
355: * <p>
356: * Write the set of broken links of a document to the SAX transformer
357: * handler.
358: * </p>
359: *
360: * @param handler
361: * the SAX transformer handler to which to write the
362: * broken links.
363: */
364: private void generateBrokenLinks(String documentUrl,
365: TransformerHandler handler) throws SAXException {
366: Set brokenLinks = (Set) documentsWithBrokenLinks
367: .get(documentUrl);
368: Iterator brokenLinksIterator = brokenLinks.iterator();
369: while (brokenLinksIterator.hasNext()) {
370: String brokenLink = (String) brokenLinksIterator.next();
371:
372: generateBrokenLink(brokenLink, handler);
373: }
374: }
375:
376: /**
377: * <p>
378: * Write a broken link to the SAX transformer handler.
379: * </p>
380: *
381: * @param handler
382: * the SAX transformer handler to which to write the
383: * link.
384: */
385: private void generateBrokenLink(String brokenLink,
386: TransformerHandler handler) throws SAXException {
387: AttributesImpl linkAttributes = new AttributesImpl();
388: linkAttributes.addAttribute(NO_NAMESPACE_URI_VALUE,
389: URL_ATTRIBUTE_NAME, URL_ATTRIBUTE_NAME,
390: CDATA_ATTRIBUTE_TYPE_ID, brokenLink);
391: handler.startElement(NO_NAMESPACE_URI_VALUE, LINK_ELEMENT_NAME,
392: LINK_ELEMENT_NAME, linkAttributes);
393:
394: generateErrorMessage(brokenLink, handler);
395:
396: handler.endElement(NO_NAMESPACE_URI_VALUE, LINK_ELEMENT_NAME,
397: LINK_ELEMENT_NAME);
398: }
399:
400: /**
401: * <p>
402: * Write the error message of a broken link to the SAX transformer
403: * handler.
404: * </p>
405: *
406: * @param handler
407: * the SAX transformer handler to which to write the
408: * error message.
409: */
410: private void generateErrorMessage(String brokenLink,
411: TransformerHandler handler) throws SAXException {
412: BrokenLinkErrorMessage errorMessage = (BrokenLinkErrorMessage) brokenLinks
413: .get(brokenLink);
414: if (errorMessage.isMessageLocalizationKey()) {
415: AttributesImpl textAttributes = new AttributesImpl();
416: textAttributes.addAttribute(NO_NAMESPACE_URI_VALUE,
417: XMLNS_ATTRIBUTE_NAME, XMLNS_ATTRIBUTE_NAME,
418: CDATA_ATTRIBUTE_TYPE_ID,
419: COCOON_INTERNATIONALIZATION_NAMESPACE_URI);
420: handler.startElement(NO_NAMESPACE_URI_VALUE,
421: TEXT_ELEMENT_NAME, TEXT_ELEMENT_NAME,
422: textAttributes);
423:
424: char[] localizationKey = errorMessage.getMessageOrKey()
425: .toCharArray();
426: handler.characters(localizationKey, 0,
427: localizationKey.length);
428:
429: handler.endElement(NO_NAMESPACE_URI_VALUE,
430: TEXT_ELEMENT_NAME, TEXT_ELEMENT_NAME);
431: } else {
432: char[] message = errorMessage.getMessageOrKey()
433: .toCharArray();
434: handler.characters(message, 0, message.length);
435: }
436: }
437:
438: /**
439: * <p>
440: * Write the XML containing the results to the repository. No exception
441: * will be thrown. Instead a message is logged if an error occurs.
442: * </p>
443: *
444: * @param resultXmlBytes
445: * the byte array representation of the XML document
446: * containing the results.
447: */
448: private void storeResultDocument(byte[] resultXmlBytes) {
449: PutMethod putMethod = new PutMethod(resultDocumentUrl);
450: try {
451: putMethod.setDoAuthentication(true);
452: putMethod.setFollowRedirects(false);
453:
454: ByteArrayInputStream resultXmlInput = new ByteArrayInputStream(
455: resultXmlBytes);
456: try {
457: putMethod.setRequestBody(resultXmlInput);
458:
459: int putResultCode = httpClient.executeMethod(putMethod);
460: if (isHttpMethodSuccessful(putResultCode)) {
461: log
462: .error("The storing of the result was unsuccesful. HTTP error code returned: "
463: + putResultCode);
464: }
465: } finally {
466: StreamCleanup.close(resultXmlInput, "result xml", log);
467: }
468: } catch (IOException e) {
469: log
470: .error(
471: "I/O error occurred while writing the result.",
472: e);
473: } finally {
474: MethodCleanup.releaseConnection(putMethod,
475: "put result document", log);
476: }
477: }
478:
479: /**
480: * <p>
481: * Determine if an HTTP method was successful.
482: * </p>
483: *
484: * @param resultCode
485: * the result code returned by the HTTP server.
486: * @return <code>true</code> if the HTTP method was successful,
487: * <code>false</code> otherwise.
488: */
489: private boolean isHttpMethodSuccessful(int resultCode) {
490: return (resultCode / HTTP_STATUS_CODE_GROUP_DIVISOR) != SUCCESSFUL_HTTP_STATUS_CODE_GROUP_ID;
491: }
492: }
|