001: /* CrawlSettingsSAXSource
002: *
003: * $Id: CrawlSettingsSAXSource.java 3292 2005-03-31 23:49:52Z stack-sf $
004: *
005: * Created on Dec 5, 2003
006: *
007: * Copyright (C) 2004 Internet Archive.
008: *
009: * This file is part of the Heritrix web crawler (crawler.archive.org).
010: *
011: * Heritrix is free software; you can redistribute it and/or modify
012: * it under the terms of the GNU Lesser Public License as published by
013: * the Free Software Foundation; either version 2.1 of the License, or
014: * any later version.
015: *
016: * Heritrix is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: * GNU Lesser Public License for more details.
020: *
021: * You should have received a copy of the GNU Lesser Public License
022: * along with Heritrix; if not, write to the Free Software
023: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024: */
025: package org.archive.crawler.settings;
026:
027: import java.io.IOException;
028: import java.text.ParseException;
029: import java.util.Iterator;
030:
031: import javax.management.AttributeNotFoundException;
032: import javax.management.MBeanInfo;
033: import javax.xml.transform.sax.SAXSource;
034:
035: import org.archive.crawler.settings.refinements.PortnumberCriteria;
036: import org.archive.crawler.settings.refinements.Refinement;
037: import org.archive.crawler.settings.refinements.RegularExpressionCriteria;
038: import org.archive.crawler.settings.refinements.TimespanCriteria;
039: import org.archive.util.ArchiveUtils;
040: import org.xml.sax.Attributes;
041: import org.xml.sax.ContentHandler;
042: import org.xml.sax.DTDHandler;
043: import org.xml.sax.EntityResolver;
044: import org.xml.sax.ErrorHandler;
045: import org.xml.sax.InputSource;
046: import org.xml.sax.SAXException;
047: import org.xml.sax.SAXNotRecognizedException;
048: import org.xml.sax.SAXNotSupportedException;
049: import org.xml.sax.XMLReader;
050: import org.xml.sax.helpers.AttributesImpl;
051:
052: /** Class that takes a CrawlerSettings object and create SAXEvents from it.
053: *
054: * This is a helper class for XMLSettingsHandler.
055: *
056: * @author John Erik Halse
057: */
058: public class CrawlSettingsSAXSource extends SAXSource implements
059: XMLReader {
060: // for prettyprinting XML file
061: private static final int indentAmount = 2;
062:
063: private CrawlerSettings settings;
064: private ContentHandler handler;
065: private boolean orderFile = false;
066:
067: /** Constructs a new CrawlSettingsSAXSource.
068: *
069: * @param settings the settings object to create SAX events from.
070: */
071: public CrawlSettingsSAXSource(CrawlerSettings settings) {
072: super ();
073: this .settings = settings;
074: if (settings.getParent() == null) {
075: orderFile = true;
076: }
077: }
078:
079: /* (non-Javadoc)
080: * @see org.xml.sax.XMLReader#getFeature(java.lang.String)
081: */
082: public boolean getFeature(String name)
083: throws SAXNotRecognizedException, SAXNotSupportedException {
084: return false;
085: }
086:
087: /* (non-Javadoc)
088: * @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean)
089: */
090: public void setFeature(String name, boolean value)
091: throws SAXNotRecognizedException, SAXNotSupportedException {
092:
093: }
094:
095: /* (non-Javadoc)
096: * @see org.xml.sax.XMLReader#getProperty(java.lang.String)
097: */
098: public Object getProperty(String name)
099: throws SAXNotRecognizedException, SAXNotSupportedException {
100: return null;
101: }
102:
103: /* (non-Javadoc)
104: * @see org.xml.sax.XMLReader#setProperty(java.lang.String, java.lang.Object)
105: */
106: public void setProperty(String name, Object value)
107: throws SAXNotRecognizedException, SAXNotSupportedException {
108:
109: }
110:
111: /* (non-Javadoc)
112: * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)
113: */
114: public void setEntityResolver(EntityResolver resolver) {
115:
116: }
117:
118: /* (non-Javadoc)
119: * @see org.xml.sax.XMLReader#getEntityResolver()
120: */
121: public EntityResolver getEntityResolver() {
122: return null;
123: }
124:
125: /* (non-Javadoc)
126: * @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler)
127: */
128: public void setDTDHandler(DTDHandler handler) {
129: }
130:
131: /* (non-Javadoc)
132: * @see org.xml.sax.XMLReader#getDTDHandler()
133: */
134: public DTDHandler getDTDHandler() {
135: return null;
136: }
137:
138: /* (non-Javadoc)
139: * @see org.xml.sax.XMLReader#setContentHandler(org.xml.sax.ContentHandler)
140: */
141: public void setContentHandler(ContentHandler handler) {
142: this .handler = handler;
143: }
144:
145: /* (non-Javadoc)
146: * @see org.xml.sax.XMLReader#getContentHandler()
147: */
148: public ContentHandler getContentHandler() {
149: return handler;
150: }
151:
152: /* (non-Javadoc)
153: * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
154: */
155: public void setErrorHandler(ErrorHandler handler) {
156: }
157:
158: /* (non-Javadoc)
159: * @see org.xml.sax.XMLReader#getErrorHandler()
160: */
161: public ErrorHandler getErrorHandler() {
162: return null;
163: }
164:
165: // We're not doing namespaces
166: private static final String nsu = ""; // NamespaceURI
167: private static final char[] indentArray = "\n "
168: .toCharArray();
169:
170: /* (non-Javadoc)
171: * @see org.xml.sax.XMLReader#parse(org.xml.sax.InputSource)
172: */
173: public void parse(InputSource input) throws IOException,
174: SAXException {
175: if (handler == null) {
176: throw new SAXException("No content handler");
177: }
178: handler.startDocument();
179: AttributesImpl atts = new AttributesImpl();
180: atts.addAttribute("http://www.w3.org/2001/XMLSchema-instance",
181: "xsi", "xmlns:xsi", nsu,
182: "http://www.w3.org/2001/XMLSchema-instance");
183: atts.addAttribute("http://www.w3.org/2001/XMLSchema-instance",
184: "noNamespaceSchemaLocation",
185: "xsi:noNamespaceSchemaLocation", nsu,
186: XMLSettingsHandler.XML_SCHEMA);
187: String rootElement;
188: if (settings.isRefinement()) {
189: rootElement = XMLSettingsHandler.XML_ROOT_REFINEMENT;
190: } else if (orderFile) {
191: rootElement = XMLSettingsHandler.XML_ROOT_ORDER;
192: } else {
193: rootElement = XMLSettingsHandler.XML_ROOT_HOST_SETTINGS;
194: }
195: handler.startElement(nsu, rootElement, rootElement, atts);
196:
197: parseMetaData(1 + indentAmount);
198:
199: if (settings.hasRefinements()) {
200: parseRefinements(1 + indentAmount);
201: }
202:
203: // Write the modules
204: Iterator modules = settings.topLevelModules();
205: while (modules.hasNext()) {
206: ComplexType complexType = (ComplexType) modules.next();
207: parseComplexType(complexType, 1 + indentAmount);
208: }
209:
210: handler.ignorableWhitespace(indentArray, 0, 1);
211: handler.endElement(nsu, rootElement, rootElement);
212: handler.ignorableWhitespace(indentArray, 0, 1);
213: handler.endDocument();
214: }
215:
216: private void parseRefinements(int indent) throws SAXException {
217: Attributes nullAtts = new AttributesImpl();
218: handler.ignorableWhitespace(indentArray, 0, indent);
219: handler
220: .startElement(nsu,
221: XMLSettingsHandler.XML_ELEMENT_REFINEMENTLIST,
222: XMLSettingsHandler.XML_ELEMENT_REFINEMENTLIST,
223: nullAtts);
224:
225: Iterator it = settings.refinementsIterator();
226: while (it.hasNext()) {
227: Refinement refinement = (Refinement) it.next();
228: handler.ignorableWhitespace(indentArray, 0, indent
229: + indentAmount);
230: AttributesImpl reference = new AttributesImpl();
231: reference.addAttribute(nsu,
232: XMLSettingsHandler.XML_ELEMENT_REFERENCE,
233: XMLSettingsHandler.XML_ELEMENT_REFERENCE, nsu,
234: refinement.getReference());
235: handler.startElement(nsu,
236: XMLSettingsHandler.XML_ELEMENT_REFINEMENT,
237: XMLSettingsHandler.XML_ELEMENT_REFINEMENT,
238: reference);
239:
240: writeSimpleElement(
241: XMLSettingsHandler.XML_ELEMENT_DESCRIPTION,
242: refinement.getDescription(), nullAtts, indent + 2
243: * indentAmount);
244:
245: parseRefinementLimits(refinement, indent + 2 * indentAmount);
246:
247: handler.ignorableWhitespace(indentArray, 0, indent
248: + indentAmount);
249: handler.endElement(nsu,
250: XMLSettingsHandler.XML_ELEMENT_REFINEMENT,
251: XMLSettingsHandler.XML_ELEMENT_REFINEMENT);
252: }
253:
254: handler.ignorableWhitespace(indentArray, 0, indent);
255: handler.endElement(nsu,
256: XMLSettingsHandler.XML_ELEMENT_REFINEMENTLIST,
257: XMLSettingsHandler.XML_ELEMENT_REFINEMENTLIST);
258: }
259:
260: private void parseRefinementLimits(Refinement refinement, int indent)
261: throws SAXException {
262: Attributes nullAtts = new AttributesImpl();
263:
264: handler.ignorableWhitespace(indentArray, 0, indent);
265: handler.startElement(nsu,
266: XMLSettingsHandler.XML_ELEMENT_LIMITS,
267: XMLSettingsHandler.XML_ELEMENT_LIMITS, nullAtts);
268:
269: Iterator it = refinement.criteriaIterator();
270: while (it.hasNext()) {
271: Object limit = it.next();
272: if (limit instanceof TimespanCriteria) {
273: AttributesImpl timeSpan = new AttributesImpl();
274: timeSpan.addAttribute(nsu,
275: XMLSettingsHandler.XML_ATTRIBUTE_FROM,
276: XMLSettingsHandler.XML_ATTRIBUTE_FROM, nsu,
277: ((TimespanCriteria) limit).getFrom());
278: timeSpan.addAttribute(nsu,
279: XMLSettingsHandler.XML_ATTRIBUTE_TO,
280: XMLSettingsHandler.XML_ATTRIBUTE_TO, nsu,
281: ((TimespanCriteria) limit).getTo());
282: writeSimpleElement(
283: XMLSettingsHandler.XML_ELEMENT_TIMESPAN, "",
284: timeSpan, indent + 2 * indentAmount);
285: } else if (limit instanceof PortnumberCriteria) {
286: writeSimpleElement(
287: XMLSettingsHandler.XML_ELEMENT_PORTNUMBER,
288: ((PortnumberCriteria) limit).getPortNumber(),
289: nullAtts, indent + 2 * indentAmount);
290: } else if (limit instanceof RegularExpressionCriteria) {
291: writeSimpleElement(
292: XMLSettingsHandler.XML_ELEMENT_URIMATCHES,
293: ((RegularExpressionCriteria) limit).getRegexp(),
294: nullAtts, indent + 2 * indentAmount);
295: }
296: }
297:
298: handler.ignorableWhitespace(indentArray, 0, indent);
299: handler.endElement(nsu, XMLSettingsHandler.XML_ELEMENT_LIMITS,
300: XMLSettingsHandler.XML_ELEMENT_LIMITS);
301:
302: }
303:
304: private void parseMetaData(int indent) throws SAXException {
305: // Write meta information
306: Attributes nullAtts = new AttributesImpl();
307: handler.ignorableWhitespace(indentArray, 0, indent);
308: handler.startElement(nsu, XMLSettingsHandler.XML_ELEMENT_META,
309: XMLSettingsHandler.XML_ELEMENT_META, nullAtts);
310:
311: // Write settings name
312: writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_NAME,
313: settings.getName(), null, indent + indentAmount);
314:
315: // Write settings description
316: writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_DESCRIPTION,
317: settings.getDescription(), null, indent + indentAmount);
318:
319: // Write settings operator
320: writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_OPERATOR,
321: settings.getOperator(), null, indent + indentAmount);
322:
323: // Write settings description
324: writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_ORGANIZATION,
325: settings.getOrganization(), null, indent + indentAmount);
326:
327: // Write settings description
328: writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_AUDIENCE,
329: settings.getAudience(), null, indent + indentAmount);
330:
331: // Write file date
332: String dateStamp = ArchiveUtils.get14DigitDate();
333: writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_DATE,
334: dateStamp, null, indent + indentAmount);
335: try {
336: settings.setLastSavedTime(ArchiveUtils
337: .parse14DigitDate(dateStamp));
338: } catch (ParseException e) {
339: // Should never happen since we just created it. If this exception
340: // is thrown, then there is a bug in ArchiveUtils.
341: e.printStackTrace();
342: }
343:
344: handler.ignorableWhitespace(indentArray, 0, indent);
345: handler.endElement(nsu, XMLSettingsHandler.XML_ELEMENT_META,
346: XMLSettingsHandler.XML_ELEMENT_META);
347: }
348:
349: /**
350: * Create SAX events from a {@link ComplexType}.
351: *
352: * @param complexType the object to creat SAX events from.
353: * @param indent the indentation amount for prettyprinting XML.
354: * @throws SAXException is thrown if an error occurs.
355: */
356: private void parseComplexType(ComplexType complexType, int indent)
357: throws SAXException {
358: if (complexType.isTransient()) {
359: return;
360: }
361: MBeanInfo mbeanInfo = complexType.getMBeanInfo(settings);
362: String objectElement = resolveElementName(complexType);
363: AttributesImpl atts = new AttributesImpl();
364: atts.addAttribute(nsu, XMLSettingsHandler.XML_ATTRIBUTE_NAME,
365: XMLSettingsHandler.XML_ATTRIBUTE_NAME, nsu, complexType
366: .getName());
367: if (objectElement == XMLSettingsHandler.XML_ELEMENT_NEW_OBJECT) {
368: // Only 'newObject' elements have a class attribute
369: atts.addAttribute(nsu,
370: XMLSettingsHandler.XML_ATTRIBUTE_CLASS,
371: XMLSettingsHandler.XML_ATTRIBUTE_CLASS, nsu,
372: mbeanInfo.getClassName());
373: }
374: if (complexType.getParent() == null) {
375: atts = new AttributesImpl();
376: }
377: handler.ignorableWhitespace(indentArray, 0, indent);
378: handler.startElement(nsu, objectElement, objectElement, atts);
379: for (Iterator it = complexType
380: .getAttributeInfoIterator(settings); it.hasNext();) {
381: ModuleAttributeInfo attribute = (ModuleAttributeInfo) it
382: .next();
383: if (!attribute.isTransient()) {
384: parseAttribute(complexType, attribute, indent);
385: }
386: }
387: handler.ignorableWhitespace(indentArray, 0, indent);
388: handler.endElement(nsu, objectElement, objectElement);
389: }
390:
391: private void parseAttribute(ComplexType complexType,
392: ModuleAttributeInfo attribute, int indent)
393: throws SAXException {
394: Object value;
395: try {
396: value = complexType.getLocalAttribute(settings, attribute
397: .getName());
398: } catch (AttributeNotFoundException e) {
399: throw new SAXException(e);
400: }
401: if (orderFile || value != null) {
402: // Write only overridden values unless this is the order file
403: if (attribute.isComplexType()) {
404: // Call method recursively for complex types
405: parseComplexType((ComplexType) value, indent
406: + indentAmount);
407: } else {
408: // Write element
409: String elementName = SettingsHandler
410: .getTypeName(attribute.getType());
411: AttributesImpl atts = new AttributesImpl();
412: atts.addAttribute(nsu,
413: XMLSettingsHandler.XML_ATTRIBUTE_NAME,
414: XMLSettingsHandler.XML_ATTRIBUTE_NAME, nsu,
415: attribute.getName());
416: if (value == null) {
417: try {
418: value = complexType.getAttribute(attribute
419: .getName());
420: } catch (Exception e) {
421: throw new SAXException(
422: "Internal error in settings subsystem",
423: e);
424: }
425: }
426: if (value != null) {
427: handler.ignorableWhitespace(indentArray, 0, indent
428: + indentAmount);
429: handler.startElement(nsu, elementName, elementName,
430: atts);
431: if (value instanceof ListType) {
432: parseListData(value, indent + indentAmount);
433: handler.ignorableWhitespace(indentArray, 0,
434: indent + indentAmount);
435: } else {
436: char valueArray[] = value.toString()
437: .toCharArray();
438: handler.characters(valueArray, 0,
439: valueArray.length);
440: }
441: handler.endElement(nsu, elementName, elementName);
442: }
443: }
444: }
445: }
446:
447: /** Create SAX events for the content of a {@link ListType}.
448: *
449: * @param value the ListType whose content we create SAX events for.
450: * @param indent the indentation amount for prettyprinting XML.
451: * @throws SAXException is thrown if an error occurs.
452: */
453: private void parseListData(Object value, int indent)
454: throws SAXException {
455: ListType list = (ListType) value;
456: Iterator it = list.iterator();
457: while (it.hasNext()) {
458: Object element = it.next();
459: String elementName = SettingsHandler.getTypeName(element
460: .getClass().getName());
461: writeSimpleElement(elementName, element.toString(), null,
462: indent + indentAmount);
463: }
464: }
465:
466: /** Resolve the XML element name of a {@link ComplexType}.
467: *
468: * @param complexType the object to investigate.
469: * @return the name of the XML element.
470: */
471: private String resolveElementName(ComplexType complexType) {
472: String elementName;
473: if (complexType instanceof ModuleType) {
474: if (complexType.getParent() == null) {
475: // Top level controller element
476: elementName = XMLSettingsHandler.XML_ELEMENT_CONTROLLER;
477: } else if (!orderFile
478: && complexType.globalSettings().getModule(
479: complexType.getName()) != null) {
480: // This is not the order file and we are referencing an object
481: elementName = XMLSettingsHandler.XML_ELEMENT_OBJECT;
482: } else {
483: // The object is not referenced before
484: elementName = XMLSettingsHandler.XML_ELEMENT_NEW_OBJECT;
485: }
486: } else {
487: // It's a map
488: elementName = SettingsHandler.getTypeName(complexType
489: .getClass().getName());
490: }
491: return elementName;
492: }
493:
494: /** Create SAX events for a simple element.
495: *
496: * Creates all the SAX events needed for prettyprinting an XML element
497: * with a simple value and possible attributes.
498: *
499: * @param elementName the name of the XML element.
500: * @param value the value to pu inside the XML element.
501: * @param atts the attributes for the XML element.
502: * @param indent the indentation amount for prettyprinting XML.
503: * @throws SAXException is thrown if an error occurs.
504: */
505: private void writeSimpleElement(String elementName, String value,
506: Attributes atts, int indent) throws SAXException {
507: if (atts == null) {
508: atts = new AttributesImpl();
509: }
510: // make sure that the value is never null
511: value = value == null ? "" : value;
512: handler.ignorableWhitespace(indentArray, 0, indent);
513: handler.startElement(nsu, elementName, elementName, atts);
514: handler.characters(value.toCharArray(), 0, value.length());
515: handler.endElement(nsu, elementName, elementName);
516: }
517:
518: /* (non-Javadoc)
519: * @see org.xml.sax.XMLReader#parse(java.lang.String)
520: */
521: public void parse(String systemId) throws IOException, SAXException {
522: // Do nothing. Just for conformance to the XMLReader API.
523: }
524:
525: /* (non-Javadoc)
526: * @see javax.xml.transform.sax.SAXSource#getXMLReader()
527: */
528: public XMLReader getXMLReader() {
529: return this ;
530: }
531:
532: /* (non-Javadoc)
533: * @see javax.xml.transform.sax.SAXSource#getInputSource()
534: */
535: public InputSource getInputSource() {
536: return new InputSource();
537: }
538: }
|