001: /* WriterPoolProcessor
002: *
003: * $Id: WriterPoolProcessor.java 5029 2007-03-29 23:53:50Z gojomo $
004: *
005: * Created on July 19th, 2006
006: *
007: * Copyright (C) 2006 Internet Archive.
008: *
009: * This file is part of the Heritrix web crawler (crawler.archive.org).
010: *
011: * Heritrix is free software; you can redistribute it and/or modify
012: * it under the terms of the GNU Lesser Public License as published by
013: * the Free Software Foundation; either version 2.1 of the License, or
014: * any later version.
015: *
016: * Heritrix is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: * GNU Lesser Public License for more details.
020: *
021: * You should have received a copy of the GNU Lesser Public License
022: * along with Heritrix; if not, write to the Free Software
023: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024: */
025: package org.archive.crawler.framework;
026:
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.StringWriter;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Logger;

import javax.management.AttributeNotFoundException;
import javax.management.MBeanException;
import javax.management.ReflectionException;
import javax.xml.transform.SourceLocator;
import javax.xml.transform.Templates;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

import org.archive.crawler.Heritrix;
import org.archive.crawler.datamodel.CoreAttributeConstants;
import org.archive.crawler.datamodel.CrawlHost;
import org.archive.crawler.datamodel.CrawlOrder;
import org.archive.crawler.datamodel.CrawlURI;
import org.archive.crawler.datamodel.FetchStatusCodes;
import org.archive.crawler.deciderules.recrawl.IdenticalDigestDecideRule;
import org.archive.crawler.event.CrawlStatusListener;
import org.archive.crawler.settings.SimpleType;
import org.archive.crawler.settings.StringList;
import org.archive.crawler.settings.Type;
import org.archive.crawler.settings.XMLSettingsHandler;
import org.archive.io.ObjectPlusFilesInputStream;
import org.archive.io.WriterPool;
import org.archive.io.WriterPoolMember;
072:
073: /**
074: * Abstract implementation of a file pool processor.
075: * Subclass to implement for a particular {@link WriterPoolMember} instance.
076: * @author Parker Thompson
077: * @author stack
078: */
public abstract class WriterPoolProcessor extends Processor implements
        CoreAttributeConstants, CrawlStatusListener, FetchStatusCodes {
    /** Logger named for the concrete subclass. */
    private final Logger logger = Logger.getLogger(this.getClass()
            .getName());

    /**
     * Key to use asking settings for file compression value.
     */
    public static final String ATTR_COMPRESS = "compress";

    /**
     * Default as to whether we do compression of files.
     */
    public static final boolean DEFAULT_COMPRESS = true;

    /**
     * Key to use asking settings for file prefix value.
     */
    public static final String ATTR_PREFIX = "prefix";

    /**
     * Key to use asking settings for output path value
     * (one or more directories; see {@link #getOutputDirs()}).
     */
    public static final String ATTR_PATH = "path";

    /**
     * Key to use asking settings for file suffix value.
     */
    public static final String ATTR_SUFFIX = "suffix";

    /**
     * Key to use asking settings for file max size value.
     */
    public static final String ATTR_MAX_SIZE_BYTES = "max-size-bytes";

    /**
     * Key to get maximum pool size.
     *
     * This key is for maximum files active in the pool.
     */
    public static final String ATTR_POOL_MAX_ACTIVE = "pool-max-active";

    /**
     * Key to get maximum wait on pool object before we give up and
     * throw IOException.
     */
    public static final String ATTR_POOL_MAX_WAIT = "pool-max-wait";

    /**
     * Key for the maximum bytes to write attribute.
     */
    public static final String ATTR_MAX_BYTES_WRITTEN = "total-bytes-to-write";

    /**
     * Key for whether to skip writing records of content-digest repeats.
     */
    public static final String ATTR_SKIP_IDENTICAL_DIGESTS = "skip-identical-digests";

    /**
     * CrawlURI annotation indicating no record was written.
     */
    protected static final String ANNOTATION_UNWRITTEN = "unwritten";

    /**
     * Default maximum file size in bytes (100MB, decimal).
     * TODO: Check that subclasses can set a different MAX_FILE_SIZE and
     * it will be used in the constructor as default.
     */
    private static final int DEFAULT_MAX_FILE_SIZE = 100000000;

    /**
     * Default path list.
     *
     * TODO: Confirm this one gets picked up.
     */
    private static final String[] DEFAULT_PATH = { "crawl-store" };

    /**
     * Reference to pool. Transient: rebuilt by {@code setupPool} after
     * deserialization rather than serialized.
     */
    transient private WriterPool pool = null;

    /**
     * Total number of bytes written to disc.
     */
    private long totalBytesWritten = 0;

    /**
     * Calculate metadata once only; lazily cached by cacheMetadata().
     */
    transient private List<String> cachedMetadata = null;
170:
    /**
     * Constructor using a default description.
     * @param name Name of this processor.
     */
    public WriterPoolProcessor(String name) {
        this(name, "Pool of files processor");
    }
177:
178: /**
179: * @param name Name of this processor.
180: * @param description Description for this processor.
181: */
182: public WriterPoolProcessor(final String name,
183: final String description) {
184: super (name, description);
185: Type e = addElementToDefinition(new SimpleType(ATTR_COMPRESS,
186: "Compress files when " + "writing to disk.",
187: new Boolean(DEFAULT_COMPRESS)));
188: e.setOverrideable(false);
189: e = addElementToDefinition(new SimpleType(
190: ATTR_PREFIX,
191: "File prefix. "
192: + "The text supplied here will be used as a prefix naming "
193: + "writer files. For example if the prefix is 'IAH', "
194: + "then file names will look like "
195: + "IAH-20040808101010-0001-HOSTNAME.arc.gz "
196: + "...if writing ARCs (The prefix will be "
197: + "separated from the date by a hyphen).",
198: WriterPoolMember.DEFAULT_PREFIX));
199: e = addElementToDefinition(new SimpleType(
200: ATTR_SUFFIX,
201: "Suffix to tag onto "
202: + "files. If value is '${HOSTNAME}', will use hostname for "
203: + "suffix. If empty, no suffix will be added.",
204: WriterPoolMember.DEFAULT_SUFFIX));
205: e.setOverrideable(false);
206: e = addElementToDefinition(new SimpleType(ATTR_MAX_SIZE_BYTES,
207: "Max size of each file",
208: new Long(DEFAULT_MAX_FILE_SIZE)));
209: e.setOverrideable(false);
210: e = addElementToDefinition(new StringList(
211: ATTR_PATH,
212: "Where to files. "
213: + "Supply absolute or relative path. If relative, files "
214: + "will be written relative to "
215: + "the "
216: + CrawlOrder.ATTR_DISK_PATH
217: + "setting."
218: + " If more than one path specified, we'll round-robin"
219: + " dropping files to each. This setting is safe"
220: + " to change midcrawl (You can remove and add new dirs"
221: + " as the crawler progresses).",
222: getDefaultPath()));
223: e.setOverrideable(false);
224: e = addElementToDefinition(new SimpleType(
225: ATTR_POOL_MAX_ACTIVE,
226: "Maximum active files in pool. "
227: + "This setting cannot be varied over the life of a crawl.",
228: new Integer(WriterPool.DEFAULT_MAX_ACTIVE)));
229: e.setOverrideable(false);
230: e = addElementToDefinition(new SimpleType(
231: ATTR_POOL_MAX_WAIT,
232: "Maximum time to wait on pool element"
233: + " (milliseconds). This setting cannot be varied over the life"
234: + " of a crawl.", new Integer(
235: WriterPool.DEFAULT_MAXIMUM_WAIT)));
236: e.setOverrideable(false);
237: e = addElementToDefinition(new SimpleType(
238: ATTR_MAX_BYTES_WRITTEN,
239: "Total file bytes to write to disk."
240: + " Once the size of all files on disk has exceeded this "
241: + "limit, this processor will stop the crawler. "
242: + "A value of zero means no upper limit.",
243: new Long(0)));
244: e.setOverrideable(false);
245: e.setExpertSetting(true);
246: e = addElementToDefinition(new SimpleType(
247: ATTR_SKIP_IDENTICAL_DIGESTS,
248: "Whether to skip the writing of a record when URI "
249: + "history information is available and indicates the "
250: + "prior fetch had an identical content digest. "
251: + "Default is false.", new Boolean(false)));
252: e.setOverrideable(true);
253: e.setExpertSetting(true);
254: }
255:
256: protected String[] getDefaultPath() {
257: return DEFAULT_PATH;
258: }
259:
260: public synchronized void initialTasks() {
261: // Add this class to crawl state listeners and setup pool.
262: getSettingsHandler().getOrder().getController()
263: .addCrawlStatusListener(this );
264: setupPool(new AtomicInteger());
265: // Run checkpoint recovery code.
266: if (getSettingsHandler().getOrder().getController()
267: .isCheckpointRecover()) {
268: checkpointRecover();
269: }
270: }
271:
272: protected AtomicInteger getSerialNo() {
273: return ((WriterPool) getPool()).getSerialNo();
274: }
275:
    /**
     * Set up pool of files.
     *
     * Called from {@link #initialTasks()}, after a checkpoint closes the
     * pool (to reopen it at the given serial number), and after
     * deserialization.
     *
     * @param serialNo Serial number the pool should continue from.
     */
    protected abstract void setupPool(final AtomicInteger serialNo);

    /**
     * Writes a CrawlURI and its associated data to store file.
     *
     * Currently this method understands the following uri types: dns, http,
     * and https.
     *
     * @param curi CrawlURI to process.
     */
    protected abstract void innerProcess(CrawlURI curi);
290:
291: protected void checkBytesWritten() {
292: long max = getMaxToWrite();
293: if (max <= 0) {
294: return;
295: }
296: if (max <= this .totalBytesWritten) {
297: getController().requestCrawlStop(
298: "Finished - Maximum bytes (" + Long.toString(max)
299: + ") written");
300: }
301: }
302:
303: /**
304: * Whether the given CrawlURI should be written to archive files.
305: * Annotates CrawlURI with a reason for any negative answer.
306: *
307: * @param curi CrawlURI
308: * @return true if URI should be written; false otherwise
309: */
310: protected boolean shouldWrite(CrawlURI curi) {
311: // check for duplicate content write suppression
312: if (((Boolean) getUncheckedAttribute(curi,
313: ATTR_SKIP_IDENTICAL_DIGESTS))
314: && IdenticalDigestDecideRule.hasIdenticalDigest(curi)) {
315: curi.addAnnotation(ANNOTATION_UNWRITTEN
316: + ":identicalDigest");
317: return false;
318: }
319: String scheme = curi.getUURI().getScheme().toLowerCase();
320: // TODO: possibly move this sort of isSuccess() test into CrawlURI
321: boolean retVal;
322: if (scheme.equals("dns")) {
323: retVal = curi.getFetchStatus() == S_DNS_SUCCESS;
324: } else if (scheme.equals("http") || scheme.equals("https")) {
325: retVal = curi.getFetchStatus() > 0
326: && curi.isHttpTransaction();
327: } else if (scheme.equals("ftp")) {
328: retVal = curi.getFetchStatus() == 200;
329: } else {
330: // unsupported scheme
331: curi.addAnnotation(ANNOTATION_UNWRITTEN + ":scheme");
332: return false;
333: }
334: if (retVal == false) {
335: // status not deserving writing
336: curi.addAnnotation(ANNOTATION_UNWRITTEN + ":status");
337: return false;
338: }
339: return true;
340: }
341:
342: /**
343: * Return IP address of given URI suitable for recording (as in a
344: * classic ARC 5-field header line).
345: *
346: * @param curi CrawlURI
347: * @return String of IP address
348: */
349: protected String getHostAddress(CrawlURI curi) {
350: // special handling for DNS URIs: want address of DNS server
351: if (curi.getUURI().getScheme().toLowerCase().equals("dns")) {
352: return curi.getString(A_DNS_SERVER_IP_LABEL);
353: }
354: // otherwise, host referenced in URI
355: CrawlHost h = getController().getServerCache().getHostFor(curi);
356: if (h == null) {
357: throw new NullPointerException("Crawlhost is null for "
358: + curi + " " + curi.getVia());
359: }
360: InetAddress a = h.getIP();
361: if (a == null) {
362: throw new NullPointerException(
363: "Address is null for "
364: + curi
365: + " "
366: + curi.getVia()
367: + ". Address "
368: + ((h.getIpFetched() == CrawlHost.IP_NEVER_LOOKED_UP) ? "was never looked up."
369: : (System.currentTimeMillis() - h
370: .getIpFetched())
371: + " ms ago."));
372: }
373: return h.getIP().getHostAddress();
374: }
375:
376: /**
377: * Version of getAttributes that catches and logs exceptions
378: * and returns null if failure to fetch the attribute.
379: * @param name Attribute name.
380: * @return Attribute or null.
381: */
382: public Object getAttributeUnchecked(String name) {
383: Object result = null;
384: try {
385: result = super .getAttribute(name);
386: } catch (AttributeNotFoundException e) {
387: logger.warning(e.getLocalizedMessage());
388: } catch (MBeanException e) {
389: logger.warning(e.getLocalizedMessage());
390: } catch (ReflectionException e) {
391: logger.warning(e.getLocalizedMessage());
392: }
393: return result;
394: }
395:
396: /**
397: * Max size we want files to be (bytes).
398: *
399: * Default is ARCConstants.DEFAULT_MAX_ARC_FILE_SIZE. Note that ARC
400: * files will usually be bigger than maxSize; they'll be maxSize + length
401: * to next boundary.
402: * @return ARC maximum size.
403: */
404: public long getMaxSize() {
405: Object obj = getAttributeUnchecked(ATTR_MAX_SIZE_BYTES);
406: return (obj == null) ? DEFAULT_MAX_FILE_SIZE : ((Long) obj)
407: .longValue();
408: }
409:
410: public String getPrefix() {
411: Object obj = getAttributeUnchecked(ATTR_PREFIX);
412: return (obj == null) ? WriterPoolMember.DEFAULT_PREFIX
413: : (String) obj;
414: }
415:
416: public List<File> getOutputDirs() {
417: Object obj = getAttributeUnchecked(ATTR_PATH);
418: List list = (obj == null) ? Arrays.asList(DEFAULT_PATH)
419: : (StringList) obj;
420: ArrayList<File> results = new ArrayList<File>();
421: for (Iterator i = list.iterator(); i.hasNext();) {
422: String path = (String) i.next();
423: File f = new File(path);
424: if (!f.isAbsolute()) {
425: f = new File(getController().getDisk(), path);
426: }
427: if (!f.exists()) {
428: try {
429: f.mkdirs();
430: } catch (Exception e) {
431: e.printStackTrace();
432: continue;
433: }
434: }
435: results.add(f);
436: }
437: return results;
438: }
439:
440: public boolean isCompressed() {
441: Object obj = getAttributeUnchecked(ATTR_COMPRESS);
442: return (obj == null) ? DEFAULT_COMPRESS : ((Boolean) obj)
443: .booleanValue();
444: }
445:
446: /**
447: * @return Returns the poolMaximumActive.
448: */
449: public int getPoolMaximumActive() {
450: Object obj = getAttributeUnchecked(ATTR_POOL_MAX_ACTIVE);
451: return (obj == null) ? WriterPool.DEFAULT_MAX_ACTIVE
452: : ((Integer) obj).intValue();
453: }
454:
455: /**
456: * @return Returns the poolMaximumWait.
457: */
458: public int getPoolMaximumWait() {
459: Object obj = getAttributeUnchecked(ATTR_POOL_MAX_WAIT);
460: return (obj == null) ? WriterPool.DEFAULT_MAXIMUM_WAIT
461: : ((Integer) obj).intValue();
462: }
463:
464: public String getSuffix() {
465: Object obj = getAttributeUnchecked(ATTR_SUFFIX);
466: String sfx = (obj == null) ? WriterPoolMember.DEFAULT_SUFFIX
467: : (String) obj;
468: if (sfx != null
469: && sfx.trim()
470: .equals(WriterPoolMember.HOSTNAME_VARIABLE)) {
471: String str = "localhost.localdomain";
472: try {
473: str = InetAddress.getLocalHost().getHostName();
474: } catch (UnknownHostException ue) {
475: logger.severe("Failed getHostAddress for this host: "
476: + ue);
477: }
478: sfx = str;
479: }
480: return sfx;
481: }
482:
483: public long getMaxToWrite() {
484: Object obj = getAttributeUnchecked(ATTR_MAX_BYTES_WRITTEN);
485: return (obj == null) ? 0 : ((Long) obj).longValue();
486: }
487:
    /** Crawl is ending: close every writer in the pool. */
    public void crawlEnding(String sExitMessage) {
        this.pool.close();
    }

    public void crawlEnded(String sExitMessage) {
        // sExitMessage is unused; pool already closed in crawlEnding.
    }

    /* (non-Javadoc)
     * @see org.archive.crawler.event.CrawlStatusListener#crawlStarted(java.lang.String)
     */
    public void crawlStarted(String message) {
        // No work to do at crawl start; pool is built in initialTasks().
    }
502:
503: protected String getCheckpointStateFile() {
504: return this .getClass().getName() + ".state";
505: }
506:
    /**
     * Checkpoint callback: persist the writer serial number, then close
     * and reopen the pool so files roll over at the checkpoint boundary.
     *
     * @param checkpointDir Directory to write checkpoint state into.
     * @throws IOException If the serial-number state file cannot be written.
     */
    public void crawlCheckpoint(File checkpointDir) throws IOException {
        int serial = getSerialNo().get();
        if (this.pool.getNumActive() > 0) {
            // If we have open active Archive files, up the serial number
            // so after checkpoint, we start at one past current number and
            // so the number we serialize, is one past current serialNo.
            // All this serial number manipulation should be fine in here since
            // we're paused checkpointing (Revisit if this assumption changes).
            serial = getSerialNo().incrementAndGet();
        }
        saveCheckpointSerialNumber(checkpointDir, serial);
        // Close all ARCs on checkpoint.
        try {
            this.pool.close();
        } finally {
            // Reopen on checkpoint, continuing from the saved serial.
            setupPool(new AtomicInteger(serial));
        }
    }
526:
    public void crawlPausing(String statusMessage) {
        // statusMessage is unused; nothing to do on pause request.
    }

    public void crawlPaused(String statusMessage) {
        // statusMessage is unused; nothing to do when paused.
    }

    public void crawlResuming(String statusMessage) {
        // statusMessage is unused; nothing to do on resume.
    }
538:
    /**
     * Custom deserialization: after default field restore, schedule
     * re-creation of the transient writer pool once the whole stream
     * has finished reading.
     * NOTE(review): assumes the stream is an ObjectPlusFilesInputStream;
     * the cast fails for a plain ObjectInputStream — confirm all
     * deserialization paths use that stream type.
     */
    private void readObject(ObjectInputStream stream)
            throws IOException, ClassNotFoundException {
        stream.defaultReadObject();
        ObjectPlusFilesInputStream coistream = (ObjectPlusFilesInputStream) stream;
        coistream.registerFinishTask(new Runnable() {
            public void run() {
                setupPool(new AtomicInteger());
            }
        });
    }
549:
    /** @return The writer pool (null until setupPool has run). */
    protected WriterPool getPool() {
        return pool;
    }

    /** @param pool Writer pool for this processor to draw writers from. */
    protected void setPool(WriterPool pool) {
        this.pool = pool;
    }

    /** @return Running total of bytes written to disk. */
    protected long getTotalBytesWritten() {
        return totalBytesWritten;
    }

    /** @param totalBytesWritten New running total of bytes written. */
    protected void setTotalBytesWritten(long totalBytesWritten) {
        this.totalBytesWritten = totalBytesWritten;
    }
565:
566: /**
567: * Called out of {@link #initialTasks()} when recovering a checkpoint.
568: * Restore state.
569: */
570: protected void checkpointRecover() {
571: int serialNo = loadCheckpointSerialNumber();
572: if (serialNo != -1) {
573: getSerialNo().set(serialNo);
574: }
575: }
576:
577: /**
578: * @return Serial number from checkpoint state file or if unreadable, -1
579: * (Client should check for -1).
580: */
581: protected int loadCheckpointSerialNumber() {
582: int result = -1;
583:
584: // If in recover mode, read in the Writer serial number saved
585: // off when we checkpointed.
586: File stateFile = new File(getSettingsHandler().getOrder()
587: .getController().getCheckpointRecover().getDirectory(),
588: getCheckpointStateFile());
589: if (!stateFile.exists()) {
590: logger
591: .info(stateFile.getAbsolutePath()
592: + " doesn't exist so cannot restore Writer serial number.");
593: } else {
594: DataInputStream dis = null;
595: try {
596: dis = new DataInputStream(
597: new FileInputStream(stateFile));
598: result = dis.readShort();
599: } catch (FileNotFoundException e) {
600: e.printStackTrace();
601: } catch (IOException e) {
602: e.printStackTrace();
603: } finally {
604: try {
605: if (dis != null) {
606: dis.close();
607: }
608: } catch (IOException e) {
609: e.printStackTrace();
610: }
611: }
612: }
613: return result;
614: }
615:
616: protected void saveCheckpointSerialNumber(final File checkpointDir,
617: final int serialNo) throws IOException {
618: // Write out the current state of the ARCWriter serial number.
619: File f = new File(checkpointDir, getCheckpointStateFile());
620: DataOutputStream dos = new DataOutputStream(
621: new FileOutputStream(f));
622: try {
623: dos.writeShort(serialNo);
624: } finally {
625: dos.close();
626: }
627: }
628:
629: /**
630: * Return list of metadatas to add to first arc file metadata record.
631: *
632: * Default is to stylesheet the order file. To specify stylesheet,
633: * override {@link #getFirstrecordStylesheet()}.
634: *
635: * Get xml files from settingshandler. Currently order file is the
636: * only xml file. We're NOT adding seeds to meta data.
637: *
638: * @return List of strings and/or files to add to arc file as metadata or
639: * null.
640: */
641: public synchronized List<String> getMetadata() {
642: if (this .cachedMetadata != null) {
643: return this .cachedMetadata;
644: }
645: return cacheMetadata();
646: }
647:
648: protected synchronized List<String> cacheMetadata() {
649: if (this .cachedMetadata != null) {
650: return this .cachedMetadata;
651: }
652:
653: // If no stylesheet, return empty metadata.
654: if (getFirstrecordStylesheet() == null
655: || getFirstrecordStylesheet().length() == 0) {
656: this .cachedMetadata = new ArrayList<String>(1);
657: this .cachedMetadata.add("");
658: return this .cachedMetadata;
659: }
660:
661: List<String> result = null;
662: if (!XMLSettingsHandler.class.isInstance(getSettingsHandler())) {
663: logger
664: .warning("Expected xml settings handler (No warcinfo).");
665: // Early return
666: return result;
667: }
668:
669: XMLSettingsHandler xsh = (XMLSettingsHandler) getSettingsHandler();
670: File orderFile = xsh.getOrderFile();
671: if (!orderFile.exists() || !orderFile.canRead()) {
672: logger.severe("File " + orderFile.getAbsolutePath()
673: + " is does not exist or is not readable.");
674: } else {
675: result = new ArrayList<String>(1);
676: result.add(getFirstrecordBody(orderFile));
677: }
678: this .cachedMetadata = result;
679: return this .cachedMetadata;
680: }
681:
    /**
     * @return Full path to stylesheet (Its read off the CLASSPATH
     * as resource). Default is null, meaning no stylesheet and hence
     * empty metadata; subclasses override to supply one.
     */
    protected String getFirstrecordStylesheet() {
        return null;
    }
689:
690: /**
691: * Write the arc metadata body content.
692: *
693: * Its based on the order xml file but into this base we'll add other info
694: * such as machine ip.
695: *
696: * @param orderFile Order file.
697:
698: *
699: * @return String that holds the arc metaheader body.
700: */
701: protected String getFirstrecordBody(File orderFile) {
702: String result = null;
703: TransformerFactory factory = TransformerFactory.newInstance();
704: Templates templates = null;
705: Transformer xformer = null;
706: try {
707: templates = factory.newTemplates(new StreamSource(this
708: .getClass().getResourceAsStream(
709: getFirstrecordStylesheet())));
710: xformer = templates.newTransformer();
711: // Below parameter names must match what is in the stylesheet.
712: xformer.setParameter("software", "Heritrix "
713: + Heritrix.getVersion()
714: + " http://crawler.archive.org");
715: xformer.setParameter("ip", InetAddress.getLocalHost()
716: .getHostAddress());
717: xformer.setParameter("hostname", InetAddress.getLocalHost()
718: .getHostName());
719: StreamSource source = new StreamSource(new FileInputStream(
720: orderFile));
721: StringWriter writer = new StringWriter();
722: StreamResult target = new StreamResult(writer);
723: xformer.transform(source, target);
724: result = writer.toString();
725: } catch (TransformerConfigurationException e) {
726: logger.severe("Failed transform " + e);
727: } catch (FileNotFoundException e) {
728: logger.severe("Failed transform, file not found " + e);
729: } catch (UnknownHostException e) {
730: logger.severe("Failed transform, unknown host " + e);
731: } catch (TransformerException e) {
732: SourceLocator locator = e.getLocator();
733: int col = locator.getColumnNumber();
734: int line = locator.getLineNumber();
735: String publicId = locator.getPublicId();
736: String systemId = locator.getSystemId();
737: logger.severe("Transform error " + e + ", col " + col
738: + ", line " + line + ", publicId " + publicId
739: + ", systemId " + systemId);
740: }
741:
742: return result;
743: }
744: }
|