001: /*
002: * Copyright (C) Chaperon. All rights reserved.
003: * -------------------------------------------------------------------------
004: * This software is published under the terms of the Apache Software License
005: * version 1.1, a copy of which has been included with this distribution in
006: * the LICENSE file.
007: */
008:
009: package net.sourceforge.chaperon.ant;
010:
011: import net.sourceforge.chaperon.build.*;
012: import net.sourceforge.chaperon.model.grammar.*;
013: import net.sourceforge.chaperon.model.lexicon.*;
014: import net.sourceforge.chaperon.process.*;
015:
016: import org.apache.tools.ant.*;
017: import org.apache.tools.ant.taskdefs.MatchingTask;
018: import org.apache.tools.ant.types.Mapper;
019: import org.apache.tools.ant.types.XMLCatalog;
020: import org.apache.tools.ant.util.FileNameMapper;
021: import org.apache.tools.ant.util.IdentityMapper;
022:
023: import org.xml.sax.*;
024: import org.xml.sax.helpers.*;
025:
026: import java.io.*;
027:
028: import java.util.Properties;
029:
030: import javax.xml.parsers.SAXParserFactory;
031: import javax.xml.transform.OutputKeys;
032: import javax.xml.transform.sax.SAXTransformerFactory;
033: import javax.xml.transform.sax.TransformerHandler;
034: import javax.xml.transform.stream.StreamResult;
035:
036: /**
037: * A ant task for parsing text files
038: *
039: * @author <a href="mailto:stephan@apache.org">Stephan Michels </a>
040: * @version CVS $Id: ParserTask.java,v 1.2 2004/01/08 11:30:52 benedikta Exp $
041: */
042: public class ParserTask extends MatchingTask {
043: private File srcDir = null;
044: private File destDir = null;
045: private File baseDir = null;
046: private File cacheDir = null;
047: private Mapper mapper = null;
048:
049: /** for resolving entities such as dtds */
050: private XMLCatalog xmlCatalog = new XMLCatalog();
051: private File lexiconFile = null;
052: private File grammarFile = null;
053: private String parserFactory = null;
054: private SAXParserFactory parserFactoryImpl = null;
055: private String transformerFactory = null;
056: private SAXTransformerFactory transformerFactoryImpl = null;
057: private String encoding = "ISO-8859-1";
058: private boolean indent = false;
059: private boolean flatten = false;
060: private String inputtype = "text";
061: private int msgLevel = Project.MSG_ERR;
062: private AntLog log;
063: private ParserAutomaton parserautomaton = null;
064: private ParserProcessor parser = null;
065: private LexicalAutomaton lexicalautomaton = null;
066: private LexicalProcessor lexer = null;
067:
068: /**
069: * Constructs the task
070: */
071: public ParserTask() {
072: }
073:
074: /**
075: * Executes the task
076: *
077: * @throws BuildException
078: */
079: public void execute() throws BuildException {
080: if (baseDir == null)
081: baseDir = project.resolveFile(".");
082:
083: if (lexiconFile == null)
084: throw new BuildException("No lexicon file is specified",
085: location);
086:
087: if (!lexiconFile.exists())
088: throw new BuildException("Lexicon file doesn't exists:"
089: + lexiconFile.getAbsolutePath(), location);
090:
091: if (destDir == null)
092: throw new BuildException("No destdir specified!", location);
093:
094: log = new AntLog(getProject(), msgLevel);
095:
096: buildAutomata(lexiconFile, grammarFile);
097:
098: DirectoryScanner scanner = getDirectoryScanner(srcDir);
099:
100: FileNameMapper mapperImpl;
101:
102: if (mapper == null)
103: mapperImpl = new IdentityMapper();
104: else
105: mapperImpl = mapper.getImplementation();
106:
107: String[] list = scanner.getIncludedFiles();
108:
109: for (int i = 0; i < list.length; i++) {
110: String[] dest = mapperImpl.mapFileName(list[i]);
111:
112: if (dest != null)
113: for (int j = 0; j < dest.length; j++) {
114: log("Transforming " + list[i] + " to " + dest[j],
115: Project.MSG_DEBUG);
116: process(new File(srcDir, list[i]), new File(
117: destDir, dest[j]));
118: }
119: }
120: }
121:
122: /**
123: * Set the base directory.
124: *
125: * @param dir Base directory
126: */
127: public void setBasedir(File dir) {
128: baseDir = dir;
129: }
130:
131: /**
132: * Set the source directory
133: *
134: * @param dir Source directory
135: */
136: public void setSrcdir(File dir) {
137: srcDir = dir;
138: }
139:
140: /**
141: * Set the destination directory into which the result files should be copied to
142: *
143: * @param dir Destination directory
144: */
145: public void setDestdir(File dir) {
146: destDir = dir;
147: }
148:
149: /**
150: * @param dir Directory for chaching objects
151: */
152: public void setCachedir(File dir) {
153: cacheDir = dir;
154: }
155:
156: /**
157: * Creates a mapper.
158: *
159: * @return New mapper.
160: *
161: * @throws BuildException
162: */
163: public Mapper createMapper() throws BuildException {
164: if (mapper != null)
165: throw new BuildException(
166: "Cannot define more than one mapper", location);
167:
168: mapper = new Mapper(project);
169: return mapper;
170: }
171:
172: /**
173: * Set the lexicon, which should be used.
174: *
175: * @param lexiconFile Lexicon file.
176: */
177: public void setLexicon(File lexiconFile) {
178: this .lexiconFile = lexiconFile;
179: }
180:
181: /**
182: * Set the grammar, which should be used.
183: *
184: * @param grammarFile Grammar file.
185: */
186: public void setGrammar(File grammarFile) {
187: this .grammarFile = grammarFile;
188: }
189:
190: /**
191: * Sets the message level.
192: *
193: * @param msgLevel Message level.
194: */
195: public void setMsglevel(String msgLevel) {
196: if (msgLevel.equalsIgnoreCase("debug"))
197: this .msgLevel = Project.MSG_DEBUG;
198: else if (msgLevel.equalsIgnoreCase("verbose"))
199: this .msgLevel = Project.MSG_VERBOSE;
200: else if (msgLevel.equalsIgnoreCase("info"))
201: this .msgLevel = Project.MSG_INFO;
202: else if (msgLevel.equalsIgnoreCase("warn"))
203: this .msgLevel = Project.MSG_WARN;
204: else if (msgLevel.equalsIgnoreCase("error"))
205: this .msgLevel = Project.MSG_ERR;
206: }
207:
208: /**
209: * Sets the encoding for the input file
210: *
211: * @param encoding Encoding of the document
212: */
213: public void setEncoding(String encoding) {
214: this .encoding = encoding;
215: }
216:
217: /**
218: * Set if the output document should be indented
219: *
220: * @param indent If the output should be indented
221: */
222: public void setIndent(boolean indent) {
223: this .indent = indent;
224: }
225:
226: /**
227: * Reduces the deep of the produced hirachy by flatten nested element with same name.
228: *
229: * @param flatten If the hirache should be reduced.
230: */
231: public void setFlatten(boolean flatten) {
232: this .flatten = flatten;
233: }
234:
235: /**
236: * If the input document is a XML or a text document.
237: *
238: * @param inputtype Type of the input document.
239: */
240: public void setInputtype(String inputtype) {
241: this .inputtype = inputtype;
242: }
243:
244: /**
245: * Name of the parser factory.
246: *
247: * @param parserFactory Name of the parser factory.
248: */
249: public void setParser(String parserFactory) {
250: this .parserFactory = parserFactory;
251: }
252:
253: /**
254: * Name of the transformer factory.
255: *
256: * @param transformerFactory Name of the transformer factory.
257: */
258: public void setTransformer(String transformerFactory) {
259: this .transformerFactory = transformerFactory;
260: }
261:
262: /**
263: * Add the catalog to our internal catalog
264: *
265: * @param xmlCatalog the XMLCatalog instance to use to look up DTDs
266: */
267: public void addConfiguredXMLCatalog(XMLCatalog xmlCatalog) {
268: this .xmlCatalog.addConfiguredXMLCatalog(xmlCatalog);
269: }
270:
271: /**
272: * Initialize internal instance of XMLCatalog
273: */
274: public void init() throws BuildException {
275: super .init();
276: xmlCatalog.setProject(project);
277: }
278:
279: /**
280: * Processes the given input XML file and stores the result in the given resultFile.
281: *
282: * @param inFile The text file, which should parsed
283: * @param outFile The output file
284: *
285: * @throws BuildException
286: */
287: private void process(File inFile, File outFile)
288: throws BuildException {
289: try {
290: if (!inFile.exists())
291: throw new BuildException("File " + inFile
292: + " doesn't exists", location);
293:
294: if (inFile.lastModified() > outFile.lastModified()) {
295: ensureDirectoryFor(outFile);
296: log("Parsing file " + inFile + " to " + outFile,
297: Project.MSG_INFO);
298:
299: Properties format = new Properties();
300:
301: format.put(OutputKeys.ENCODING, encoding);
302: if (indent)
303: format.put(OutputKeys.INDENT, "yes");
304:
305: format.put(OutputKeys.METHOD, "xml");
306:
307: SAXTransformerFactory factory = getTransformerFactory();
308:
309: TransformerHandler serializer = factory
310: .newTransformerHandler();
311: serializer.getTransformer().setOutputProperties(format);
312: serializer.setResult(new StreamResult(outFile));
313:
314: if (this .parserautomaton != null) // && (this.parser==null))
315: {
316: this .parser = new ParserProcessor();
317: this .parser.setLog(log);
318: this .parser.setFlatten(this .flatten);
319: this .parser
320: .setParserAutomaton(this .parserautomaton);
321: this .parser.setContentHandler(serializer);
322: }
323:
324: this .lexer = new LexicalProcessor();
325: this .lexer.setLog(log);
326: this .lexer.setLexicalAutomaton(this .lexicalautomaton);
327: if (this .parserautomaton != null)
328: this .lexer.setContentHandler(this .parser);
329: else
330: this .lexer.setContentHandler(serializer);
331:
332: if (!inputtype.equalsIgnoreCase("xml"))
333: pushTextFile(inFile);
334: else
335: pushXMLFile(inFile);
336: }
337: } catch (Exception ex) {
338: if (outFile != null)
339: outFile.delete();
340:
341: if (ex instanceof BuildException)
342: throw (BuildException) ex;
343:
344: throw new BuildException("Failed to process " + inFile
345: + " : " + ex.getMessage(), ex);
346: }
347: }
348:
349: /**
350: * Build the automata for the lexicon and grammar.
351: *
352: * @param lexiconFile Lexicon file.
353: * @param grammarFile Grammar file.
354: *
355: * @throws BuildException
356: */
357: private void buildAutomata(File lexiconFile, File grammarFile)
358: throws BuildException {
359: if ((cacheDir != null) && (!cacheDir.exists()))
360: throw new BuildException("Cache directory " + cacheDir
361: + " doesn't exist");
362:
363: try {
364: // Lexicon
365: String filename = lexiconFile.getName();
366:
367: File cacheFile = null;
368: if (cacheDir != null)
369: cacheFile = new File(cacheDir, filename + ".obj");
370:
371: if ((cacheFile != null)
372: && (cacheFile.exists())
373: && (cacheFile.lastModified() > lexiconFile
374: .lastModified())) {
375: log("Reading lexicon from cache " + cacheFile,
376: Project.MSG_DEBUG);
377:
378: ObjectInputStream in = new ObjectInputStream(
379: new FileInputStream(cacheFile));
380: this .lexicalautomaton = (LexicalAutomaton) in
381: .readObject();
382: in.close();
383: } else {
384: log("Building lexicon from " + lexiconFile,
385: Project.MSG_INFO);
386:
387: SAXParserFactory factory = getParserFactory();
388:
389: factory.setNamespaceAware(true);
390:
391: XMLReader parser = factory.newSAXParser()
392: .getXMLReader();
393:
394: parser.setEntityResolver(xmlCatalog);
395:
396: LexiconFactory lexiconfactory = new LexiconFactory();
397: parser.setContentHandler(lexiconfactory);
398: try {
399: parser.parse(lexiconFile.toString());
400: } catch (SAXParseException se) {
401: throw new BuildException("Couldn't parse file "
402: + lexiconFile, se);
403: }
404:
405: Lexicon lexicon = lexiconfactory.getLexicon();
406:
407: this .lexicalautomaton = (new LexicalAutomatonBuilder(
408: lexicon, log)).getLexicalAutomaton();
409:
410: if (cacheFile != null) {
411: ObjectOutputStream out = new ObjectOutputStream(
412: new FileOutputStream(cacheFile));
413: out.writeObject(this .lexicalautomaton);
414: out.flush();
415: out.close();
416: }
417: }
418:
419: if (grammarFile != null) {
420: // Grammar
421: filename = grammarFile.getName();
422:
423: cacheFile = null;
424: if (cacheDir != null)
425: cacheFile = new File(cacheDir, filename + ".obj");
426:
427: if ((cacheFile != null)
428: && (cacheFile.exists())
429: && (cacheFile.lastModified() > grammarFile
430: .lastModified())) {
431: log("Reading grammar from cache " + cacheFile,
432: Project.MSG_DEBUG);
433:
434: ObjectInputStream in = new ObjectInputStream(
435: new FileInputStream(cacheFile));
436: this .parserautomaton = (ParserAutomaton) in
437: .readObject();
438: in.close();
439: } else {
440: log("Building grammar from " + grammarFile,
441: Project.MSG_INFO);
442:
443: SAXParserFactory factory = getParserFactory();
444:
445: factory.setNamespaceAware(true);
446:
447: XMLReader parser = factory.newSAXParser()
448: .getXMLReader();
449: parser.setEntityResolver(xmlCatalog);
450:
451: GrammarFactory grammarfactory = new GrammarFactory();
452: parser.setContentHandler(grammarfactory);
453: try {
454: parser.parse(grammarFile.toString());
455: } catch (SAXParseException se) {
456: throw new BuildException("Couldn't parse file "
457: + lexiconFile, se);
458: }
459:
460: Grammar grammar = grammarfactory.getGrammar();
461:
462: this .parserautomaton = (new ParserAutomatonBuilder(
463: grammar, log)).getParserAutomaton();
464:
465: if (cacheFile != null) {
466: ObjectOutputStream out = new ObjectOutputStream(
467: new FileOutputStream(cacheFile));
468: out.writeObject(this .parserautomaton);
469: out.flush();
470: out.close();
471: }
472: }
473: }
474: } catch (Exception ex) {
475: if (ex instanceof BuildException)
476: throw (BuildException) ex;
477:
478: throw new BuildException(ex);
479: }
480: }
481:
482: private void pushTextFile(File inFile) throws Exception {
483: try {
484: LocatorImpl locator = new LocatorImpl();
485:
486: locator.setSystemId(inFile.toURL().toString());
487: locator.setLineNumber(1);
488: locator.setColumnNumber(1);
489:
490: this .lexer.setDocumentLocator(locator);
491: this .lexer.startDocument();
492: this .lexer.startElement(
493: "http://chaperon.sourceforge.net/schema/text/1.0",
494: "text", "text", new AttributesImpl());
495:
496: LineNumberReader reader = new LineNumberReader(
497: new InputStreamReader(new FileInputStream(inFile)));
498:
499: String line;
500: String newline = null;
501: String separator = System.getProperty("line.separator");
502:
503: while (true) {
504: if (newline == null)
505: line = reader.readLine();
506: else
507: line = newline;
508:
509: if (line == null)
510: break;
511:
512: newline = reader.readLine();
513:
514: line = (newline != null) ? (line + separator) : line;
515:
516: locator.setLineNumber(reader.getLineNumber());
517: locator.setColumnNumber(1);
518: this .lexer.characters(line.toCharArray(), 0, line
519: .length());
520:
521: if (newline == null)
522: break;
523: }
524:
525: reader.close();
526:
527: this .lexer.endElement(
528: "http://chaperon.sourceforge.net/schema/text/1.0",
529: "text", "text");
530: this .lexer.endDocument();
531: } catch (SAXParseException se) {
532: throw new BuildException(
533: "Exception occurs during parsing file " + inFile
534: + " at line " + se.getLineNumber()
535: + " column " + se.getColumnNumber(), se);
536: }
537: }
538:
539: private void pushXMLFile(File inFile) throws Exception {
540: SAXParserFactory parserfactory = getParserFactory();
541:
542: parserfactory.setNamespaceAware(true);
543:
544: XMLReader parser = parserfactory.newSAXParser().getXMLReader();
545:
546: parser.setEntityResolver(xmlCatalog);
547:
548: parser.setContentHandler(this .lexer);
549: try {
550: parser.parse(inFile.toString());
551: } catch (SAXParseException se) {
552: throw new BuildException(
553: "Exception occurs during parsing file " + inFile
554: + " at line " + se.getLineNumber()
555: + " column " + se.getColumnNumber(), se);
556: }
557: }
558:
559: /**
560: * Ensures the directory for the output
561: *
562: * @param targetFile The directory
563: *
564: * @throws BuildException
565: */
566: private void ensureDirectoryFor(File targetFile)
567: throws BuildException {
568: File directory = new File(targetFile.getParent());
569:
570: if ((!directory.exists()) && (!directory.mkdirs()))
571: throw new BuildException("Unable to create directory: "
572: + directory.getAbsolutePath());
573: }
574:
575: private SAXParserFactory getParserFactory() throws BuildException {
576: if (parserFactoryImpl == null) {
577: try {
578: if (parserFactory == null)
579: parserFactoryImpl = SAXParserFactory.newInstance();
580: else
581: parserFactoryImpl = (SAXParserFactory) Class
582: .forName(parserFactory).newInstance();
583: } catch (Exception e) {
584: throw new BuildException(
585: "Could not load parser factory", e);
586: }
587: }
588:
589: return parserFactoryImpl;
590: }
591:
592: private SAXTransformerFactory getTransformerFactory()
593: throws BuildException {
594: if (transformerFactoryImpl == null) {
595: try {
596: if (transformerFactory == null)
597: transformerFactoryImpl = (SAXTransformerFactory) SAXTransformerFactory
598: .newInstance();
599: else
600: transformerFactoryImpl = (SAXTransformerFactory) Class
601: .forName(transformerFactory).newInstance();
602: } catch (Exception e) {
603: throw new BuildException(
604: "Could not load transformer factory", e);
605: }
606: }
607:
608: return transformerFactoryImpl;
609: }
610: }
|