001: /*
002: * Copyright (C) Chaperon. All rights reserved.
003: * -------------------------------------------------------------------------
004: * This software is published under the terms of the Apache Software License
005: * version 1.1, a copy of which has been included with this distribution in
006: * the LICENSE file.
007: */
008:
009: package net.sourceforge.chaperon.ant;
010:
import net.sourceforge.chaperon.model.extended.ExtendedGrammar;
import net.sourceforge.chaperon.process.extended.ExtendedDirectParserProcessor;

import org.apache.tools.ant.BuildException;
import org.apache.tools.ant.DirectoryScanner;
import org.apache.tools.ant.Project;
import org.apache.tools.ant.taskdefs.MatchingTask;
import org.apache.tools.ant.types.Mapper;
import org.apache.tools.ant.types.XMLCatalog;
import org.apache.tools.ant.util.FileNameMapper;
import org.apache.tools.ant.util.IdentityMapper;

import org.exolab.castor.mapping.Mapping;
import org.exolab.castor.xml.Unmarshaller;

import org.xml.sax.InputSource;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.AttributesImpl;
import org.xml.sax.helpers.LocatorImpl;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.OutputStream;

import java.util.Properties;

import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
045:
046: /**
047: * A ant task for parsing text files
048: *
049: * @author <a href="mailto:stephan@apache.org">Stephan Michels </a>
050: * @version CVS $Id: ExtendedParserTask.java,v 1.3 2004/01/09 10:48:06 benedikta Exp $
051: */
052: public class ExtendedParserTask extends MatchingTask {
053: private File srcDir = null;
054: private File destDir = null;
055: private File baseDir = null;
056: private Mapper mapper = null;
057:
058: /** for resolving entities such as dtds */
059: private XMLCatalog xmlCatalog = new XMLCatalog();
060: private File grammarFile = null;
061: private String parserFactory = null;
062: private SAXParserFactory parserFactoryImpl = null;
063: private String transformerFactory = null;
064: private SAXTransformerFactory transformerFactoryImpl = null;
065: private String encoding = "ISO-8859-1";
066: private boolean indent = false;
067: private boolean flatten = false;
068: private String inputtype = "text";
069: private int msgLevel = Project.MSG_ERR;
070: private AntLog log;
071: private ExtendedGrammar grammar = null;
072: private ExtendedDirectParserProcessor parser = null;
073:
074: /**
075: * Constructs the task
076: */
077: public ExtendedParserTask() {
078: }
079:
080: /**
081: * Executes the task
082: *
083: * @throws BuildException
084: */
085: public void execute() throws BuildException {
086: if (baseDir == null)
087: baseDir = project.resolveFile(".");
088:
089: if (grammarFile == null)
090: throw new BuildException("No grammar file is specified",
091: location);
092:
093: if (!grammarFile.exists())
094: throw new BuildException("Grammar file doesn't exists:"
095: + grammarFile.getAbsolutePath(), location);
096:
097: if (destDir == null)
098: throw new BuildException("No destdir specified!", location);
099:
100: log = new AntLog(getProject(), msgLevel);
101:
102: buildAutomata(grammarFile);
103:
104: DirectoryScanner scanner = getDirectoryScanner(srcDir);
105:
106: FileNameMapper mapperImpl;
107:
108: if (mapper == null)
109: mapperImpl = new IdentityMapper();
110: else
111: mapperImpl = mapper.getImplementation();
112:
113: String[] list = scanner.getIncludedFiles();
114:
115: for (int i = 0; i < list.length; i++) {
116: String[] dest = mapperImpl.mapFileName(list[i]);
117:
118: if (dest != null)
119: for (int j = 0; j < dest.length; j++) {
120: log("Transforming " + list[i] + " to " + dest[j],
121: Project.MSG_DEBUG);
122: process(new File(srcDir, list[i]), new File(
123: destDir, dest[j]));
124: }
125: }
126: }
127:
128: /**
129: * Set the base directory.
130: *
131: * @param dir Base directory
132: */
133: public void setBasedir(File dir) {
134: baseDir = dir;
135: }
136:
137: /**
138: * Set the source directory
139: *
140: * @param dir Source directory
141: */
142: public void setSrcdir(File dir) {
143: srcDir = dir;
144: }
145:
146: /**
147: * Set the destination directory into which the result files should be copied to
148: *
149: * @param dir Destination directory
150: */
151: public void setDestdir(File dir) {
152: destDir = dir;
153: }
154:
155: /**
156: * Creates a mapper.
157: *
158: * @return New mapper.
159: *
160: * @throws BuildException
161: */
162: public Mapper createMapper() throws BuildException {
163: if (mapper != null)
164: throw new BuildException(
165: "Cannot define more than one mapper", location);
166:
167: mapper = new Mapper(project);
168: return mapper;
169: }
170:
171: /**
172: * Set the grammar, which should be used.
173: *
174: * @param grammarFile Grammar file.
175: */
176: public void setGrammar(File grammarFile) {
177: this .grammarFile = grammarFile;
178: }
179:
180: /**
181: * Sets the message level.
182: *
183: * @param msgLevel Message level.
184: */
185: public void setMsglevel(String msgLevel) {
186: if (msgLevel.equalsIgnoreCase("debug"))
187: this .msgLevel = Project.MSG_DEBUG;
188: else if (msgLevel.equalsIgnoreCase("verbose"))
189: this .msgLevel = Project.MSG_VERBOSE;
190: else if (msgLevel.equalsIgnoreCase("info"))
191: this .msgLevel = Project.MSG_INFO;
192: else if (msgLevel.equalsIgnoreCase("warn"))
193: this .msgLevel = Project.MSG_WARN;
194: else if (msgLevel.equalsIgnoreCase("error"))
195: this .msgLevel = Project.MSG_ERR;
196: }
197:
198: /**
199: * Sets the encoding for the input file
200: *
201: * @param encoding Encoding of the document
202: */
203: public void setEncoding(String encoding) {
204: this .encoding = encoding;
205: }
206:
207: /**
208: * Set if the output document should be indented
209: *
210: * @param indent If the output should be indented
211: */
212: public void setIndent(boolean indent) {
213: this .indent = indent;
214: }
215:
216: /**
217: * If the input document is a XML or a text document.
218: *
219: * @param inputtype Type of the input document.
220: */
221: public void setInputtype(String inputtype) {
222: this .inputtype = inputtype;
223: }
224:
225: /**
226: * Name of the parser factory.
227: *
228: * @param parserFactory Name of the parser factory.
229: */
230: public void setParser(String parserFactory) {
231: this .parserFactory = parserFactory;
232: }
233:
234: /**
235: * Name of the transformer factory.
236: *
237: * @param transformerFactory Name of the transformer factory.
238: */
239: public void setTransformer(String transformerFactory) {
240: this .transformerFactory = transformerFactory;
241: }
242:
243: /**
244: * Add the catalog to our internal catalog
245: *
246: * @param xmlCatalog the XMLCatalog instance to use to look up DTDs
247: */
248: public void addConfiguredXMLCatalog(XMLCatalog xmlCatalog) {
249: this .xmlCatalog.addConfiguredXMLCatalog(xmlCatalog);
250: }
251:
252: /**
253: * Initialize internal instance of XMLCatalog
254: */
255: public void init() throws BuildException {
256: super .init();
257: xmlCatalog.setProject(project);
258: }
259:
260: /**
261: * Processes the given input XML file and stores the result in the given resultFile.
262: *
263: * @param inFile The text file, which should parsed
264: * @param outFile The output file
265: *
266: * @throws BuildException
267: */
268: private void process(File inFile, File outFile)
269: throws BuildException {
270: try {
271: if (!inFile.exists())
272: throw new BuildException("File " + inFile
273: + " doesn't exists", location);
274:
275: if (inFile.lastModified() > outFile.lastModified()) {
276: ensureDirectoryFor(outFile);
277: log("Parsing file " + inFile + " to " + outFile,
278: Project.MSG_INFO);
279:
280: Properties format = new Properties();
281:
282: format.put(OutputKeys.ENCODING, encoding);
283: if (indent)
284: format.put(OutputKeys.INDENT, "yes");
285:
286: format.put(OutputKeys.METHOD, "xml");
287:
288: SAXTransformerFactory factory = getTransformerFactory();
289:
290: TransformerHandler serializer = factory
291: .newTransformerHandler();
292: serializer.getTransformer().setOutputProperties(format);
293: serializer.setResult(new StreamResult(outFile));
294:
295: this .parser = new ExtendedDirectParserProcessor();
296: this .parser.setLog(log);
297: this .parser.setFlatten(this .flatten);
298:
299: this .parser.setExtendedGrammar(this .grammar);
300: this .parser.setContentHandler(serializer);
301:
302: if (!inputtype.equalsIgnoreCase("xml"))
303: pushTextFile(inFile);
304: else
305: pushXMLFile(inFile);
306: }
307: } catch (Exception ex) {
308: if (outFile != null)
309: outFile.delete();
310:
311: if (ex instanceof BuildException)
312: throw (BuildException) ex;
313:
314: throw new BuildException("Failed to process " + inFile
315: + " : " + ex.getMessage(), ex);
316: }
317: }
318:
319: /**
320: * Build the automata for the lexicon and grammar.
321: *
322: * @param grammarFile Grammar file.
323: *
324: * @throws BuildException
325: */
326: private void buildAutomata(File grammarFile) throws BuildException {
327: try {
328: log("Building grammar from " + grammarFile,
329: Project.MSG_INFO);
330:
331: SAXParserFactory factory = getParserFactory();
332:
333: factory.setNamespaceAware(true);
334:
335: XMLReader parser = factory.newSAXParser().getXMLReader();
336: parser.setEntityResolver(xmlCatalog);
337:
338: Mapping mapping = new Mapping();
339: mapping.loadMapping(new InputSource(ExtendedGrammar.class
340: .getResource("mapping.xml").openStream()));
341:
342: Unmarshaller unmarshaller = new Unmarshaller(
343: ExtendedGrammar.class);
344: unmarshaller.setMapping(mapping);
345:
346: this .grammar = (ExtendedGrammar) unmarshaller
347: .unmarshal(new FileReader(grammarFile));
348:
349: if (log.isDebugEnabled())
350: log.debug("grammar:\n" + grammar);
351: } catch (Exception ex) {
352: if (ex instanceof BuildException)
353: throw (BuildException) ex;
354:
355: throw new BuildException(ex);
356: }
357: }
358:
359: private void pushTextFile(File inFile) throws Exception {
360: try {
361: LocatorImpl locator = new LocatorImpl();
362:
363: locator.setSystemId(inFile.toURL().toString());
364: locator.setLineNumber(1);
365: locator.setColumnNumber(1);
366:
367: this .parser.setDocumentLocator(locator);
368: this .parser.startDocument();
369: this .parser.startElement(
370: "http://chaperon.sourceforge.net/schema/text/1.0",
371: "text", "text", new AttributesImpl());
372:
373: LineNumberReader reader = new LineNumberReader(
374: new InputStreamReader(new FileInputStream(inFile)));
375:
376: String line;
377: String newline = null;
378: String separator = System.getProperty("line.separator");
379:
380: while (true) {
381: if (newline == null)
382: line = reader.readLine();
383: else
384: line = newline;
385:
386: if (line == null)
387: break;
388:
389: newline = reader.readLine();
390:
391: line = (newline != null) ? (line + separator) : line;
392:
393: locator.setLineNumber(reader.getLineNumber());
394: locator.setColumnNumber(1);
395: this .parser.characters(line.toCharArray(), 0, line
396: .length());
397:
398: if (newline == null)
399: break;
400: }
401:
402: reader.close();
403:
404: this .parser.endElement(
405: "http://chaperon.sourceforge.net/schema/text/1.0",
406: "text", "text");
407: this .parser.endDocument();
408: } catch (SAXParseException se) {
409: throw new BuildException(
410: "Exception occurs during parsing file " + inFile
411: + " at line " + se.getLineNumber()
412: + " column " + se.getColumnNumber(), se);
413: }
414: }
415:
416: private void pushXMLFile(File inFile) throws Exception {
417: SAXParserFactory parserfactory = getParserFactory();
418:
419: parserfactory.setNamespaceAware(true);
420:
421: XMLReader parser = parserfactory.newSAXParser().getXMLReader();
422:
423: parser.setEntityResolver(xmlCatalog);
424:
425: parser.setContentHandler(this .parser);
426: try {
427: parser.parse(inFile.toString());
428: } catch (SAXParseException se) {
429: throw new BuildException(
430: "Exception occurs during parsing file " + inFile
431: + " at line " + se.getLineNumber()
432: + " column " + se.getColumnNumber(), se);
433: }
434: }
435:
436: /**
437: * Ensures the directory for the output
438: *
439: * @param targetFile The directory
440: *
441: * @throws BuildException
442: */
443: private void ensureDirectoryFor(File targetFile)
444: throws BuildException {
445: File directory = new File(targetFile.getParent());
446:
447: if ((!directory.exists()) && (!directory.mkdirs()))
448: throw new BuildException("Unable to create directory: "
449: + directory.getAbsolutePath());
450: }
451:
452: private SAXParserFactory getParserFactory() throws BuildException {
453: if (parserFactoryImpl == null) {
454: try {
455: if (parserFactory == null)
456: parserFactoryImpl = SAXParserFactory.newInstance();
457: else
458: parserFactoryImpl = (SAXParserFactory) Class
459: .forName(parserFactory).newInstance();
460: } catch (Exception e) {
461: throw new BuildException(
462: "Could not load parser factory", e);
463: }
464: }
465:
466: return parserFactoryImpl;
467: }
468:
469: private SAXTransformerFactory getTransformerFactory()
470: throws BuildException {
471: if (transformerFactoryImpl == null) {
472: try {
473: if (transformerFactory == null)
474: transformerFactoryImpl = (SAXTransformerFactory) SAXTransformerFactory
475: .newInstance();
476: else
477: transformerFactoryImpl = (SAXTransformerFactory) Class
478: .forName(transformerFactory).newInstance();
479: } catch (Exception e) {
480: throw new BuildException(
481: "Could not load transformer factory", e);
482: }
483: }
484:
485: return transformerFactoryImpl;
486: }
487: }
|