001: /*
002: * Java HTML Tidy - JTidy
003: * HTML parser and pretty printer
004: *
005: * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
006: * Institute of Technology, Institut National de Recherche en
007: * Informatique et en Automatique, Keio University). All Rights
008: * Reserved.
009: *
010: * Contributing Author(s):
011: *
012: * Dave Raggett <dsr@w3.org>
013: * Andy Quick <ac.quick@sympatico.ca> (translation to Java)
014: * Gary L Peskin <garyp@firstech.com> (Java development)
015: * Sami Lempinen <sami@lempinen.net> (release management)
016: * Fabrizio Giustina <fgiust at users.sourceforge.net>
017: *
018: * The contributing author(s) would like to thank all those who
019: * helped with testing, bug fixes, and patience. This wouldn't
020: * have been possible without all of you.
021: *
022: * COPYRIGHT NOTICE:
023: *
024: * This software and documentation is provided "as is," and
025: * the copyright holders and contributing author(s) make no
026: * representations or warranties, express or implied, including
027: * but not limited to, warranties of merchantability or fitness
028: * for any particular purpose or that the use of the software or
029: * documentation will not infringe any third party patents,
030: * copyrights, trademarks or other rights.
031: *
032: * The copyright holders and contributing author(s) will not be
033: * liable for any direct, indirect, special or consequential damages
034: * arising out of any use of the software or documentation, even if
035: * advised of the possibility of such damage.
036: *
037: * Permission is hereby granted to use, copy, modify, and distribute
038: * this source code, or portions hereof, documentation and executables,
039: * for any purpose, without fee, subject to the following restrictions:
040: *
041: * 1. The origin of this source code must not be misrepresented.
042: * 2. Altered versions must be plainly marked as such and must
043: * not be misrepresented as being the original source.
044: * 3. This Copyright notice may not be removed or altered from any
045: * source or altered source distribution.
046: *
047: * The copyright holders and contributing author(s) specifically
048: * permit, without fee, and encourage the use of this source code
049: * as a component for supporting the Hypertext Markup Language in
050: * commercial products. If you use this source code in a product,
051: * acknowledgment is not required but would be appreciated.
052: *
053: */
054: package org.w3c.tidy.ant;
055:
056: import java.io.BufferedInputStream;
057: import java.io.BufferedOutputStream;
058: import java.io.ByteArrayOutputStream;
059: import java.io.File;
060: import java.io.FileInputStream;
061: import java.io.FileNotFoundException;
062: import java.io.FileOutputStream;
063: import java.io.IOException;
064: import java.io.InputStream;
065: import java.io.OutputStream;
066: import java.io.PrintWriter;
067: import java.util.ArrayList;
068: import java.util.Iterator;
069: import java.util.List;
070: import java.util.Properties;
071:
072: import org.apache.tools.ant.BuildException;
073: import org.apache.tools.ant.DirectoryScanner;
074: import org.apache.tools.ant.Project;
075: import org.apache.tools.ant.Task;
076: import org.apache.tools.ant.types.FileSet;
077: import org.apache.tools.ant.types.Parameter;
078: import org.apache.tools.ant.util.FileNameMapper;
079: import org.apache.tools.ant.util.FlatFileNameMapper;
080: import org.apache.tools.ant.util.IdentityMapper;
081: import org.w3c.tidy.Tidy;
082:
083: /**
084: * JTidy ant task.
085: * <h3>Parameters</h3>
086: * <table cellspacing="0" border="1"> <thead>
087: * <tr>
088: * <th>Attribute</th>
089: * <th>Description</th>
090: * <th>Required</th>
091: * </tr>
092: * </thead> <tbody>
093: * <tr>
094: * <td>srcfile</td>
095: * <td>source file</td>
096: * <td>Yes, unless a nested <code><fileset></code> element is used.</td>
097: * </tr>
098: * <tr>
099: * <td>destfile</td>
100: * <td>destination file for output</td>
101: * <td rowspan="2">With the <code>srcfile</code> attribute, either <code>destfile</code> or <code>destdir</code>
102: * can be used. With nested <code><fileset></code> elements only <code>destdir</code> is allowed.</td>
103: * </tr>
104: * <tr>
105: * <td>destdir</td>
106: * <td>destination directory for output</td>
107: * </tr>
108: * <tr>
109: * <td>properties</td>
110: * <td>Path to a valid tidy properties file</td>
111: * <td>No</td>
112: * </tr>
113: * <tr>
114: * <td>flatten</td>
115: * <td>Ignore the directory structure of the source files, and copy all files into the directory specified by the
116: * <code>destdir</code> attribute.</td>
117: * <td>No; defaults to false.</td>
118: * </tr>
119: * <tr>
120: * <td>failonerror</td>
121: * <td>boolean to control whether failure to execute should throw a BuildException or just print an error. If set to
122: * <code>true</code>, errors in input files which tidy is unable to fix will cause a failure.</td>
123: * <td>No; defaults to false.</td>
124: * </tr>
125: * </tbody> </table>
126: * <h3>Nested elements</h3>
127: * <ul>
128: * <li><strong>Fileset </strong>: if you need to run tidy on more than one file, you can specify nested filesets.</li>
129: * <li><strong>Parameter </strong>: you can specify any tidy configuration option directly using a nested
130: * <code>parameter</code> element.</li>
131: * </ul>
132: * <h3>Setup</h3>
133: * <p>
134: * Add the following <code>taskdef</code> to set up the JTidy task in your build.xml:
135: * </p>
136: *
137: * <pre>
138: * <taskdef name="tidy" classname="org.w3c.tidy.ant.JTidyTask"/>
139: * </pre>
140: *
141: * <p>
142: * This will work if JTidy jar is copied to ant lib directory. If you need to reference the jar elsewhere on the
143: * filesystem you can add a nested classpath element:
144: * </p>
145: *
146: * <pre>
147: * <taskdef name="tidy" classname="org.w3c.tidy.ant.JTidyTask">
148: * <classpath>
149: * <pathelement location="${lib.dir}/jtidy.jar"/>
150: * </classpath>
151: * </taskdef>
152: * </pre>
153: *
154: * <h3>Examples</h3>
155: *
156: * <pre>
157: * <tidy destdir="out" properties="/path/to/tidy.properties">
158: * <fileset dir="inputdir" />
159: * <parameter name="drop-font-tags" value="true" />
160: * </tidy>
161: * </pre>
162: *
163: * @author Fabrizio Giustina
164: * @version $Revision: 1.8 $ ($Author: fgiust $)
165: */
166: public class JTidyTask extends Task {
167:
168: /**
169: * Filesets.
170: */
171: private List filesets = new ArrayList();
172:
173: /**
174: * Destination directory for output.
175: */
176: private File destdir;
177:
178: /**
179: * Destination file for output.
180: */
181: private File destfile;
182:
183: /**
184: * Source file.
185: */
186: private File srcfile;
187:
188: /**
189: * Control whether failure to execute should throw a BuildException.
190: */
191: private boolean failonerror;
192:
193: /**
194: * Don't output directories.
195: */
196: private boolean flatten;
197:
198: /**
199: * tidy instance.
200: */
201: private Tidy tidy;
202:
203: /**
204: * Configured properties.
205: */
206: private Properties props;
207:
208: /**
209: * Properties file.
210: */
211: private File properties;
212:
213: /**
214: * @param destdir The destdir to set.
215: */
216: public void setDestdir(File destdir) {
217: this .destdir = destdir;
218: }
219:
220: /**
221: * @param destfile The destfile to set.
222: */
223: public void setDestfile(File destfile) {
224: this .destfile = destfile;
225: }
226:
227: /**
228: * @param srcfile The srcfile to set.
229: */
230: public void setSrcfile(File srcfile) {
231: this .srcfile = srcfile;
232: }
233:
234: /**
235: * @param failonerror The failonerror to set.
236: */
237: public void setFailonerror(boolean failonerror) {
238: this .failonerror = failonerror;
239: }
240:
241: /**
242: * @param flatten The flatten to set.
243: */
244: public void setFlatten(boolean flatten) {
245: this .flatten = flatten;
246: }
247:
248: /**
249: * @param properties The properties to set.
250: */
251: public void setProperties(File properties) {
252: this .properties = properties;
253: }
254:
255: /**
256: * Adds a fileset to be processed Fileset
257: * @param fileSet
258: */
259: public void addFileset(FileSet fileSet) {
260: filesets.add(fileSet);
261: }
262:
263: /**
264: * Setter method for any property using the ant type Parameter.
265: * @param prop Ant type Parameter
266: */
267: public void addConfiguredParameter(Parameter prop) {
268: props.setProperty(prop.getName(), prop.getValue());
269: }
270:
271: /**
272: * Initializes the task.
273: */
274: public void init() {
275: super .init();
276:
277: // Setup a Tidy instance
278: tidy = new Tidy();
279: props = new Properties();
280: }
281:
282: /**
283: * Validates task parameters.
284: * @throws BuildException if any invalid parameter is found
285: */
286: protected void validateParameters() throws BuildException {
287: if (srcfile == null && filesets.size() == 0) {
288: throw new BuildException(
289: "Specify at least srcfile or a fileset.");
290: }
291: if (srcfile != null && filesets.size() > 0) {
292: throw new BuildException(
293: "You can't specify both srcfile and nested filesets.");
294: }
295:
296: if (destfile == null && destdir == null) {
297: throw new BuildException(
298: "One of destfile or destdir must be set.");
299: }
300:
301: if (srcfile == null && destfile != null) {
302: throw new BuildException(
303: "You only can use destfile with srcfile.");
304: }
305:
306: if (srcfile != null && srcfile.exists()
307: && srcfile.isDirectory()) {
308: throw new BuildException("srcfile can't be a directory.");
309: }
310:
311: if (properties != null
312: && (!properties.exists() || properties.isDirectory())) {
313: throw new BuildException(
314: "Invalid properties file specified: "
315: + properties.getPath());
316: }
317:
318: }
319:
320: /**
321: * Run the task.
322: * @exception BuildException The exception raised during task execution.
323: */
324: public void execute() throws BuildException {
325: // validate
326: validateParameters();
327:
328: // load configuration
329: if (this .properties != null) {
330: try {
331: this .props.load(new FileInputStream(this .properties));
332: } catch (IOException e) {
333: throw new BuildException(
334: "Unable to load properties file " + properties,
335: e);
336: }
337: }
338:
339: // hide output unless set in properties
340: tidy.setErrout(new PrintWriter(new ByteArrayOutputStream()));
341:
342: tidy.setConfigurationFromProps(props);
343:
344: if (this .srcfile != null) {
345: // process a single file
346: executeSingle();
347: } else {
348: // process filesets
349: executeSet();
350: }
351: }
352:
353: /**
354: * A single file has been specified.
355: */
356: protected void executeSingle() {
357:
358: if (!srcfile.exists()) {
359: throw new BuildException("Could not find source file "
360: + srcfile.getAbsolutePath() + ".");
361: }
362:
363: if (destfile == null) {
364: // destdir can't be null, condition is checked in validateParameters()
365: destfile = new File(destdir, srcfile.getName());
366: }
367:
368: processFile(srcfile, destfile);
369: }
370:
371: /**
372: * Run tidy on filesets.
373: */
374: protected void executeSet() {
375:
376: FileNameMapper mapper = null;
377: if (flatten) {
378: mapper = new FlatFileNameMapper();
379: } else {
380: mapper = new IdentityMapper();
381: }
382:
383: mapper.setTo(this .destdir.getAbsolutePath());
384:
385: Iterator iterator = filesets.iterator();
386: while (iterator.hasNext()) {
387: FileSet fileSet = (FileSet) iterator.next();
388: DirectoryScanner directoryScanner = fileSet
389: .getDirectoryScanner(getProject());
390: String[] sourceFiles = directoryScanner.getIncludedFiles();
391: File inputdir = directoryScanner.getBasedir();
392:
393: mapper.setFrom(inputdir.getAbsolutePath());
394:
395: for (int j = 0; j < sourceFiles.length; j++) {
396: String[] mapped = mapper.mapFileName(sourceFiles[j]);
397:
398: processFile(new File(inputdir, sourceFiles[j]),
399: new File(this .destdir, mapped[0]));
400: }
401: }
402: }
403:
404: /**
405: * Run tidy on a file.
406: * @param inputFile input file
407: * @param outputFile output file
408: */
409: protected void processFile(File inputFile, File outputFile) {
410:
411: log("Processing " + inputFile.getAbsolutePath(),
412: Project.MSG_DEBUG);
413:
414: InputStream is;
415: OutputStream os;
416: try {
417: is = new BufferedInputStream(new FileInputStream(inputFile));
418: } catch (IOException e) {
419: throw new BuildException("Unable to open file " + inputFile);
420: }
421:
422: try {
423: outputFile.getParentFile().mkdirs();
424: outputFile.createNewFile();
425: os = new BufferedOutputStream(new FileOutputStream(
426: outputFile));
427: } catch (IOException e) {
428: throw new BuildException("Unable to open destination file "
429: + outputFile, e);
430: }
431:
432: tidy.parse(is, os);
433:
434: try {
435: is.close();
436: } catch (IOException e1) {
437: // ignore
438: }
439: try {
440: os.flush();
441: os.close();
442: } catch (IOException e1) {
443: // ignore
444: }
445:
446: // cleanup empty files
447: if (tidy.getParseErrors() > 0 && !tidy.getForceOutput()) {
448: outputFile.delete();
449: }
450:
451: if (failonerror && tidy.getParseErrors() > 0) {
452: throw new BuildException("Tidy was unable to process file "
453: + inputFile + ", " + tidy.getParseErrors()
454: + " returned.");
455: }
456:
457: }
458: }
|