0001: /*
0002: * Java HTML Tidy - JTidy
0003: * HTML parser and pretty printer
0004: *
0005: * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
0006: * Institute of Technology, Institut National de Recherche en
0007: * Informatique et en Automatique, Keio University). All Rights
0008: * Reserved.
0009: *
0010: * Contributing Author(s):
0011: *
0012: * Dave Raggett <dsr@w3.org>
0013: * Andy Quick <ac.quick@sympatico.ca> (translation to Java)
0014: * Gary L Peskin <garyp@firstech.com> (Java development)
0015: * Sami Lempinen <sami@lempinen.net> (release management)
0016: * Fabrizio Giustina <fgiust at users.sourceforge.net>
0017: *
0018: * The contributing author(s) would like to thank all those who
0019: * helped with testing, bug fixes, and patience. This wouldn't
0020: * have been possible without all of you.
0021: *
0022: * COPYRIGHT NOTICE:
0023: *
0024: * This software and documentation is provided "as is," and
0025: * the copyright holders and contributing author(s) make no
0026: * representations or warranties, express or implied, including
0027: * but not limited to, warranties of merchantability or fitness
0028: * for any particular purpose or that the use of the software or
0029: * documentation will not infringe any third party patents,
0030: * copyrights, trademarks or other rights.
0031: *
0032: * The copyright holders and contributing author(s) will not be
0033: * liable for any direct, indirect, special or consequential damages
0034: * arising out of any use of the software or documentation, even if
0035: * advised of the possibility of such damage.
0036: *
0037: * Permission is hereby granted to use, copy, modify, and distribute
0038: * this source code, or portions hereof, documentation and executables,
0039: * for any purpose, without fee, subject to the following restrictions:
0040: *
0041: * 1. The origin of this source code must not be misrepresented.
0042: * 2. Altered versions must be plainly marked as such and must
0043: * not be misrepresented as being the original source.
0044: * 3. This Copyright notice may not be removed or altered from any
0045: * source or altered source distribution.
0046: *
0047: * The copyright holders and contributing author(s) specifically
0048: * permit, without fee, and encourage the use of this source code
0049: * as a component for supporting the Hypertext Markup Language in
0050: * commercial products. If you use this source code in a product,
0051: * acknowledgment is not required but would be appreciated.
0052: *
0053: */
0054: package org.w3c.tidy;
0055:
0056: import java.io.FileInputStream;
0057: import java.io.FileNotFoundException;
0058: import java.io.FileOutputStream;
0059: import java.io.FileWriter;
0060: import java.io.IOException;
0061: import java.io.InputStream;
0062: import java.io.OutputStream;
0063: import java.io.PrintWriter;
0064: import java.io.Serializable;
0065: import java.util.HashMap;
0066: import java.util.Map;
0067: import java.util.Properties;
0068:
0069: /**
0070: * HTML parser and pretty printer.
0071: * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
0072: * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
0073: * @author Fabrizio Giustina
0074: * @version $Revision: 1.61 $ ($Author: fgiust $)
0075: */
0076: public class Tidy implements Serializable {
0077:
0078: /**
0079: * Serial Version UID to avoid problems during serialization.
0080: */
0081: static final long serialVersionUID = -2794371560623987718L;
0082:
0083: /**
0084: * Alias for configuration options accepted in command line.
0085: */
0086: private static final Map CMDLINE_ALIAS = new HashMap();
0087:
0088: static {
0089: CMDLINE_ALIAS.put("xml", "input-xml");
0090: CMDLINE_ALIAS.put("xml", "output-xhtml");
0091: CMDLINE_ALIAS.put("asxml", "output-xhtml");
0092: CMDLINE_ALIAS.put("ashtml", "output-html");
0093: CMDLINE_ALIAS.put("omit", "hide-endtags");
0094: CMDLINE_ALIAS.put("upper", "uppercase-tags");
0095: CMDLINE_ALIAS.put("raw", "output-raw");
0096: CMDLINE_ALIAS.put("numeric", "numeric-entities");
0097: CMDLINE_ALIAS.put("change", "write-back");
0098: CMDLINE_ALIAS.put("update", "write-back");
0099: CMDLINE_ALIAS.put("modify", "write-back");
0100: CMDLINE_ALIAS.put("errors", "only-errors");
0101: CMDLINE_ALIAS.put("slides", "split");
0102: CMDLINE_ALIAS.put("lang", "language");
0103: CMDLINE_ALIAS.put("w", "wrap");
0104: CMDLINE_ALIAS.put("file", "error-file");
0105: CMDLINE_ALIAS.put("f", "error-file");
0106: }
0107:
0108: /**
0109: * Error output stream.
0110: */
0111: private PrintWriter errout;
0112:
0113: private PrintWriter stderr;
0114:
0115: private Configuration configuration;
0116:
0117: private String inputStreamName = "InputStream";
0118:
0119: private int parseErrors;
0120:
0121: private int parseWarnings;
0122:
0123: private Report report;
0124:
0125: /**
0126: * Instantiates a new Tidy instance. It's reccomended that a new instance is used at each parsing.
0127: */
0128: public Tidy() {
0129: this .report = new Report();
0130: configuration = new Configuration(this .report);
0131: if (configuration == null) {
0132: return;
0133: }
0134:
0135: AttributeTable at = AttributeTable.getDefaultAttributeTable();
0136: if (at == null) {
0137: return;
0138: }
0139: TagTable tt = new TagTable();
0140: if (tt == null) {
0141: return;
0142: }
0143: tt.setConfiguration(configuration);
0144: configuration.tt = tt;
0145: EntityTable et = EntityTable.getDefaultEntityTable();
0146: if (et == null) {
0147: return;
0148: }
0149:
0150: configuration.errfile = null;
0151: stderr = new PrintWriter(System.err, true);
0152: errout = stderr;
0153: }
0154:
0155: /**
0156: * Returns the actual configuration
0157: * @return tidy configuration
0158: */
0159: public Configuration getConfiguration() {
0160: return configuration;
0161: }
0162:
0163: public PrintWriter getStderr() {
0164: return stderr;
0165: }
0166:
0167: /**
0168: * ParseErrors - the number of errors that occurred in the most recent parse operation.
0169: * @return number of errors that occurred in the most recent parse operation.
0170: */
0171: public int getParseErrors() {
0172: return parseErrors;
0173: }
0174:
0175: /**
0176: * ParseWarnings - the number of warnings that occurred in the most recent parse operation.
0177: * @return number of warnings that occurred in the most recent parse operation.
0178: */
0179: public int getParseWarnings() {
0180: return parseWarnings;
0181: }
0182:
0183: /**
0184: * InputStreamName - the name of the input stream (printed in the header information).
0185: * @param name input stream name
0186: */
0187: public void setInputStreamName(String name) {
0188: if (name != null) {
0189: inputStreamName = name;
0190: }
0191: }
0192:
0193: public String getInputStreamName() {
0194: return inputStreamName;
0195: }
0196:
0197: /**
0198: * Errout - the error output stream.
0199: * @return error output stream.
0200: */
0201: public PrintWriter getErrout() {
0202: return errout;
0203: }
0204:
0205: public void setErrout(PrintWriter out) {
0206: this .errout = out;
0207: }
0208:
0209: /**
0210: * Sets the configuration from a configuration file.
0211: * @param filename configuration file name/path.
0212: */
0213: public void setConfigurationFromFile(String filename) {
0214: configuration.parseFile(filename);
0215: }
0216:
0217: /**
0218: * Sets the configuration from a properties object.
0219: * @param props Properties object
0220: */
0221: public void setConfigurationFromProps(Properties props) {
0222: configuration.addProps(props);
0223: }
0224:
0225: /**
0226: * Parses InputStream in and returns the root Node. If out is non-null, pretty prints to OutputStream out.
0227: * @param in input stream
0228: * @param out optional output stream
0229: * @return parsed org.w3c.tidy.Node
0230: */
0231: public Node parse(InputStream in, OutputStream out) {
0232: Node document = null;
0233:
0234: try {
0235: document = parse(in, null, out);
0236: } catch (FileNotFoundException fnfe) {
0237: // ignore
0238: } catch (IOException e) {
0239: // ignore
0240: }
0241:
0242: return document;
0243: }
0244:
0245: /**
0246: * Internal routine that actually does the parsing. The caller can pass either an InputStream or file name. If both
0247: * are passed, the file name is preferred.
0248: * @param in input stream (used only if <code>file</code> is null)
0249: * @param file file name
0250: * @param out output stream
0251: * @return parsed org.w3c.tidy.Node
0252: * @throws FileNotFoundException if <code>file</code> is not null but it can't be found
0253: * @throws IOException for errors in reading input stream or file
0254: */
0255: private Node parse(InputStream in, String file, OutputStream out)
0256: throws FileNotFoundException, IOException {
0257: Lexer lexer;
0258: Node document = null;
0259: Node doctype;
0260: PPrint pprint;
0261:
0262: if (errout == null) {
0263: return null;
0264: }
0265:
0266: parseErrors = 0;
0267: parseWarnings = 0;
0268:
0269: // ensure config is self-consistent
0270: configuration.adjust();
0271:
0272: if (file != null) {
0273: in = new FileInputStream(file);
0274: inputStreamName = file;
0275: } else if (in == null) {
0276: in = System.in;
0277: inputStreamName = "stdin";
0278: }
0279:
0280: if (in != null) {
0281:
0282: StreamIn streamIn = StreamInFactory.getStreamIn(
0283: configuration, in);
0284:
0285: lexer = new Lexer(streamIn, configuration, this .report);
0286: lexer.errout = errout;
0287:
0288: // store pointer to lexer in input stream to allow character encoding errors to be reported
0289: streamIn.setLexer(lexer);
0290:
0291: this .report.setFilename(inputStreamName); // #431895 - fix by Dave Bryan 04 Jan 01
0292:
0293: if (!configuration.quiet) {
0294: this .report.helloMessage(errout);
0295: }
0296:
0297: // skip byte order mark
0298:
0299: // if (lexer.configuration.getInCharEncoding() == Configuration.UTF8
0300: // || lexer.configuration.getInCharEncoding() == Configuration.UTF16LE
0301: // || lexer.configuration.getInCharEncoding() == Configuration.UTF16BE
0302: // || lexer.configuration.getInCharEncoding() == Configuration.UTF16)
0303: // {
0304: // int c = lexer.in.readChar();
0305: // if (c != EncodingUtils.UNICODE_BOM)
0306: // {
0307: // lexer.in.ungetChar(c);
0308: // }
0309: // }
0310:
0311: // Tidy doesn't alter the doctype for generic XML docs
0312: if (configuration.xmlTags) {
0313: document = ParserImpl.parseXMLDocument(lexer);
0314: if (!document.checkNodeIntegrity()) {
0315: if (!configuration.quiet) {
0316: report.badTree(errout);
0317: }
0318: return null;
0319: }
0320: } else {
0321: lexer.warnings = 0;
0322:
0323: document = ParserImpl.parseDocument(lexer);
0324:
0325: if (!document.checkNodeIntegrity()) {
0326: if (!configuration.quiet) {
0327: this .report.badTree(errout);
0328: }
0329: return null;
0330: }
0331:
0332: Clean cleaner = new Clean(configuration.tt);
0333:
0334: // simplifies <b><b> ... </b> ... </b> etc.
0335: cleaner.nestedEmphasis(document);
0336:
0337: // cleans up <dir> indented text </dir> etc.
0338: cleaner.list2BQ(document);
0339: cleaner.bQ2Div(document);
0340:
0341: // replaces i by em and b by strong
0342: if (configuration.logicalEmphasis) {
0343: cleaner.emFromI(document);
0344: }
0345:
0346: if (configuration.word2000
0347: && cleaner.isWord2000(document)) {
0348: // prune Word2000's <![if ...]> ... <![endif]>
0349: cleaner.dropSections(lexer, document);
0350:
0351: // drop style & class attributes and empty p, span elements
0352: cleaner.cleanWord2000(lexer, document);
0353: }
0354:
0355: // replaces presentational markup by style rules
0356: if (configuration.makeClean
0357: || configuration.dropFontTags) {
0358: cleaner.cleanTree(lexer, document);
0359: }
0360:
0361: if (!document.checkNodeIntegrity()) {
0362: this .report.badTree(errout);
0363: return null;
0364: }
0365:
0366: doctype = document.findDocType();
0367:
0368: // remember given doctype
0369: if (doctype != null) {
0370: doctype = (Node) doctype.clone();
0371: }
0372:
0373: if (document.content != null) {
0374: if (configuration.xHTML) {
0375: lexer.setXHTMLDocType(document);
0376: } else {
0377: lexer.fixDocType(document);
0378: }
0379:
0380: if (configuration.tidyMark) {
0381: lexer.addGenerator(document);
0382: }
0383: }
0384:
0385: // ensure presence of initial <?XML version="1.0"?>
0386: if (configuration.xmlOut && configuration.xmlPi) {
0387: lexer.fixXmlDecl(document);
0388: }
0389:
0390: if (!configuration.quiet && document.content != null) {
0391: this .report.reportVersion(errout, lexer,
0392: inputStreamName, doctype);
0393: }
0394: }
0395:
0396: // Try to close the InputStream but only if if we created it.
0397: if ((file != null) && (in != System.in)) {
0398: try {
0399: in.close();
0400: } catch (IOException e) {
0401: // ignore
0402: }
0403: }
0404:
0405: if (!configuration.quiet) {
0406: parseWarnings = lexer.warnings;
0407: parseErrors = lexer.errors;
0408: this .report.reportNumWarnings(errout, lexer);
0409: }
0410:
0411: if (!configuration.quiet && lexer.errors > 0
0412: && !configuration.forceOutput) {
0413: this .report.needsAuthorIntervention(errout);
0414: }
0415:
0416: if (!configuration.onlyErrors
0417: && (lexer.errors == 0 || configuration.forceOutput)) {
0418: if (configuration.burstSlides) {
0419: Node body;
0420:
0421: body = null;
0422: // remove doctype to avoid potential clash with markup introduced when bursting into slides
0423:
0424: // discard the document type
0425: doctype = document.findDocType();
0426:
0427: if (doctype != null) {
0428: Node.discardElement(doctype);
0429: }
0430:
0431: /* slides use transitional features */
0432: lexer.versions |= Dict.VERS_HTML40_LOOSE;
0433:
0434: // and patch up doctype to match
0435: if (configuration.xHTML) {
0436: lexer.setXHTMLDocType(document);
0437: } else {
0438: lexer.fixDocType(document);
0439: }
0440:
0441: // find the body element which may be implicit
0442: body = document.findBody(configuration.tt);
0443:
0444: if (body != null) {
0445: pprint = new PPrint(configuration);
0446: if (!configuration.quiet) {
0447: this .report.reportNumberOfSlides(errout,
0448: pprint.countSlides(body));
0449: }
0450: pprint.createSlides(lexer, document);
0451: } else if (!configuration.quiet) {
0452: this .report.missingBody(errout);
0453: }
0454: } else if (configuration.writeback && (file != null)) {
0455: try {
0456: pprint = new PPrint(configuration);
0457: FileOutputStream fis = new FileOutputStream(
0458: file);
0459:
0460: Out o = OutFactory.getOut(this .configuration,
0461: fis);
0462:
0463: if (document.findDocType() == null) {
0464: // only use numeric character references if no doctype could be determined (e.g., because
0465: // the document contains proprietary features) to ensure well-formedness.
0466: configuration.numEntities = true;
0467: }
0468: if (configuration.bodyOnly) {
0469: // Feature request #434940 - fix by Dave Raggett/Ignacio Vazquez-Abrams 21 Jun 01
0470: pprint.printBody(o, lexer, document,
0471: configuration.xmlOut);
0472: } else if (configuration.xmlOut
0473: && !configuration.xHTML) {
0474: pprint.printXMLTree(o, (short) 0, 0, lexer,
0475: document);
0476: } else {
0477: pprint.printTree(o, (short) 0, 0, lexer,
0478: document);
0479: }
0480:
0481: pprint.flushLine(o, 0);
0482: o.close();
0483: } catch (IOException e) {
0484: errout.println(file + e.toString());
0485: }
0486: } else if (out != null) {
0487: pprint = new PPrint(configuration);
0488:
0489: Out o = OutFactory.getOut(this .configuration, out); // normal output stream
0490:
0491: if (document.findDocType() == null) {
0492: // only use numeric character references if no doctype could be determined (e.g., because
0493: // the document contains proprietary features) to ensure well-formedness.
0494: configuration.numEntities = true;
0495: }
0496: if (configuration.bodyOnly) {
0497: // Feature request #434940 - fix by Dave Raggett/Ignacio Vazquez-Abrams 21 Jun 01
0498: pprint.printBody(o, lexer, document,
0499: configuration.xmlOut);
0500: } else if (configuration.xmlOut
0501: && !configuration.xHTML) {
0502: pprint.printXMLTree(o, (short) 0, 0, lexer,
0503: document);
0504: } else {
0505: pprint.printTree(o, (short) 0, 0, lexer,
0506: document);
0507: }
0508:
0509: pprint.flushLine(o, 0);
0510: o.close();
0511: }
0512:
0513: }
0514:
0515: if (!configuration.quiet) {
0516: this .report.errorSummary(lexer);
0517: }
0518: }
0519: return document;
0520: }
0521:
0522: /**
0523: * Parses InputStream in and returns a DOM Document node. If out is non-null, pretty prints to OutputStream out.
0524: * @param in input stream
0525: * @param out optional output stream
0526: * @return parsed org.w3c.dom.Document
0527: */
0528: public org.w3c.dom.Document parseDOM(InputStream in,
0529: OutputStream out) {
0530: Node document = parse(in, out);
0531: if (document != null) {
0532: return (org.w3c.dom.Document) document.getAdapter();
0533: }
0534: return null;
0535: }
0536:
0537: /**
0538: * Creates an empty DOM Document.
0539: * @return a new org.w3c.dom.Document
0540: */
0541: public static org.w3c.dom.Document createEmptyDocument() {
0542: Node document = new Node(Node.ROOT_NODE, new byte[0], 0, 0);
0543: Node node = new Node(Node.START_TAG, new byte[0], 0, 0, "html",
0544: new TagTable());
0545: if (document != null && node != null) {
0546: document.insertNodeAtStart(node);
0547: return (org.w3c.dom.Document) document.getAdapter();
0548: }
0549:
0550: return null;
0551: }
0552:
0553: /**
0554: * Pretty-prints a DOM Document. Must be an instance of org.w3c.tidy.DOMDocumentImpl.
0555: * @param doc org.w3c.dom.Document
0556: * @param out output stream
0557: */
0558: public void pprint(org.w3c.dom.Document doc, OutputStream out) {
0559: if (!(doc instanceof DOMDocumentImpl)) {
0560: // @todo should we inform users that tidy can't print a generic Document or change the method signature?
0561: return;
0562: }
0563:
0564: pprint(((DOMDocumentImpl) doc).adaptee, out);
0565: }
0566:
0567: /**
0568: * Pretty-prints a DOM Node.
0569: * @param node org.w3c.dom.Node. Must be an instance of org.w3c.tidy.DOMNodeImpl.
0570: * @param out output stream
0571: */
0572: public void pprint(org.w3c.dom.Node node, OutputStream out) {
0573: if (!(node instanceof DOMNodeImpl)) {
0574: // @todo should we inform users than tidy can't print a generic Node or change the method signature?
0575: return;
0576: }
0577:
0578: pprint(((DOMNodeImpl) node).adaptee, out);
0579: }
0580:
0581: /**
0582: * Pretty-prints a tidy Node.
0583: * @param node org.w3c.tidy.Node
0584: * @param out output stream
0585: */
0586: private void pprint(Node node, OutputStream out) {
0587: PPrint pprint;
0588:
0589: if (out != null) {
0590:
0591: Out o = OutFactory.getOut(this .configuration, out);
0592:
0593: Lexer lexer = new Lexer(null, this .configuration,
0594: this .report);
0595:
0596: pprint = new PPrint(configuration);
0597:
0598: if (configuration.xmlTags) {
0599: pprint.printXMLTree(o, (short) 0, 0, lexer, node);
0600: } else {
0601: pprint.printTree(o, (short) 0, 0, lexer, node);
0602: }
0603:
0604: pprint.flushLine(o, 0);
0605: }
0606: }
0607:
0608: /**
0609: * Command line interface to parser and pretty printer.
0610: * @param argv command line parameters
0611: */
0612: public static void main(String[] argv) {
0613: Tidy tidy = new Tidy();
0614: int returnCode = tidy.mainExec(argv);
0615: System.exit(returnCode);
0616: }
0617:
0618: /**
0619: * Main method, but returns the return code as an int instead of calling System.exit(code). Needed for testing main
0620: * method without shutting down tests.
0621: * @param argv command line parameters
0622: * @return return code
0623: */
0624: protected int mainExec(String[] argv) {
0625: String file;
0626: int argCount = argv.length;
0627: int argIndex = 0;
0628:
0629: // read command line
0630: Properties properties = new Properties();
0631:
0632: while (argCount > 0) {
0633: if (argv[argIndex].startsWith("-")) {
0634: // support -foo and --foo
0635: String argName = argv[argIndex].toLowerCase();
0636: while (argName.length() > 0 && argName.charAt(0) == '-') {
0637: argName = argName.substring(1);
0638: }
0639:
0640: // "exclusive" options
0641: if (argName.equals("help") || argName.equals("h")
0642: || argName.equals("?")) {
0643: this .report.helpText(new PrintWriter(System.out,
0644: true));
0645: return 0;
0646: } else if (argName.equals("help-config")) {
0647: configuration.printConfigOptions(new PrintWriter(
0648: System.out, true), false);
0649: return 0;
0650: } else if (argName.equals("show-config")) {
0651: configuration.adjust(); // ensure config is self-consistent
0652: configuration.printConfigOptions(errout, true);
0653: return 0;
0654: } else if (argName.equals("version")
0655: || argName.equals("v")) {
0656: this .report.showVersion(errout);
0657: return 0;
0658: }
0659:
0660: // optional value for non boolean options
0661: String argValue = null;
0662: if (argCount > 2 && !argv[argIndex + 1].startsWith("-")) {
0663: argValue = argv[argIndex + 1];
0664: --argCount;
0665: ++argIndex;
0666: }
0667:
0668: // handle "special" aliases
0669: String alias = (String) CMDLINE_ALIAS.get(argName);
0670: if (alias != null) {
0671: argName = alias;
0672: }
0673:
0674: if (Configuration.isKnownOption(argName)) // handle any standard config option
0675: {
0676: properties.setProperty(argName,
0677: (argValue == null ? "" : argName));
0678: } else if (argName.equals("config")) // parse a property file
0679: {
0680: if (argValue != null) {
0681: configuration.parseFile(argValue);
0682: }
0683: } else if (TidyUtils.isCharEncodingSupported(argName)) // handle any encoding name
0684: {
0685: properties.setProperty("char-encoding", argName);
0686: } else {
0687:
0688: for (int i = 0; i < argName.length(); i++) {
0689: switch (argName.charAt(i)) {
0690: case 'i':
0691: configuration.indentContent = true;
0692: configuration.smartIndent = true;
0693: break;
0694:
0695: case 'o':
0696: configuration.hideEndTags = true;
0697: break;
0698:
0699: case 'u':
0700: configuration.upperCaseTags = true;
0701: break;
0702:
0703: case 'c':
0704: configuration.makeClean = true;
0705: break;
0706:
0707: case 'b':
0708: configuration.makeBare = true;
0709: break;
0710:
0711: case 'n':
0712: configuration.numEntities = true;
0713: break;
0714:
0715: case 'm':
0716: configuration.writeback = true;
0717: break;
0718:
0719: case 'e':
0720: configuration.onlyErrors = true;
0721: break;
0722:
0723: case 'q':
0724: configuration.quiet = true;
0725: break;
0726:
0727: default:
0728: this .report.unknownOption(this .errout,
0729: argName.charAt(i));
0730: break;
0731: }
0732: }
0733: }
0734:
0735: --argCount;
0736: ++argIndex;
0737: continue;
0738: }
0739:
0740: configuration.addProps(properties);
0741:
0742: // ensure config is self-consistent
0743: configuration.adjust();
0744:
0745: // user specified error file
0746: if (configuration.errfile != null) {
0747:
0748: String errorfile = "stderr";
0749:
0750: // is it same as the currently opened file?
0751: if (!configuration.errfile.equals(errorfile)) {
0752: // no so close previous error file
0753:
0754: if (this .errout != this .stderr) {
0755: this .errout.close();
0756: }
0757:
0758: // and try to open the new error file
0759: try {
0760: this .setErrout(new PrintWriter(new FileWriter(
0761: configuration.errfile), true));
0762: errorfile = configuration.errfile;
0763: } catch (IOException e) {
0764: // can't be opened so fall back to stderr
0765: errorfile = "stderr";
0766: this .setErrout(stderr);
0767: }
0768: }
0769: }
0770:
0771: if (argCount > 0) {
0772: file = argv[argIndex];
0773: } else {
0774: file = "stdin";
0775: }
0776:
0777: try {
0778: parse(null, file, System.out);
0779: } catch (FileNotFoundException fnfe) {
0780: this .report.unknownFile(this .errout, file);
0781: } catch (IOException ioe) {
0782: this .report.unknownFile(this .errout, file);
0783: }
0784:
0785: --argCount;
0786: ++argIndex;
0787:
0788: if (argCount <= 0) {
0789: break;
0790: }
0791: }
0792:
0793: if (this .parseErrors + this .parseWarnings > 0
0794: && !configuration.quiet) {
0795: this .report.generalInfo(this .errout);
0796: }
0797:
0798: if (this .errout != this .stderr) {
0799: this .errout.close();
0800: }
0801:
0802: // return status can be used by scripts
0803: if (this .parseErrors > 0) {
0804: return 2;
0805: }
0806:
0807: if (this .parseWarnings > 0) {
0808: return 1;
0809: }
0810:
0811: // 0 means all is ok
0812: return 0;
0813: }
0814:
0815: /**
0816: * Attach a TidyMessageListener which will be notified for messages and errors.
0817: * @param listener TidyMessageListener implementation
0818: */
0819: public void setMessageListener(TidyMessageListener listener) {
0820: this .report.addMessageListener(listener);
0821: }
0822:
0823: /**
0824: * <code>indent-spaces</code>- default indentation.
0825: * @param spaces number of spaces used for indentation
0826: * @see Configuration#spaces
0827: */
0828: public void setSpaces(int spaces) {
0829: configuration.spaces = spaces;
0830: }
0831:
0832: /**
0833: * <code>indent-spaces</code>- default indentation.
0834: * @return number of spaces used for indentation
0835: * @see Configuration#spaces
0836: */
0837: public int getSpaces() {
0838: return configuration.spaces;
0839: }
0840:
0841: /**
0842: * <code>wrap</code>- default wrap margin.
0843: * @param wraplen default wrap margin
0844: * @see Configuration#wraplen
0845: */
0846: public void setWraplen(int wraplen) {
0847: configuration.wraplen = wraplen;
0848: }
0849:
0850: /**
0851: * <code>wrap</code>- default wrap margin.
0852: * @return default wrap margin
0853: * @see Configuration#wraplen
0854: */
0855: public int getWraplen() {
0856: return configuration.wraplen;
0857: }
0858:
0859: /**
0860: * <code>tab-size</code>- tab size in chars.
0861: * @param tabsize tab size in chars
0862: * @see Configuration#tabsize
0863: */
0864: public void setTabsize(int tabsize) {
0865: configuration.tabsize = tabsize;
0866: }
0867:
0868: /**
0869: * <code>tab-size</code>- tab size in chars.
0870: * @return tab size in chars
0871: * @see Configuration#tabsize
0872: */
0873: public int getTabsize() {
0874: return configuration.tabsize;
0875: }
0876:
0877: /**
0878: * Errfile - file name to write errors to.
0879: * @param errfile file name to write errors to
0880: * @see Configuration#errfile
0881: */
0882: public void setErrfile(String errfile) {
0883: configuration.errfile = errfile;
0884: }
0885:
0886: /**
0887: * Errfile - file name to write errors to.
0888: * @return error file name
0889: * @see Configuration#errfile
0890: */
0891: public String getErrfile() {
0892: return configuration.errfile;
0893: }
0894:
0895: /**
0896: * writeback - if true then output tidied markup. NOTE: this property is ignored when parsing from an InputStream.
0897: * @param writeback <code>true</code>= output tidied markup
0898: * @see Configuration#writeback
0899: */
0900: public void setWriteback(boolean writeback) {
0901: configuration.writeback = writeback;
0902: }
0903:
0904: /**
0905: * writeback - if true then output tidied markup. NOTE: this property is ignored when parsing from an InputStream.
0906: * @return <code>true</code> if tidy will output tidied markup in input file
0907: * @see Configuration#writeback
0908: */
0909: public boolean getWriteback() {
0910: return configuration.writeback;
0911: }
0912:
0913: /**
0914: * only-errors - if true normal output is suppressed.
0915: * @param onlyErrors if <code>true</code> normal output is suppressed.
0916: * @see Configuration#onlyErrors
0917: */
0918: public void setOnlyErrors(boolean onlyErrors) {
0919: configuration.onlyErrors = onlyErrors;
0920: }
0921:
0922: /**
0923: * only-errors - if true normal output is suppressed.
0924: * @return <code>true</code> if normal output is suppressed.
0925: * @see Configuration#onlyErrors
0926: */
0927: public boolean getOnlyErrors() {
0928: return configuration.onlyErrors;
0929: }
0930:
0931: /**
0932: * show-warnings - show warnings? (errors are always shown).
0933: * @param showWarnings if <code>false</code> warnings are not shown
0934: * @see Configuration#showWarnings
0935: */
0936: public void setShowWarnings(boolean showWarnings) {
0937: configuration.showWarnings = showWarnings;
0938: }
0939:
0940: /**
0941: * show-warnings - show warnings? (errors are always shown).
0942: * @return <code>false</code> if warnings are not shown
0943: * @see Configuration#showWarnings
0944: */
0945: public boolean getShowWarnings() {
0946: return configuration.showWarnings;
0947: }
0948:
0949: /**
0950: * quiet - no 'Parsing X', guessed DTD or summary.
0951: * @param quiet <code>true</code>= don't output summary, warnings or errors
0952: * @see Configuration#quiet
0953: */
0954: public void setQuiet(boolean quiet) {
0955: configuration.quiet = quiet;
0956: }
0957:
0958: /**
0959: * quiet - no 'Parsing X', guessed DTD or summary.
0960: * @return <code>true</code> if tidy will not output summary, warnings or errors
0961: * @see Configuration#quiet
0962: */
0963: public boolean getQuiet() {
0964: return configuration.quiet;
0965: }
0966:
0967: /**
0968: * indent - indent content of appropriate tags.
0969: * @param indentContent indent content of appropriate tags
0970: * @see Configuration#indentContent
0971: */
0972: public void setIndentContent(boolean indentContent) {
0973: configuration.indentContent = indentContent;
0974: }
0975:
0976: /**
0977: * indent - indent content of appropriate tags.
0978: * @return <code>true</code> if tidy will indent content of appropriate tags
0979: * @see Configuration#indentContent
0980: */
0981: public boolean getIndentContent() {
0982: return configuration.indentContent;
0983: }
0984:
0985: /**
0986: * SmartIndent - does text/block level content effect indentation.
0987: * @param smartIndent <code>true</code> if text/block level content should effect indentation
0988: * @see Configuration#smartIndent
0989: */
0990: public void setSmartIndent(boolean smartIndent) {
0991: configuration.smartIndent = smartIndent;
0992: }
0993:
0994: /**
0995: * SmartIndent - does text/block level content effect indentation.
0996: * @return <code>true</code> if text/block level content should effect indentation
0997: * @see Configuration#smartIndent
0998: */
0999: public boolean getSmartIndent() {
1000: return configuration.smartIndent;
1001: }
1002:
1003: /**
1004: * hide-endtags - suppress optional end tags.
1005: * @param hideEndTags <code>true</code>= suppress optional end tags
1006: * @see Configuration#hideEndTags
1007: */
1008: public void setHideEndTags(boolean hideEndTags) {
1009: configuration.hideEndTags = hideEndTags;
1010: }
1011:
1012: /**
1013: * hide-endtags - suppress optional end tags.
1014: * @return <code>true</code> if tidy will suppress optional end tags
1015: * @see Configuration#hideEndTags
1016: */
1017: public boolean getHideEndTags() {
1018: return configuration.hideEndTags;
1019: }
1020:
1021: /**
1022: * input-xml - treat input as XML.
1023: * @param xmlTags <code>true</code> if tidy should treat input as XML
1024: * @see Configuration#xmlTags
1025: */
1026: public void setXmlTags(boolean xmlTags) {
1027: configuration.xmlTags = xmlTags;
1028: }
1029:
1030: /**
1031: * input-xml - treat input as XML.
1032: * @return <code>true</code> if tidy will treat input as XML
1033: * @see Configuration#xmlTags
1034: */
1035: public boolean getXmlTags() {
1036: return configuration.xmlTags;
1037: }
1038:
1039: /**
1040: * output-xml - create output as XML.
1041: * @param xmlOut <code>true</code> if tidy should create output as xml
1042: * @see Configuration#xmlOut
1043: */
1044: public void setXmlOut(boolean xmlOut) {
1045: configuration.xmlOut = xmlOut;
1046: }
1047:
1048: /**
1049: * output-xml - create output as XML.
1050: * @return <code>true</code> if tidy will create output as xml
1051: * @see Configuration#xmlOut
1052: */
1053: public boolean getXmlOut() {
1054: return configuration.xmlOut;
1055: }
1056:
1057: /**
1058: * output-xhtml - output extensible HTML.
1059: * @param xhtml <code>true</code> if tidy should output XHTML
1060: * @see Configuration#xHTML
1061: */
1062: public void setXHTML(boolean xhtml) {
1063: configuration.xHTML = xhtml;
1064: }
1065:
1066: /**
1067: * output-xhtml - output extensible HTML.
1068: * @return <code>true</code> if tidy will output XHTML
1069: * @see Configuration#xHTML
1070: */
1071: public boolean getXHTML() {
1072: return configuration.xHTML;
1073: }
1074:
1075: /**
1076: * uppercase-tags - output tags in upper case.
1077: * @param upperCaseTags <code>true</code> if tidy should output tags in upper case (default is lowercase)
1078: * @see Configuration#upperCaseTags
1079: */
1080: public void setUpperCaseTags(boolean upperCaseTags) {
1081: configuration.upperCaseTags = upperCaseTags;
1082: }
1083:
1084: /**
1085: * uppercase-tags - output tags in upper case.
1086: * @return <code>true</code> if tidy should will tags in upper case
1087: * @see Configuration#upperCaseTags
1088: */
1089: public boolean getUpperCaseTags() {
1090: return configuration.upperCaseTags;
1091: }
1092:
1093: /**
1094: * uppercase-attributes - output attributes in upper case.
1095: * @param upperCaseAttrs <code>true</code> if tidy should output attributes in upper case (default is lowercase)
1096: * @see Configuration#upperCaseAttrs
1097: */
1098: public void setUpperCaseAttrs(boolean upperCaseAttrs) {
1099: configuration.upperCaseAttrs = upperCaseAttrs;
1100: }
1101:
1102: /**
1103: * uppercase-attributes - output attributes in upper case.
1104: * @return <code>true</code> if tidy should will attributes in upper case
1105: * @see Configuration#upperCaseAttrs
1106: */
1107: public boolean getUpperCaseAttrs() {
1108: return configuration.upperCaseAttrs;
1109: }
1110:
1111: /**
1112: * make-clean - remove presentational clutter.
1113: * @param makeClean true to remove presentational clutter
1114: * @see Configuration#makeClean
1115: */
1116: public void setMakeClean(boolean makeClean) {
1117: configuration.makeClean = makeClean;
1118: }
1119:
1120: /**
1121: * make-clean - remove presentational clutter.
1122: * @return true if tidy will remove presentational clutter
1123: * @see Configuration#makeClean
1124: */
1125: public boolean getMakeClean() {
1126: return configuration.makeClean;
1127: }
1128:
1129: /**
1130: * make-bare - remove Microsoft cruft.
1131: * @param makeBare true to remove Microsoft cruft
1132: * @see Configuration#makeBare
1133: */
1134: public void setMakeBare(boolean makeBare) {
1135: configuration.makeBare = makeBare;
1136: }
1137:
1138: /**
1139: * make-clean - remove Microsoft cruft.
1140: * @return true if tidy will remove Microsoft cruft
1141: * @see Configuration#makeBare
1142: */
1143: public boolean getMakeBare() {
1144: return configuration.makeBare;
1145: }
1146:
1147: /**
1148: * break-before-br - output newline before <br>.
1149: * @param breakBeforeBR <code>true</code> if tidy should output a newline before <br>
1150: * @see Configuration#breakBeforeBR
1151: */
1152: public void setBreakBeforeBR(boolean breakBeforeBR) {
1153: configuration.breakBeforeBR = breakBeforeBR;
1154: }
1155:
1156: /**
1157: * break-before-br - output newline before <br>.
1158: * @return <code>true</code> if tidy will output a newline before <br>
1159: * @see Configuration#breakBeforeBR
1160: */
1161: public boolean getBreakBeforeBR() {
1162: return configuration.breakBeforeBR;
1163: }
1164:
1165: /**
1166: * <code>split</code>- create slides on each h2 element.
1167: * @param burstSlides <code>true</code> if tidy should create slides on each h2 element
1168: * @see Configuration#burstSlides
1169: */
1170: public void setBurstSlides(boolean burstSlides) {
1171: configuration.burstSlides = burstSlides;
1172: }
1173:
1174: /**
1175: * <code>split</code>- create slides on each h2 element.
1176: * @return <code>true</code> if tidy will create slides on each h2 element
1177: * @see Configuration#burstSlides
1178: */
1179: public boolean getBurstSlides() {
1180: return configuration.burstSlides;
1181: }
1182:
1183: /**
1184: * <code>numeric-entities</code>- output entities other than the built-in HTML entities in the numeric rather
1185: * than the named entity form.
1186: * @param numEntities <code>true</code> if tidy should output entities in the numeric form.
1187: * @see Configuration#numEntities
1188: */
1189: public void setNumEntities(boolean numEntities) {
1190: configuration.numEntities = numEntities;
1191: }
1192:
1193: /**
1194: * <code>numeric-entities</code>- output entities other than the built-in HTML entities in the numeric rather
1195: * than the named entity form.
1196: * @return <code>true</code> if tidy will output entities in the numeric form.
1197: * @see Configuration#numEntities
1198: */
1199: public boolean getNumEntities() {
1200: return configuration.numEntities;
1201: }
1202:
1203: /**
1204: * <code>quote-marks</code>- output " marks as &quot;.
1205: * @param quoteMarks <code>true</code> if tidy should output " marks as &quot;
1206: * @see Configuration#quoteMarks
1207: */
1208: public void setQuoteMarks(boolean quoteMarks) {
1209: configuration.quoteMarks = quoteMarks;
1210: }
1211:
1212: /**
1213: * <code>quote-marks</code>- output " marks as &quot;.
1214: * @return <code>true</code> if tidy will output " marks as &quot;
1215: * @see Configuration#quoteMarks
1216: */
1217: public boolean getQuoteMarks() {
1218: return configuration.quoteMarks;
1219: }
1220:
1221: /**
1222: * <code>quote-nbsp</code>- output non-breaking space as entity.
1223: * @param quoteNbsp <code>true</code> if tidy should output non-breaking space as entity
1224: * @see Configuration#quoteNbsp
1225: */
1226: public void setQuoteNbsp(boolean quoteNbsp) {
1227: configuration.quoteNbsp = quoteNbsp;
1228: }
1229:
1230: /**
1231: * <code>quote-nbsp</code>- output non-breaking space as entity.
1232: * @return <code>true</code> if tidy will output non-breaking space as entity
1233: * @see Configuration#quoteNbsp
1234: */
1235: public boolean getQuoteNbsp() {
1236: return configuration.quoteNbsp;
1237: }
1238:
1239: /**
1240: * <code>quote-ampersand</code>- output naked ampersand as &.
1241: * @param quoteAmpersand <code>true</code> if tidy should output naked ampersand as &
1242: * @see Configuration#quoteAmpersand
1243: */
1244: public void setQuoteAmpersand(boolean quoteAmpersand) {
1245: configuration.quoteAmpersand = quoteAmpersand;
1246: }
1247:
1248: /**
1249: * <code>quote-ampersand</code>- output naked ampersand as &.
1250: * @return <code>true</code> if tidy will output naked ampersand as &
1251: * @see Configuration#quoteAmpersand
1252: */
1253: public boolean getQuoteAmpersand() {
1254: return configuration.quoteAmpersand;
1255: }
1256:
1257: /**
1258: * <code>wrap-attributes</code>- wrap within attribute values.
1259: * @param wrapAttVals <code>true</code> if tidy should wrap within attribute values
1260: * @see Configuration#wrapAttVals
1261: */
1262: public void setWrapAttVals(boolean wrapAttVals) {
1263: configuration.wrapAttVals = wrapAttVals;
1264: }
1265:
1266: /**
1267: * <code>wrap-attributes</code>- wrap within attribute values.
1268: * @return <code>true</code> if tidy will wrap within attribute values
1269: * @see Configuration#wrapAttVals
1270: */
1271: public boolean getWrapAttVals() {
1272: return configuration.wrapAttVals;
1273: }
1274:
1275: /**
1276: * <code>wrap-script-literals</code>- wrap within JavaScript string literals.
1277: * @param wrapScriptlets <code>true</code> if tidy should wrap within JavaScript string literals
1278: * @see Configuration#wrapScriptlets
1279: */
1280: public void setWrapScriptlets(boolean wrapScriptlets) {
1281: configuration.wrapScriptlets = wrapScriptlets;
1282: }
1283:
1284: /**
1285: * <code>wrap-script-literals</code>- wrap within JavaScript string literals.
1286: * @return <code>true</code> if tidy will wrap within JavaScript string literals
1287: * @see Configuration#wrapScriptlets
1288: */
1289: public boolean getWrapScriptlets() {
1290: return configuration.wrapScriptlets;
1291: }
1292:
1293: /**
1294: * <code>wrap-sections</code>- wrap within <![ ... ]> section tags
1295: * @param wrapSection <code>true</code> if tidy should wrap within <![ ... ]> section tags
1296: * @see Configuration#wrapSection
1297: */
1298: public void setWrapSection(boolean wrapSection) {
1299: configuration.wrapSection = wrapSection;
1300: }
1301:
1302: /**
1303: * <code>wrap-sections</code>- wrap within <![ ... ]> section tags
1304: * @return <code>true</code> if tidy will wrap within <![ ... ]> section tags
1305: * @see Configuration#wrapSection
1306: */
1307: public boolean getWrapSection() {
1308: return configuration.wrapSection;
1309: }
1310:
1311: /**
1312: * <code>alt-text</code>- default text for alt attribute.
1313: * @param altText default text for alt attribute
1314: * @see Configuration#altText
1315: */
1316: public void setAltText(String altText) {
1317: configuration.altText = altText;
1318: }
1319:
1320: /**
1321: * <code>alt-text</code>- default text for alt attribute.
1322: * @return default text for alt attribute
1323: * @see Configuration#altText
1324: */
1325: public String getAltText() {
1326: return configuration.altText;
1327: }
1328:
1329: /**
1330: * <code>add-xml-pi</code>- add <?xml?> for XML docs.
1331: * @param xmlPi <code>true</code> if tidy should add <?xml?> for XML docs
1332: * @see Configuration#xmlPi
1333: */
1334: public void setXmlPi(boolean xmlPi) {
1335: configuration.xmlPi = xmlPi;
1336: }
1337:
1338: /**
1339: * <code>add-xml-pi</code>- add <?xml?> for XML docs.
1340: * @return <code>true</code> if tidy will add <?xml?> for XML docs
1341: * @see Configuration#xmlPi
1342: */
1343: public boolean getXmlPi() {
1344: return configuration.xmlPi;
1345: }
1346:
1347: /**
1348: * <code>drop-font-tags</code>- discard presentation tags.
1349: * @param dropFontTags <code>true</code> if tidy should discard presentation tags
1350: * @see Configuration#dropFontTags
1351: */
1352: public void setDropFontTags(boolean dropFontTags) {
1353: configuration.dropFontTags = dropFontTags;
1354: }
1355:
1356: /**
1357: * <code>drop-font-tags</code>- discard presentation tags.
1358: * @return <code>true</code> if tidy will discard presentation tags
1359: * @see Configuration#dropFontTags
1360: */
1361: public boolean getDropFontTags() {
1362: return configuration.dropFontTags;
1363: }
1364:
1365: /**
1366: * <code>drop-proprietary-attributes</code>- discard proprietary attributes.
1367: * @param dropProprietaryAttributes <code>true</code> if tidy should discard proprietary attributes
1368: * @see Configuration#dropProprietaryAttributes
1369: */
1370: public void setDropProprietaryAttributes(
1371: boolean dropProprietaryAttributes) {
1372: configuration.dropProprietaryAttributes = dropProprietaryAttributes;
1373: }
1374:
1375: /**
1376: * <code>drop-proprietary-attributes</code>- discard proprietary attributes.
1377: * @return <code>true</code> if tidy will discard proprietary attributes
1378: * @see Configuration#dropProprietaryAttributes
1379: */
1380: public boolean getDropProprietaryAttributes() {
1381: return configuration.dropProprietaryAttributes;
1382: }
1383:
1384: /**
1385: * <code>drop-empty-paras</code>- discard empty p elements.
1386: * @param dropEmptyParas <code>true</code> if tidy should discard empty p elements
1387: * @see Configuration#dropEmptyParas
1388: */
1389: public void setDropEmptyParas(boolean dropEmptyParas) {
1390: configuration.dropEmptyParas = dropEmptyParas;
1391: }
1392:
1393: /**
1394: * <code>drop-empty-paras</code>- discard empty p elements.
1395: * @return <code>true</code> if tidy will discard empty p elements
1396: * @see Configuration#dropEmptyParas
1397: */
1398: public boolean getDropEmptyParas() {
1399: return configuration.dropEmptyParas;
1400: }
1401:
1402: /**
1403: * <code>fix-bad-comments</code>- fix comments with adjacent hyphens.
1404: * @param fixComments <code>true</code> if tidy should fix comments with adjacent hyphens
1405: * @see Configuration#fixComments
1406: */
1407: public void setFixComments(boolean fixComments) {
1408: configuration.fixComments = fixComments;
1409: }
1410:
1411: /**
1412: * <code>fix-bad-comments</code>- fix comments with adjacent hyphens.
1413: * @return <code>true</code> if tidy will fix comments with adjacent hyphens
1414: * @see Configuration#fixComments
1415: */
1416: public boolean getFixComments() {
1417: return configuration.fixComments;
1418: }
1419:
1420: /**
1421: * <code>wrap-asp</code>- wrap within ASP pseudo elements.
1422: * @param wrapAsp <code>true</code> if tidy should wrap within ASP pseudo elements
1423: * @see Configuration#wrapAsp
1424: */
1425: public void setWrapAsp(boolean wrapAsp) {
1426: configuration.wrapAsp = wrapAsp;
1427: }
1428:
1429: /**
1430: * <code>wrap-asp</code>- wrap within ASP pseudo elements.
1431: * @return <code>true</code> if tidy will wrap within ASP pseudo elements
1432: * @see Configuration#wrapAsp
1433: */
1434: public boolean getWrapAsp() {
1435: return configuration.wrapAsp;
1436: }
1437:
1438: /**
1439: * <code>wrap-jste</code>- wrap within JSTE pseudo elements.
1440: * @param wrapJste <code>true</code> if tidy should wrap within JSTE pseudo elements
1441: * @see Configuration#wrapJste
1442: */
1443: public void setWrapJste(boolean wrapJste) {
1444: configuration.wrapJste = wrapJste;
1445: }
1446:
1447: /**
1448: * <code>wrap-jste</code>- wrap within JSTE pseudo elements.
1449: * @return <code>true</code> if tidy will wrap within JSTE pseudo elements
1450: * @see Configuration#wrapJste
1451: */
1452: public boolean getWrapJste() {
1453: return configuration.wrapJste;
1454: }
1455:
1456: /**
1457: * <code>wrap-php</code>- wrap within PHP pseudo elements.
1458: * @param wrapPhp <code>true</code> if tidy should wrap within PHP pseudo elements
1459: * @see Configuration#wrapPhp
1460: */
1461: public void setWrapPhp(boolean wrapPhp) {
1462: configuration.wrapPhp = wrapPhp;
1463: }
1464:
1465: /**
1466: * <code>wrap-php</code>- wrap within PHP pseudo elements.
1467: * @return <code>true</code> if tidy will wrap within PHP pseudo elements
1468: * @see Configuration#wrapPhp
1469: */
1470: public boolean getWrapPhp() {
1471: return configuration.wrapPhp;
1472: }
1473:
1474: /**
1475: * <code>fix-backslash</code>- fix URLs by replacing \ with /.
1476: * @param fixBackslash <code>true</code> if tidy should fix URLs by replacing \ with /
1477: * @see Configuration#fixBackslash
1478: */
1479: public void setFixBackslash(boolean fixBackslash) {
1480: configuration.fixBackslash = fixBackslash;
1481: }
1482:
1483: /**
1484: * <code>fix-backslash</code>- fix URLs by replacing \ with /.
1485: * @return <code>true</code> if tidy will fix URLs by replacing \ with /
1486: * @see Configuration#fixBackslash
1487: */
1488: public boolean getFixBackslash() {
1489: return configuration.fixBackslash;
1490: }
1491:
1492: /**
1493: * <code>indent-attributes</code>- newline+indent before each attribute.
1494: * @param indentAttributes <code>true</code> if tidy should output a newline+indent before each attribute
1495: * @see Configuration#indentAttributes
1496: */
1497: public void setIndentAttributes(boolean indentAttributes) {
1498: configuration.indentAttributes = indentAttributes;
1499: }
1500:
1501: /**
1502: * <code>indent-attributes</code>- newline+indent before each attribute.
1503: * @return <code>true</code> if tidy will output a newline+indent before each attribute
1504: * @see Configuration#indentAttributes
1505: */
1506: public boolean getIndentAttributes() {
1507: return configuration.indentAttributes;
1508: }
1509:
1510: /**
1511: * <code>doctype</code>- user specified doctype.
1512: * @param doctype <code>omit | auto | strict | loose | <em>fpi</em></code> where the <em>fpi </em> is a string
1513: * similar to "-//ACME//DTD HTML 3.14159//EN" Note: for <em>fpi </em> include the double-quotes in the
1514: * string.
1515: * @see Configuration#docTypeStr
1516: * @see Configuration#docTypeMode
1517: */
1518: public void setDocType(String doctype) {
1519: if (doctype != null) {
1520: configuration.docTypeStr = (String) ParsePropertyImpl.DOCTYPE
1521: .parse(doctype, "doctype", configuration);
1522: }
1523: }
1524:
1525: /**
1526: * <code>doctype</code>- user specified doctype.
1527: * @return <code>omit | auto | strict | loose | <em>fpi</em></code> where the <em>fpi </em> is a string similar
1528: * to "-//ACME//DTD HTML 3.14159//EN" Note: for <em>fpi </em> include the double-quotes in the string.
1529: * @see Configuration#docTypeStr
1530: * @see Configuration#docTypeMode
1531: */
1532: public String getDocType() {
1533: String result = null;
1534: switch (configuration.docTypeMode) {
1535: case Configuration.DOCTYPE_OMIT:
1536: result = "omit";
1537: break;
1538: case Configuration.DOCTYPE_AUTO:
1539: result = "auto";
1540: break;
1541: case Configuration.DOCTYPE_STRICT:
1542: result = "strict";
1543: break;
1544: case Configuration.DOCTYPE_LOOSE:
1545: result = "loose";
1546: break;
1547: case Configuration.DOCTYPE_USER:
1548: result = configuration.docTypeStr;
1549: break;
1550: }
1551: return result;
1552: }
1553:
1554: /**
1555: * <code>logical-emphasis</code>- replace i by em and b by strong.
1556: * @param logicalEmphasis <code>true</code> if tidy should replace i by em and b by strong
1557: * @see Configuration#logicalEmphasis
1558: */
1559: public void setLogicalEmphasis(boolean logicalEmphasis) {
1560: configuration.logicalEmphasis = logicalEmphasis;
1561: }
1562:
1563: /**
1564: * <code>logical-emphasis</code>- replace i by em and b by strong.
1565: * @return <code>true</code> if tidy will replace i by em and b by strong
1566: * @see Configuration#logicalEmphasis
1567: */
1568: public boolean getLogicalEmphasis() {
1569: return configuration.logicalEmphasis;
1570: }
1571:
1572: /**
1573: * <code>assume-xml-procins</code> This option specifies if Tidy should change the parsing of processing
1574: * instructions to require ?> as the terminator rather than >. This option is automatically set if the input is in
1575: * XML.
1576: * @param xmlPIs <code>true</code> if tidy should expect a ?> at the end of processing instructions
1577: * @see Configuration#xmlPIs
1578: */
1579: public void setXmlPIs(boolean xmlPIs) {
1580: configuration.xmlPIs = xmlPIs;
1581: }
1582:
1583: /**
1584: * <code>assume-xml-procins</code> This option specifies if Tidy should change the parsing of processing
1585: * instructions to require ?> as the terminator rather than >. This option is automatically set if the input is in
1586: * XML.
1587: * @return <code>true</code> if tidy will expect a ?> at the end of processing instructions
1588: * @see Configuration#xmlPIs
1589: */
1590: public boolean getXmlPIs() {
1591: return configuration.xmlPIs;
1592: }
1593:
1594: /**
1595: * <code>enclose-text</code>- if true text at body is wrapped in <p>'s.
1596: * @param encloseText <code>true</code> if tidy should wrap text at body in <p>'s.
1597: * @see Configuration#encloseBodyText
1598: */
1599: public void setEncloseText(boolean encloseText) {
1600: configuration.encloseBodyText = encloseText;
1601: }
1602:
1603: /**
1604: * <code>enclose-text</code>- if true text at body is wrapped in <p>'s.
1605: * @return <code>true</code> if tidy will wrap text at body in <p>'s.
1606: * @see Configuration#encloseBodyText
1607: */
1608: public boolean getEncloseText() {
1609: return configuration.encloseBodyText;
1610: }
1611:
1612: /**
1613: * <code>enclose-block-text</code>- if true text in blocks is wrapped in <p>'s.
1614: * @param encloseBlockText <code>true</code> if tidy should wrap text text in blocks in <p>'s.
1615: * @see Configuration#encloseBlockText
1616: */
1617: public void setEncloseBlockText(boolean encloseBlockText) {
1618: configuration.encloseBlockText = encloseBlockText;
1619: }
1620:
1621: /**
1622: * <code>enclose-block-text</code>- if true text in blocks is wrapped in <p>'s. return <code>true</code>
1623: * if tidy should will text text in blocks in <p>'s.
1624: * @see Configuration#encloseBlockText
1625: */
1626: public boolean getEncloseBlockText() {
1627: return configuration.encloseBlockText;
1628: }
1629:
1630: /**
1631: * <code>word-2000</code>- draconian cleaning for Word2000.
1632: * @param word2000 <code>true</code> if tidy should clean word2000 documents
1633: * @see Configuration#word2000
1634: */
1635: public void setWord2000(boolean word2000) {
1636: configuration.word2000 = word2000;
1637: }
1638:
1639: /**
1640: * <code>word-2000</code>- draconian cleaning for Word2000.
1641: * @return <code>true</code> if tidy will clean word2000 documents
1642: * @see Configuration#word2000
1643: */
1644: public boolean getWord2000() {
1645: return configuration.word2000;
1646: }
1647:
1648: /**
1649: * <code>tidy-mark</code>- add meta element indicating tidied doc.
1650: * @param tidyMark <code>true</code> if tidy should add meta element indicating tidied doc
1651: * @see Configuration#tidyMark
1652: */
1653: public void setTidyMark(boolean tidyMark) {
1654: configuration.tidyMark = tidyMark;
1655: }
1656:
1657: /**
1658: * <code>tidy-mark</code>- add meta element indicating tidied doc.
1659: * @return <code>true</code> if tidy will add meta element indicating tidied doc
1660: * @see Configuration#tidyMark
1661: */
1662: public boolean getTidyMark() {
1663: return configuration.tidyMark;
1664: }
1665:
1666: /**
1667: * <code>add-xml-space</code>- if set to yes adds xml:space attr as needed.
1668: * @param xmlSpace <code>true</code> if tidy should add xml:space attr as needed
1669: * @see Configuration#xmlSpace
1670: */
1671: public void setXmlSpace(boolean xmlSpace) {
1672: configuration.xmlSpace = xmlSpace;
1673: }
1674:
1675: /**
1676: * <code>add-xml-space</code>- if set to yes adds xml:space attr as needed.
1677: * @return <code>true</code> if tidy will add xml:space attr as needed
1678: * @see Configuration#xmlSpace
1679: */
1680: public boolean getXmlSpace() {
1681: return configuration.xmlSpace;
1682: }
1683:
1684: /**
1685: * <code>gnu-emacs</code>- if true format error output for GNU Emacs.
1686: * @param emacs <code>true</code> if tidy should format error output for GNU Emacs
1687: * @see Configuration#emacs
1688: */
1689: public void setEmacs(boolean emacs) {
1690: configuration.emacs = emacs;
1691: }
1692:
1693: /**
1694: * <code>gnu-emacs</code>- if true format error output for GNU Emacs.
1695: * @return <code>true</code> if tidy will format error output for GNU Emacs
1696: * @see Configuration#emacs
1697: */
1698: public boolean getEmacs() {
1699: return configuration.emacs;
1700: }
1701:
1702: /**
1703: * <code>literal-attributes</code>- if true attributes may use newlines.
1704: * @param literalAttribs <code>true</code> if attributes may use newlines
1705: * @see Configuration#literalAttribs
1706: */
1707: public void setLiteralAttribs(boolean literalAttribs) {
1708: configuration.literalAttribs = literalAttribs;
1709: }
1710:
1711: /**
1712: * <code>literal-attributes</code>- if true attributes may use newlines.
1713: * @return <code>true</code> if attributes may use newlines
1714: * @see Configuration#literalAttribs
1715: */
1716: public boolean getLiteralAttribs() {
1717: return configuration.literalAttribs;
1718: }
1719:
1720: /**
1721: * <code>print-body-only</code>- output BODY content only.
1722: * @param bodyOnly true = print only the document body
1723: * @see Configuration#bodyOnly
1724: */
1725: public void setPrintBodyOnly(boolean bodyOnly) {
1726: configuration.bodyOnly = bodyOnly;
1727: }
1728:
1729: /**
1730: * <code>print-body-only</code>- output BODY content only.
1731: * @return true if tidy will print only the document body
1732: */
1733: public boolean getPrintBodyOnly() {
1734: return configuration.bodyOnly;
1735: }
1736:
1737: /**
1738: * <code>fix-uri</code>- fix uri references applying URI encoding if necessary.
1739: * @param fixUri true = fix uri references
1740: * @see Configuration#fixUri
1741: */
1742: public void setFixUri(boolean fixUri) {
1743: configuration.fixUri = fixUri;
1744: }
1745:
1746: /**
1747: * <code>fix-uri</code>- output BODY content only.
1748: * @return true if tidy will fix uri references
1749: */
1750: public boolean getFixUri() {
1751: return configuration.fixUri;
1752: }
1753:
1754: /**
1755: * <code>lower-literals</code>- folds known attribute values to lower case.
1756: * @param lowerLiterals true = folds known attribute values to lower case
1757: * @see Configuration#lowerLiterals
1758: */
1759: public void setLowerLiterals(boolean lowerLiterals) {
1760: configuration.lowerLiterals = lowerLiterals;
1761: }
1762:
1763: /**
1764: * <code>lower-literals</code>- folds known attribute values to lower case.
1765: * @return true if tidy will folds known attribute values to lower case
1766: */
1767: public boolean getLowerLiterals() {
1768: return configuration.lowerLiterals;
1769: }
1770:
1771: /**
1772: * <code>hide-comments</code>- hides all (real) comments in output.
1773: * @param hideComments true = hides all comments in output
1774: * @see Configuration#hideComments
1775: */
1776: public void setHideComments(boolean hideComments) {
1777: configuration.hideComments = hideComments;
1778: }
1779:
1780: /**
1781: * <code>hide-comments</code>- hides all (real) comments in output.
1782: * @return true if tidy will hide all comments in output
1783: */
1784: public boolean getHideComments() {
1785: return configuration.hideComments;
1786: }
1787:
1788: /**
1789: * <code>indent-cdata</code>- indent CDATA sections.
1790: * @param indentCdata true = indent CDATA sections
1791: * @see Configuration#indentCdata
1792: */
1793: public void setIndentCdata(boolean indentCdata) {
1794: configuration.indentCdata = indentCdata;
1795: }
1796:
1797: /**
1798: * <code>indent-cdata</code>- indent CDATA sections.
1799: * @return true if tidy will indent CDATA sections
1800: */
1801: public boolean getIndentCdata() {
1802: return configuration.indentCdata;
1803: }
1804:
1805: /**
1806: * <code>force-output</code>- output document even if errors were found.
1807: * @param forceOutput true = output document even if errors were found
1808: * @see Configuration#forceOutput
1809: */
1810: public void setForceOutput(boolean forceOutput) {
1811: configuration.forceOutput = forceOutput;
1812: }
1813:
1814: /**
1815: * <code>force-output</code>- output document even if errors were found.
1816: * @return true if tidy will output document even if errors were found
1817: */
1818: public boolean getForceOutput() {
1819: return configuration.forceOutput;
1820: }
1821:
1822: /**
1823: * <code>show-errors</code>- set the number of errors to put out.
1824: * @param showErrors number of errors to put out
1825: * @see Configuration#showErrors
1826: */
1827: public void setShowErrors(int showErrors) {
1828: configuration.showErrors = showErrors;
1829: }
1830:
1831: /**
1832: * <code>show-errors</code>- number of errors to put out.
1833: * @return the number of errors tidy will put out
1834: */
1835: public int getShowErrors() {
1836: return configuration.showErrors;
1837: }
1838:
1839: /**
1840: * <code>ascii-chars</code>- convert quotes and dashes to nearest ASCII char.
1841: * @param asciiChars true = convert quotes and dashes to nearest ASCII char
1842: * @see Configuration#asciiChars
1843: */
1844: public void setAsciiChars(boolean asciiChars) {
1845: configuration.asciiChars = asciiChars;
1846: }
1847:
1848: /**
1849: * <code>ascii-chars</code>- convert quotes and dashes to nearest ASCII char.
1850: * @return true if tidy will convert quotes and dashes to nearest ASCII char
1851: */
1852: public boolean getAsciiChars() {
1853: return configuration.asciiChars;
1854: }
1855:
1856: /**
1857: * <code>join-classes</code>- join multiple class attributes.
1858: * @param joinClasses true = join multiple class attributes
1859: * @see Configuration#joinClasses
1860: */
1861: public void setJoinClasses(boolean joinClasses) {
1862: configuration.joinClasses = joinClasses;
1863: }
1864:
1865: /**
1866: * <code>join-classes</code>- join multiple class attributes.
1867: * @return true if tidy will join multiple class attributes
1868: */
1869: public boolean getJoinClasses() {
1870: return configuration.joinClasses;
1871: }
1872:
1873: /**
1874: * <code>join-styles</code>- join multiple style attributes.
1875: * @param joinStyles true = join multiple style attributes
1876: * @see Configuration#joinStyles
1877: */
1878: public void setJoinStyles(boolean joinStyles) {
1879: configuration.joinStyles = joinStyles;
1880: }
1881:
1882: /**
1883: * <code>join-styles</code>- join multiple style attributes.
1884: * @return true if tidy will join multiple style attributes
1885: */
1886: public boolean getJoinStyles() {
1887: return configuration.joinStyles;
1888: }
1889:
1890: /**
1891: * <code>trim-empty-elements</code>- trim empty elements.
1892: * @param trim-empty-elements true = trim empty elements
1893: * @see Configuration#trimEmpty
1894: */
1895: public void setTrimEmptyElements(boolean trimEmpty) {
1896: configuration.trimEmpty = trimEmpty;
1897: }
1898:
1899: /**
1900: * <code>trim-empty-elements</code>- trim empty elements.
1901: * @return true if tidy will trim empty elements
1902: */
1903: public boolean getTrimEmptyElements() {
1904: return configuration.trimEmpty;
1905: }
1906:
1907: /**
1908: * <code>replace-color</code>- replace hex color attribute values with names.
1909: * @param replaceColor true = replace hex color attribute values with names
1910: * @see Configuration#replaceColor
1911: */
1912: public void setReplaceColor(boolean replaceColor) {
1913: configuration.replaceColor = replaceColor;
1914: }
1915:
1916: /**
1917: * <code>replace-color</code>- replace hex color attribute values with names.
1918: * @return true if tidy will replace hex color attribute values with names
1919: */
1920: public boolean getReplaceColor() {
1921: return configuration.replaceColor;
1922: }
1923:
1924: /**
1925: * <code>escape-cdata</code>- replace CDATA sections with escaped text.
1926: * @param escapeCdata true = replace CDATA sections with escaped text
1927: * @see Configuration#escapeCdata
1928: */
1929: public void setEscapeCdata(boolean escapeCdata) {
1930: configuration.escapeCdata = escapeCdata;
1931: }
1932:
1933: /**
1934: * <code>escape-cdata</code> -replace CDATA sections with escaped text.
1935: * @return true if tidy will replace CDATA sections with escaped text
1936: */
1937: public boolean getEscapeCdata() {
1938: return configuration.escapeCdata;
1939: }
1940:
1941: /**
1942: * <code>repeated-attributes</code>- keep first or last duplicate attribute.
1943: * @param repeatedAttributes <code>Configuration.KEEP_FIRST | Configuration.KEEP_LAST</code>
1944: * @see Configuration#duplicateAttrs
1945: */
1946: public void setRepeatedAttributes(int repeatedAttributes) {
1947: configuration.duplicateAttrs = repeatedAttributes;
1948: }
1949:
1950: /**
1951: * <code>repeated-attributes</code>- keep first or last duplicate attribute.
1952: * @return <code>Configuration.KEEP_FIRST | Configuration.KEEP_LAST</code>
1953: */
1954: public int getRepeatedAttributes() {
1955: return configuration.duplicateAttrs;
1956: }
1957:
1958: /**
1959: * <code>keep-time</code>- if true last modified time is preserved.
1960: * @param keepFileTimes <code>true</code> if tidy should preserved last modified time in input file.
1961: * @todo <strong>this is NOT supported at this time. </strong>
1962: * @see Configuration#keepFileTimes
1963: */
1964: public void setKeepFileTimes(boolean keepFileTimes) {
1965: configuration.keepFileTimes = keepFileTimes;
1966: }
1967:
1968: /**
1969: * <code>keep-time</code>- if true last modified time is preserved.
1970: * @return <code>true</code> if tidy will preserved last modified time in input file.
1971: * @todo <strong>this is NOT supported at this time. </strong>
1972: * @see Configuration#keepFileTimes
1973: */
1974: public boolean getKeepFileTimes() {
1975: return configuration.keepFileTimes;
1976: }
1977:
1978: /**
1979: * Sets the character encoding used both for input and for output.
1980: * @param charencoding encoding constant
1981: * @deprecated set input/output encoding using java encoding names
1982: */
1983: public void setCharEncoding(int charencoding) {
1984: String ceName = configuration.convertCharEncoding(charencoding);
1985: if (ceName != null) {
1986: configuration.setInCharEncodingName(ceName);
1987: configuration.setOutCharEncodingName(ceName);
1988: }
1989: }
1990:
1991: /**
1992: * Returns the configured character encoding.
1993: * @return character encoding constant
1994: * @deprecated from r8 tidy can use different encoding for input and output. This method will only return the
1995: * <strong>input </strong> character encoding.
1996: */
1997: public int getCharEncoding() {
1998: return configuration.getInCharEncoding();
1999: }
2000:
2001: /**
2002: * @param slidestyle N/A
2003: * @deprecated does nothing
2004: */
2005: public void setSlidestyle(String slidestyle) {
2006: configuration.slidestyle = slidestyle;
2007: }
2008:
2009: /**
2010: * @deprecated does nothing
2011: * @return <code>null</code>
2012: */
2013: public String getSlidestyle() {
2014: return null;
2015: }
2016:
2017: /**
2018: * <code>output-raw</code>- avoid mapping values > 127 to entities. This has the same effect of specifying a
2019: * "raw" encoding in the original version of tidy.
2020: * @param rawOut avoid mapping values > 127 to entities
2021: * @see Configuration#rawOut
2022: */
2023: public void setRawOut(boolean rawOut) {
2024: configuration.rawOut = rawOut;
2025: }
2026:
2027: /**
2028: * <code>output-raw</code>- avoid mapping values > 127 to entities.
2029: * @return <code>true</code> if tidy will not map values > 127 to entities
2030: * @see Configuration#rawOut
2031: */
2032: public boolean getRawOut() {
2033: return configuration.rawOut;
2034: }
2035:
2036: /**
2037: * <code>input-encoding</code> the character encoding used for input.
2038: * @param encoding a valid java encoding name
2039: */
2040: public void setInputEncoding(String encoding) {
2041: configuration.setInCharEncodingName(encoding);
2042: }
2043:
2044: /**
2045: * <code>input-encoding</code> the character encoding used for input.
2046: * @return the java name of the encoding currently used for input
2047: */
2048: public String getInputEncoding() {
2049: return configuration.getInCharEncodingName();
2050: }
2051:
2052: /**
2053: * <code>output-encoding</code> the character encoding used for output.
2054: * @param encoding a valid java encoding name
2055: */
2056: public void setOutputEncoding(String encoding) {
2057: configuration.setOutCharEncodingName(encoding);
2058: }
2059:
2060: /**
2061: * <code>output-encoding</code> the character encoding used for output.
2062: * @return the java name of the encoding currently used for output
2063: */
2064: public String getOutputEncoding() {
2065: return configuration.getOutCharEncodingName();
2066: }
2067:
2068: }
|