0001: /*
0002: * Java HTML Tidy - JTidy
0003: * HTML parser and pretty printer
0004: *
0005: * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
0006: * Institute of Technology, Institut National de Recherche en
0007: * Informatique et en Automatique, Keio University). All Rights
0008: * Reserved.
0009: *
0010: * Contributing Author(s):
0011: *
0012: * Dave Raggett <dsr@w3.org>
0013: * Andy Quick <ac.quick@sympatico.ca> (translation to Java)
0014: * Gary L Peskin <garyp@firstech.com> (Java development)
0015: * Sami Lempinen <sami@lempinen.net> (release management)
0016: * Fabrizio Giustina <fgiust at users.sourceforge.net>
0017: *
0018: * The contributing author(s) would like to thank all those who
0019: * helped with testing, bug fixes, and patience. This wouldn't
0020: * have been possible without all of you.
0021: *
0022: * COPYRIGHT NOTICE:
0023: *
0024: * This software and documentation is provided "as is," and
0025: * the copyright holders and contributing author(s) make no
0026: * representations or warranties, express or implied, including
0027: * but not limited to, warranties of merchantability or fitness
0028: * for any particular purpose or that the use of the software or
0029: * documentation will not infringe any third party patents,
0030: * copyrights, trademarks or other rights.
0031: *
0032: * The copyright holders and contributing author(s) will not be
0033: * liable for any direct, indirect, special or consequential damages
0034: * arising out of any use of the software or documentation, even if
0035: * advised of the possibility of such damage.
0036: *
0037: * Permission is hereby granted to use, copy, modify, and distribute
0038: * this source code, or portions hereof, documentation and executables,
0039: * for any purpose, without fee, subject to the following restrictions:
0040: *
0041: * 1. The origin of this source code must not be misrepresented.
0042: * 2. Altered versions must be plainly marked as such and must
0043: * not be misrepresented as being the original source.
0044: * 3. This Copyright notice may not be removed or altered from any
0045: * source or altered source distribution.
0046: *
0047: * The copyright holders and contributing author(s) specifically
0048: * permit, without fee, and encourage the use of this source code
0049: * as a component for supporting the Hypertext Markup Language in
0050: * commercial products. If you use this source code in a product,
0051: * acknowledgment is not required but would be appreciated.
0052: *
0053: */
0054: package org.w3c.tidy;
0055:
0056: import java.io.FileInputStream;
0057: import java.io.IOException;
0058: import java.io.Serializable;
0059: import java.io.Writer;
0060: import java.lang.reflect.Field;
0061: import java.util.ArrayList;
0062: import java.util.Collections;
0063: import java.util.Enumeration;
0064: import java.util.HashMap;
0065: import java.util.Iterator;
0066: import java.util.List;
0067: import java.util.Map;
0068: import java.util.Properties;
0069:
0070: /**
0071: * Read configuration file and manage configuration properties. Configuration files associate a property name with a
0072: * value. The format is that of a Java .properties file.
0073: * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
0074: * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
0075: * @author Fabrizio Giustina
0076: * @version $Revision: 1.43 $ ($Author: fgiust $)
0077: */
0078: public class Configuration implements Serializable {
0079:
0080: /**
0081: * character encoding = RAW.
0082: * @deprecated use <code>Tidy.setRawOut(true)</code> for raw output
0083: */
0084: public static final int RAW = 0;
0085:
0086: /**
0087: * character encoding = ASCII.
0088: * @deprecated
0089: */
0090: public static final int ASCII = 1;
0091:
0092: /**
0093: * character encoding = LATIN1.
0094: * @deprecated
0095: */
0096: public static final int LATIN1 = 2;
0097:
0098: /**
0099: * character encoding = UTF8.
0100: * @deprecated
0101: */
0102: public static final int UTF8 = 3;
0103:
0104: /**
0105: * character encoding = ISO2022.
0106: * @deprecated
0107: */
0108: public static final int ISO2022 = 4;
0109:
0110: /**
0111: * character encoding = MACROMAN.
0112: * @deprecated
0113: */
0114: public static final int MACROMAN = 5;
0115:
0116: /**
0117: * character encoding = UTF16LE.
0118: * @deprecated
0119: */
0120: public static final int UTF16LE = 6;
0121:
0122: /**
0123: * character encoding = UTF16BE.
0124: * @deprecated
0125: */
0126: public static final int UTF16BE = 7;
0127:
0128: /**
0129: * character encoding = UTF16.
0130: * @deprecated
0131: */
0132: public static final int UTF16 = 8;
0133:
0134: /**
0135: * character encoding = WIN1252.
0136: * @deprecated
0137: */
0138: public static final int WIN1252 = 9;
0139:
0140: /**
0141: * character encoding = BIG5.
0142: * @deprecated
0143: */
0144: public static final int BIG5 = 10;
0145:
0146: /**
0147: * character encoding = SHIFTJIS.
0148: * @deprecated
0149: */
0150: public static final int SHIFTJIS = 11;
0151:
0152: /**
0153: * Convert from deprecated tidy encoding constant to standard java encoding name.
0154: */
0155: private final String[] ENCODING_NAMES = new String[] {
0156: "raw", // rawOut, it will not be mapped to a java encoding
0157: "ASCII", "ISO8859_1", "UTF8", "JIS", "MacRoman",
0158: "UnicodeLittle", "UnicodeBig", "Unicode", "Cp1252", "Big5",
0159: "SJIS" };
0160:
0161: /**
0162: * treatment of doctype: omit.
0163: * @todo should be an enumeration DocTypeMode
0164: */
0165: public static final int DOCTYPE_OMIT = 0;
0166:
0167: /**
0168: * treatment of doctype: auto.
0169: */
0170: public static final int DOCTYPE_AUTO = 1;
0171:
0172: /**
0173: * treatment of doctype: strict.
0174: */
0175: public static final int DOCTYPE_STRICT = 2;
0176:
0177: /**
0178: * treatment of doctype: loose.
0179: */
0180: public static final int DOCTYPE_LOOSE = 3;
0181:
0182: /**
0183: * treatment of doctype: user.
0184: */
0185: public static final int DOCTYPE_USER = 4;
0186:
0187: /**
0188: * Keep last duplicate attribute.
0189: * @todo should be an enumeration DupAttrMode
0190: */
0191: public static final int KEEP_LAST = 0;
0192:
0193: /**
0194: * Keep first duplicate attribute.
0195: */
0196: public static final int KEEP_FIRST = 1;
0197:
0198: /**
0199: * Map containg all the valid configuration options and the related parser. Tag entry contains String(option
0200: * name)-Flag instance.
0201: */
0202: private static final Map OPTIONS = new HashMap();
0203:
0204: /**
0205: * serial version UID for this class.
0206: */
0207: private static final long serialVersionUID = -4955155037138560842L;
0208:
// Registers every known option, grouped by the parser that handles its value. Each row is
// { option name, backing field name }; a null field name means the option has no backing field.
static {
    registerOptions(ParsePropertyImpl.INT, new String[][] {
        {"indent-spaces", "spaces"},
        {"wrap", "wraplen"},
        {"show-errors", "showErrors"},
        {"tab-size", "tabsize"}
    });

    registerOptions(ParsePropertyImpl.BOOL, new String[][] {
        {"wrap-attributes", "wrapAttVals"},
        {"wrap-script-literals", "wrapScriptlets"},
        {"wrap-sections", "wrapSection"},
        {"wrap-asp", "wrapAsp"},
        {"wrap-jste", "wrapJste"},
        {"wrap-php", "wrapPhp"},
        {"literal-attributes", "literalAttribs"},
        {"show-body-only", "bodyOnly"},
        {"fix-uri", "fixUri"},
        {"lower-literals", "lowerLiterals"},
        {"hide-comments", "hideComments"},
        {"indent-cdata", "indentCdata"},
        {"force-output", "forceOutput"},
        {"ascii-chars", "asciiChars"},
        {"join-classes", "joinClasses"},
        {"join-styles", "joinStyles"},
        {"escape-cdata", "escapeCdata"},
        {"replace-color", "replaceColor"},
        {"quiet", "quiet"},
        {"tidy-mark", "tidyMark"},
        {"indent-attributes", "indentAttributes"},
        {"hide-endtags", "hideEndTags"},
        {"input-xml", "xmlTags"},
        {"output-xml", "xmlOut"},
        {"output-html", "htmlOut"},
        {"output-xhtml", "xHTML"},
        {"add-xml-pi", "xmlPi"},
        {"add-xml-decl", "xmlPi"},
        {"assume-xml-procins", "xmlPIs"},
        {"uppercase-tags", "upperCaseTags"},
        {"uppercase-attributes", "upperCaseAttrs"},
        {"bare", "makeBare"},
        {"clean", "makeClean"},
        {"logical-emphasis", "logicalEmphasis"},
        {"word-2000", "word2000"},
        {"drop-empty-paras", "dropEmptyParas"},
        {"drop-font-tags", "dropFontTags"},
        {"drop-proprietary-attributes", "dropProprietaryAttributes"},
        {"enclose-text", "encloseBodyText"},
        {"enclose-block-text", "encloseBlockText"},
        {"add-xml-space", "xmlSpace"},
        {"fix-bad-comments", "fixComments"},
        {"split", "burstSlides"},
        {"break-before-br", "breakBeforeBR"},
        {"numeric-entities", "numEntities"},
        {"quote-marks", "quoteMarks"},
        {"quote-nbsp", "quoteNbsp"},
        {"quote-ampersand", "quoteAmpersand"},
        {"write-back", "writeback"},
        {"keep-time", "keepFileTimes"},
        {"show-warnings", "showWarnings"},
        {"ncr", "ncr"},
        {"fix-backslash", "fixBackslash"},
        {"gnu-emacs", "emacs"},
        {"only-errors", "onlyErrors"},
        {"output-raw", "rawOut"},
        {"trim-empty-elements", "trimEmpty"}
    });

    // "markup" writes to the same field as "only-errors", through the inverted boolean parser
    registerOptions(ParsePropertyImpl.INVBOOL, new String[][] {
        {"markup", "onlyErrors"}
    });

    registerOptions(ParsePropertyImpl.CHAR_ENCODING, new String[][] {
        {"char-encoding", null},
        {"input-encoding", null},
        {"output-encoding", null}
    });

    registerOptions(ParsePropertyImpl.NAME, new String[][] {
        {"error-file", "errfile"},
        {"slide-style", "slidestyle"},
        {"language", "language"}
    });

    registerOptions(ParsePropertyImpl.TAGNAMES, new String[][] {
        {"new-inline-tags", null},
        {"new-blocklevel-tags", null},
        {"new-empty-tags", null},
        {"new-pre-tags", null}
    });

    registerOptions(ParsePropertyImpl.DOCTYPE, new String[][] {
        {"doctype", "docTypeStr"}
    });

    registerOptions(ParsePropertyImpl.REPEATED_ATTRIBUTES, new String[][] {
        {"repeated-attributes", "duplicateAttrs"}
    });

    registerOptions(ParsePropertyImpl.STRING, new String[][] {
        {"alt-text", "altText"}
    });

    registerOptions(ParsePropertyImpl.INDENT, new String[][] {
        {"indent", "indentContent"}
    });

    registerOptions(ParsePropertyImpl.CSS1SELECTOR, new String[][] {
        {"css-prefix", "cssPrefix"}
    });

    registerOptions(ParsePropertyImpl.NEWLINE, new String[][] {
        {"newline", null}
    });
}

/**
 * Registers a group of options that share the same parser, in the order given.
 * @param parser parser used for every option in the group
 * @param entries rows of { option name, field name }; the field name may be <code>null</code>
 */
private static void registerOptions(ParseProperty parser, String[][] entries) {
    for (int i = 0; i < entries.length; i++) {
        addConfigOption(new Flag(entries[i][0], entries[i][1], parser));
    }
}
0377:
0378: /**
0379: * default indentation.
0380: */
0381: protected int spaces = 2;
0382:
0383: /**
0384: * default wrap margin (68).
0385: */
0386: protected int wraplen = 68;
0387:
0388: /**
0389: * default tab size (8).
0390: */
0391: protected int tabsize = 8;
0392:
0393: /**
0394: * see doctype property.
0395: */
0396: protected int docTypeMode = DOCTYPE_AUTO;
0397:
0398: /**
0399: * Keep first or last duplicate attribute.
0400: */
0401: protected int duplicateAttrs = KEEP_LAST;
0402:
0403: /**
0404: * default text for alt attribute.
0405: */
0406: protected String altText;
0407:
0408: /**
0409: * style sheet for slides.
0410: * @deprecated does nothing
0411: */
0412: protected String slidestyle;
0413:
0414: /**
0415: * RJ language property.
0416: */
0417: protected String language; // #431953
0418:
0419: /**
0420: * user specified doctype.
0421: */
0422: protected String docTypeStr;
0423:
0424: /**
0425: * file name to write errors to.
0426: */
0427: protected String errfile;
0428:
0429: /**
0430: * if true then output tidied markup.
0431: */
0432: protected boolean writeback;
0433:
0434: /**
0435: * if true normal output is suppressed.
0436: */
0437: protected boolean onlyErrors;
0438:
0439: /**
0440: * however errors are always shown.
0441: */
0442: protected boolean showWarnings = true;
0443:
0444: /**
0445: * no 'Parsing X', guessed DTD or summary.
0446: */
0447: protected boolean quiet;
0448:
0449: /**
0450: * indent content of appropriate tags.
0451: */
0452: protected boolean indentContent;
0453:
0454: /**
0455: * does text/block level content effect indentation.
0456: */
0457: protected boolean smartIndent;
0458:
0459: /**
0460: * suppress optional end tags.
0461: */
0462: protected boolean hideEndTags;
0463:
0464: /**
0465: * treat input as XML.
0466: */
0467: protected boolean xmlTags;
0468:
0469: /**
0470: * create output as XML.
0471: */
0472: protected boolean xmlOut;
0473:
0474: /**
0475: * output extensible HTML.
0476: */
0477: protected boolean xHTML;
0478:
0479: /**
0480: * output plain-old HTML, even for XHTML input. Yes means set explicitly.
0481: */
0482: protected boolean htmlOut;
0483:
0484: /**
0485: * add <code><?xml?></code> for XML docs.
0486: */
0487: protected boolean xmlPi;
0488:
0489: /**
0490: * output tags in upper not lower case.
0491: */
0492: protected boolean upperCaseTags;
0493:
0494: /**
0495: * output attributes in upper not lower case.
0496: */
0497: protected boolean upperCaseAttrs;
0498:
0499: /**
0500: * remove presentational clutter.
0501: */
0502: protected boolean makeClean;
0503:
0504: /**
0505: * Make bare HTML: remove Microsoft cruft.
0506: */
0507: protected boolean makeBare;
0508:
0509: /**
0510: * replace i by em and b by strong.
0511: */
0512: protected boolean logicalEmphasis;
0513:
0514: /**
0515: * discard presentation tags.
0516: */
0517: protected boolean dropFontTags;
0518:
0519: /**
0520: * discard proprietary attributes.
0521: */
0522: protected boolean dropProprietaryAttributes;
0523:
0524: /**
0525: * discard empty p elements.
0526: */
0527: protected boolean dropEmptyParas = true;
0528:
0529: /**
0530: * fix comments with adjacent hyphens.
0531: */
0532: protected boolean fixComments = true;
0533:
0534: /**
0535: * trim empty elements.
0536: */
0537: protected boolean trimEmpty = true;
0538:
0539: /**
0540: * o/p newline before br or not?
0541: */
0542: protected boolean breakBeforeBR;
0543:
0544: /**
0545: * create slides on each h2 element.
0546: */
0547: protected boolean burstSlides;
0548:
0549: /**
0550: * use numeric entities.
0551: */
0552: protected boolean numEntities;
0553:
0554: /**
0555: * output " marks as ".
0556: */
0557: protected boolean quoteMarks;
0558:
0559: /**
0560: * output non-breaking space as entity.
0561: */
0562: protected boolean quoteNbsp = true;
0563:
0564: /**
0565: * output naked ampersand as &.
0566: */
0567: protected boolean quoteAmpersand = true;
0568:
0569: /**
0570: * wrap within attribute values.
0571: */
0572: protected boolean wrapAttVals;
0573:
0574: /**
0575: * wrap within JavaScript string literals.
0576: */
0577: protected boolean wrapScriptlets;
0578:
0579: /**
0580: * wrap within CDATA section tags.
0581: */
0582: protected boolean wrapSection = true;
0583:
0584: /**
0585: * wrap within ASP pseudo elements.
0586: */
0587: protected boolean wrapAsp = true;
0588:
0589: /**
0590: * wrap within JSTE pseudo elements.
0591: */
0592: protected boolean wrapJste = true;
0593:
0594: /**
0595: * wrap within PHP pseudo elements.
0596: */
0597: protected boolean wrapPhp = true;
0598:
0599: /**
0600: * fix URLs by replacing \ with /.
0601: */
0602: protected boolean fixBackslash = true;
0603:
0604: /**
0605: * newline+indent before each attribute.
0606: */
0607: protected boolean indentAttributes;
0608:
0609: /**
0610: * If set to yes PIs must end with <code>?></code>.
0611: */
0612: protected boolean xmlPIs;
0613:
0614: /**
0615: * if set to yes adds xml:space attr as needed.
0616: */
0617: protected boolean xmlSpace;
0618:
0619: /**
0620: * if yes text at body is wrapped in p's.
0621: */
0622: protected boolean encloseBodyText;
0623:
0624: /**
0625: * if yes text in blocks is wrapped in p's.
0626: */
0627: protected boolean encloseBlockText;
0628:
0629: /**
0630: * if yes last modied time is preserved.
0631: */
0632: protected boolean keepFileTimes = true;
0633:
0634: /**
0635: * draconian cleaning for Word2000.
0636: */
0637: protected boolean word2000;
0638:
0639: /**
0640: * add meta element indicating tidied doc.
0641: */
0642: protected boolean tidyMark = true;
0643:
0644: /**
0645: * if true format error output for GNU Emacs.
0646: */
0647: protected boolean emacs;
0648:
0649: /**
0650: * if true attributes may use newlines.
0651: */
0652: protected boolean literalAttribs;
0653:
0654: /**
0655: * output BODY content only.
0656: */
0657: protected boolean bodyOnly;
0658:
0659: /**
0660: * properly escape URLs.
0661: */
0662: protected boolean fixUri = true;
0663:
0664: /**
0665: * folds known attribute values to lower case.
0666: */
0667: protected boolean lowerLiterals = true;
0668:
0669: /**
0670: * replace hex color attribute values with names.
0671: */
0672: protected boolean replaceColor;
0673:
0674: /**
0675: * hides all (real) comments in output.
0676: */
0677: protected boolean hideComments;
0678:
0679: /**
0680: * indent CDATA sections.
0681: */
0682: protected boolean indentCdata;
0683:
0684: /**
0685: * output document even if errors were found.
0686: */
0687: protected boolean forceOutput;
0688:
0689: /**
0690: * number of errors to put out.
0691: */
0692: protected int showErrors = 6;
0693:
0694: /**
0695: * convert quotes and dashes to nearest ASCII char.
0696: */
0697: protected boolean asciiChars = true;
0698:
0699: /**
0700: * join multiple class attributes.
0701: */
0702: protected boolean joinClasses;
0703:
0704: /**
0705: * join multiple style attributes.
0706: */
0707: protected boolean joinStyles = true;
0708:
0709: /**
0710: * replace CDATA sections with escaped text.
0711: */
0712: protected boolean escapeCdata = true;
0713:
0714: /**
0715: * allow numeric character references.
0716: */
0717: protected boolean ncr = true; // #431953
0718:
0719: /**
0720: * CSS class naming for -clean option.
0721: */
0722: protected String cssPrefix;
0723:
0724: /**
0725: * char encoding used when replacing illegal SGML chars, regardless of specified encoding.
0726: */
0727: protected int replacementCharEncoding = WIN1252; // by default
0728:
0729: /**
0730: * TagTable associated with this Configuration.
0731: */
0732: protected TagTable tt;
0733:
0734: /**
0735: * Report instance. Used for messages.
0736: */
0737: protected Report report;
0738:
0739: /**
0740: * track what types of tags user has defined to eliminate unnecessary searches.
0741: */
0742: protected int definedTags;
0743:
0744: /**
0745: * bytes for the newline marker.
0746: */
0747: protected char[] newline = (System.getProperty("line.separator"))
0748: .toCharArray();
0749:
0750: /**
0751: * Input character encoding (defaults to LATIN1).
0752: */
0753: private int inCharEncoding = LATIN1;
0754:
0755: /**
0756: * Input character encoding (defaults to "ISO8859_1").
0757: */
0758: private String inCharEncodingName = "ISO8859_1";
0759:
0760: /**
0761: * Output character encoding (defaults to ASCII).
0762: */
0763: private int outCharEncoding = ASCII;
0764:
0765: /**
0766: * Output character encoding (defaults to "ASCII").
0767: */
0768: private String outCharEncodingName = "ASCII";
0769:
0770: /**
0771: * Avoid mapping values > 127 to entities.
0772: */
0773: protected boolean rawOut;
0774:
0775: /**
0776: * configuration properties.
0777: */
0778: private transient Properties properties = new Properties();
0779:
/**
 * Creates a Configuration that sends its messages to the given report. Meant to be called by Tidy only.
 * @param report Report instance
 */
protected Configuration(Report report) {
    super();
    this.report = report;
}
0787:
0788: /**
0789: * adds a config option to the map.
0790: * @param flag configuration options added
0791: */
0792: private static void addConfigOption(Flag flag) {
0793: OPTIONS.put(flag.getName(), flag);
0794: }
0795:
0796: /**
0797: * adds configuration Properties.
0798: * @param p Properties
0799: */
0800: public void addProps(Properties p) {
0801: Enumeration propEnum = p.propertyNames();
0802: while (propEnum.hasMoreElements()) {
0803: String key = (String) propEnum.nextElement();
0804: String value = p.getProperty(key);
0805: properties.put(key, value);
0806: }
0807: parseProps();
0808: }
0809:
0810: /**
0811: * Parses a property file.
0812: * @param filename file name
0813: */
0814: public void parseFile(String filename) {
0815: try {
0816: properties.load(new FileInputStream(filename));
0817: } catch (IOException e) {
0818: System.err.println(filename + " " + e.toString());
0819: return;
0820: }
0821: parseProps();
0822: }
0823:
0824: /**
0825: * Is the given String a valid configuration flag?
0826: * @param name configuration parameter name
0827: * @return <code>true</code> if the given String is a valid config option
0828: */
0829: public static boolean isKnownOption(String name) {
0830: return name != null && OPTIONS.containsKey(name);
0831: }
0832:
0833: /**
0834: * Parses the configuration properties file.
0835: */
0836: private void parseProps() {
0837: Iterator iterator = properties.keySet().iterator();
0838:
0839: while (iterator.hasNext()) {
0840: String key = (String) iterator.next();
0841: Flag flag = (Flag) OPTIONS.get(key);
0842: if (flag == null) {
0843: report.unknownOption(key);
0844: continue;
0845: }
0846:
0847: String stringValue = properties.getProperty(key);
0848: Object value = flag.getParser().parse(stringValue, key,
0849: this );
0850: if (flag.getLocation() != null) {
0851: try {
0852: flag.getLocation().set(this , value);
0853: } catch (IllegalArgumentException e) {
0854: throw new RuntimeException(
0855: "IllegalArgumentException during config initialization for field "
0856: + key + "with value [" + value
0857: + "]: " + e.getMessage());
0858: } catch (IllegalAccessException e) {
0859: throw new RuntimeException(
0860: "IllegalArgumentException during config initialization for field "
0861: + key + "with value [" + value
0862: + "]: " + e.getMessage());
0863: }
0864: }
0865: }
0866: }
0867:
0868: /**
0869: * Ensure that config is self consistent.
0870: */
0871: public void adjust() {
0872: if (encloseBlockText) {
0873: encloseBodyText = true;
0874: }
0875:
0876: // avoid the need to set IndentContent when SmartIndent is set
0877: if (smartIndent) {
0878: indentContent = true;
0879: }
0880:
0881: // disable wrapping
0882: if (wraplen == 0) {
0883: wraplen = 0x7FFFFFFF;
0884: }
0885:
0886: // Word 2000 needs o:p to be declared as inline
0887: if (word2000) {
0888: definedTags |= Dict.TAGTYPE_INLINE;
0889: tt.defineTag(Dict.TAGTYPE_INLINE, "o:p");
0890: }
0891:
0892: // #480701 disable XHTML output flag if both output-xhtml and xml are set
0893: if (xmlTags) {
0894: xHTML = false;
0895: }
0896:
0897: // XHTML is written in lower case
0898: if (xHTML) {
0899: xmlOut = true;
0900: upperCaseTags = false;
0901: upperCaseAttrs = false;
0902: }
0903:
0904: // if XML in, then XML out
0905: if (xmlTags) {
0906: xmlOut = true;
0907: xmlPIs = true;
0908: }
0909:
0910: // #427837 - fix by Dave Raggett 02 Jun 01
0911: // generate <?xml version="1.0" encoding="iso-8859-1"?> if the output character encoding is Latin-1 etc.
0912: if (getOutCharEncoding() != UTF8
0913: && getOutCharEncoding() != ASCII && xmlOut) {
0914: xmlPi = true;
0915: }
0916:
0917: // XML requires end tags
0918: if (xmlOut) {
0919: quoteAmpersand = true;
0920: hideEndTags = false;
0921: }
0922: }
0923:
0924: /**
0925: * prints available configuration options.
0926: * @param errout where to write
0927: * @param showActualConfiguration print actual configuration values
0928: */
0929: void printConfigOptions(Writer errout,
0930: boolean showActualConfiguration) {
0931: String pad = " ";
0932: try {
0933: errout.write("\nConfiguration File Settings:\n\n");
0934:
0935: if (showActualConfiguration) {
0936: errout
0937: .write("Name Type Current Value\n");
0938: } else {
0939: errout
0940: .write("Name Type Allowable values\n");
0941: }
0942:
0943: errout
0944: .write("=========================== ========= ========================================\n");
0945:
0946: Flag configItem;
0947:
0948: // sort configuration options
0949: List values = new ArrayList(OPTIONS.values());
0950: Collections.sort(values);
0951:
0952: Iterator iterator = values.iterator();
0953:
0954: while (iterator.hasNext()) {
0955: configItem = (Flag) iterator.next();
0956:
0957: errout.write(configItem.getName());
0958: errout
0959: .write(pad, 0, 28 - configItem.getName()
0960: .length());
0961:
0962: errout.write(configItem.getParser().getType());
0963: errout.write(pad, 0, 11 - configItem.getParser()
0964: .getType().length());
0965:
0966: if (showActualConfiguration) {
0967: Field field = configItem.getLocation();
0968: Object actualValue = null;
0969:
0970: if (field != null) {
0971: try {
0972: actualValue = field.get(this );
0973: } catch (IllegalArgumentException e1) {
0974: // should never happen
0975: throw new RuntimeException(
0976: "IllegalArgument when reading field "
0977: + field.getName());
0978: } catch (IllegalAccessException e1) {
0979: // should never happen
0980: throw new RuntimeException(
0981: "IllegalAccess when reading field "
0982: + field.getName());
0983: }
0984: }
0985:
0986: errout.write(configItem.getParser()
0987: .getFriendlyName(configItem.getName(),
0988: actualValue, this ));
0989: } else {
0990: errout.write(configItem.getParser()
0991: .getOptionValues());
0992: }
0993:
0994: errout.write("\n");
0995:
0996: }
0997: errout.flush();
0998: } catch (IOException e) {
0999: throw new RuntimeException(e.getMessage());
1000: }
1001:
1002: }
1003:
1004: /**
1005: * A configuration option.
1006: */
1007: static class Flag implements Comparable {
1008:
1009: /**
1010: * option name.
1011: */
1012: private String name;
1013:
1014: /**
1015: * field name.
1016: */
1017: private String fieldName;
1018:
1019: /**
1020: * Field where the evaluated value is saved.
1021: */
1022: private Field location;
1023:
1024: /**
1025: * Parser for the configuration property.
1026: */
1027: private ParseProperty parser;
1028:
1029: /**
1030: * Instantiates a new Flag.
1031: * @param name option name
1032: * @param fieldName field name (can be null)
1033: * @param parser parser for property
1034: */
1035: Flag(String name, String fieldName, ParseProperty parser) {
1036:
1037: this .fieldName = fieldName;
1038: this .name = name;
1039: this .parser = parser;
1040: }
1041:
1042: /**
1043: * Getter for <code>location</code>.
1044: * @return Returns the location.
1045: */
1046: public Field getLocation() {
1047: // lazy initialization to speed up loading
1048: if (fieldName != null && this .location == null) {
1049: try {
1050: this .location = Configuration.class
1051: .getDeclaredField(fieldName);
1052: } catch (NoSuchFieldException e) {
1053: throw new RuntimeException(
1054: "NoSuchField exception during config initialization for field "
1055: + fieldName);
1056: } catch (SecurityException e) {
1057: throw new RuntimeException(
1058: "Security exception during config initialization for field "
1059: + fieldName + ": " + e.getMessage());
1060: }
1061: }
1062:
1063: return this .location;
1064: }
1065:
1066: /**
1067: * Getter for <code>name</code>.
1068: * @return Returns the name.
1069: */
1070: public String getName() {
1071: return this .name;
1072: }
1073:
1074: /**
1075: * Getter for <code>parser</code>.
1076: * @return Returns the parser.
1077: */
1078: public ParseProperty getParser() {
1079: return this .parser;
1080: }
1081:
1082: /**
1083: * @see java.lang.Object#equals(java.lang.Object)
1084: */
1085: public boolean equals(Object obj) {
1086: return this .name.equals(((Flag) obj).name);
1087: }
1088:
1089: /**
1090: * @see java.lang.Object#hashCode()
1091: */
1092: public int hashCode() {
1093: // returning the hashCode of String, to be consistent with equals and compareTo
1094: return this .name.hashCode();
1095: }
1096:
1097: /**
1098: * @see java.lang.Comparable#compareTo(java.lang.Object)
1099: */
1100: public int compareTo(Object o) {
1101: return this .name.compareTo(((Flag) o).name);
1102: }
1103:
1104: }
1105:
1106: /**
1107: * Getter for <code>inCharEncoding</code>.
1108: * @return Returns the inCharEncoding.
1109: * @deprecated use getInCharEncodingName()
1110: */
1111: protected int getInCharEncoding() {
1112: return this .inCharEncoding;
1113: }
1114:
1115: /**
1116: * Setter for <code>inCharEncoding</code>.
1117: * @param encoding The inCharEncoding to set.
1118: * @deprecated use setInCharEncodingName(String)
1119: */
1120: protected void setInCharEncoding(int encoding) {
1121: if (encoding == RAW) {
1122: rawOut = true;
1123: } else {
1124: rawOut = false;
1125: this .inCharEncoding = encoding;
1126: }
1127: }
1128:
1129: /**
1130: * Getter for <code>inCharEncodingName</code>.
1131: * @return Returns the inCharEncodingName.
1132: */
1133: protected String getInCharEncodingName() {
1134: return this .inCharEncodingName;
1135: }
1136:
1137: /**
1138: * Setter for <code>inCharEncodingName</code>.
1139: * @param encoding The inCharEncodingName to set.
1140: */
1141: protected void setInCharEncodingName(String encoding) {
1142: String javaEncoding = EncodingNameMapper.toJava(encoding);
1143: if (javaEncoding != null) {
1144: this .inCharEncodingName = javaEncoding;
1145: this .inCharEncoding = convertCharEncoding(javaEncoding);
1146: }
1147: }
1148:
1149: /**
1150: * Getter for <code>outCharEncoding</code>.
1151: * @return Returns the outCharEncoding.
1152: * @deprecated use getOutCharEncodingName()
1153: */
1154: protected int getOutCharEncoding() {
1155: return this .outCharEncoding;
1156: }
1157:
1158: /**
1159: * Setter for <code>outCharEncoding</code>.
1160: * @param encoding The outCharEncoding to set.
1161: * @deprecated use setOutCharEncodingName(String)
1162: */
1163: protected void setOutCharEncoding(int encoding) {
1164: switch (encoding) {
1165: case RAW:
1166: this .rawOut = true;
1167: break;
1168:
1169: case MACROMAN:
1170: case WIN1252:
1171: this .rawOut = false;
1172: this .outCharEncoding = ASCII;
1173: break;
1174:
1175: default:
1176: this .rawOut = false;
1177: this .outCharEncoding = encoding;
1178: break;
1179: }
1180: }
1181:
1182: /**
1183: * Getter for <code>outCharEncodingName</code>.
1184: * @return Returns the outCharEncodingName.
1185: */
1186: protected String getOutCharEncodingName() {
1187: return this .outCharEncodingName;
1188: }
1189:
1190: /**
1191: * Setter for <code>outCharEncodingName</code>.
1192: * @param encoding The outCharEncodingName to set.
1193: */
1194: protected void setOutCharEncodingName(String encoding) {
1195: String javaEncoding = EncodingNameMapper.toJava(encoding);
1196: if (javaEncoding != null) {
1197: this .outCharEncodingName = javaEncoding;
1198: this .outCharEncoding = convertCharEncoding(javaEncoding);
1199: }
1200: }
1201:
1202: /**
1203: * Setter for <code>inOutCharEncodingName</code>.
1204: * @param encoding The CharEncodingName to set.
1205: */
1206: protected void setInOutEncodingName(String encoding) {
1207: setInCharEncodingName(encoding);
1208: setOutCharEncodingName(encoding);
1209: }
1210:
1211: /**
1212: * Convert a char encoding from the deprecated tidy constant to a standard java encoding name.
1213: * @param code encoding code
1214: * @return encoding name
1215: */
1216: protected String convertCharEncoding(int code) {
1217: if (code != 0 && code < ENCODING_NAMES.length) {
1218: return ENCODING_NAMES[code];
1219: }
1220: return null;
1221: }
1222:
1223: /**
1224: * Convert a char encoding from a standard java encoding name to the deprecated tidy constant.
1225: * @param name encoding name
1226: * @return encoding code
1227: */
1228: protected int convertCharEncoding(String name) {
1229: if (name == null) {
1230: return -1;
1231: }
1232:
1233: for (int j = 1; j < ENCODING_NAMES.length; j++) {
1234: if (name.equals(ENCODING_NAMES[j])) {
1235: return j;
1236: }
1237: }
1238:
1239: return -1;
1240: }
1241:
1242: }
|