001: /*
002: * @(#)Configuration.java 1.11 2000/08/16
003: *
004: */
005:
006: package org.w3c.tidy;
007:
008: /**
009: *
010: * Read configuration file and manage configuration properties.
011: *
012: * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
013: * See Tidy.java for the copyright notice.
014: * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
015: * HTML Tidy Release 4 Aug 2000</a>
016: *
017: * @author Dave Raggett <dsr@w3.org>
018: * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
019: * @version 1.0, 1999/05/22
020: * @version 1.0.1, 1999/05/29
021: * @version 1.1, 1999/06/18 Java Bean
022: * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
023: * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
024: * @version 1.4, 1999/09/04 DOM support
025: * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
026: * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
027: * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
028: * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
029: * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
030: * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
031: * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
032: */
033:
034: /*
035: Configuration files associate a property name with a value.
036: The format is that of a Java .properties file.
037: */
038:
039: import java.util.Enumeration;
040: import java.util.Properties;
041: import java.util.StringTokenizer;
042: import java.io.FileInputStream;
043: import java.io.IOException;
044:
045: public class Configuration implements java.io.Serializable {
046:
047: /* character encodings */
048: public static final int RAW = 0;
049: public static final int ASCII = 1;
050: public static final int LATIN1 = 2;
051: public static final int UTF8 = 3;
052: public static final int ISO2022 = 4;
053: public static final int MACROMAN = 5;
054:
055: /* mode controlling treatment of doctype */
056: public static final int DOCTYPE_OMIT = 0;
057: public static final int DOCTYPE_AUTO = 1;
058: public static final int DOCTYPE_STRICT = 2;
059: public static final int DOCTYPE_LOOSE = 3;
060: public static final int DOCTYPE_USER = 4;
061:
062: protected int spaces = 2; /* default indentation */
063: protected int wraplen = 68; /* default wrap margin */
064: protected int CharEncoding = ASCII;
065: protected int tabsize = 4;
066:
067: protected int docTypeMode = DOCTYPE_AUTO; /* see doctype property */
068: protected String altText = null; /* default text for alt attribute */
069: protected String slidestyle = null; /* style sheet for slides */
070: protected String docTypeStr = null; /* user specified doctype */
071: protected String errfile = null; /* file name to write errors to */
072: protected boolean writeback = false; /* if true then output tidied markup */
073:
074: protected boolean OnlyErrors = false; /* if true normal output is suppressed */
075: protected boolean ShowWarnings = true; /* however errors are always shown */
076: protected boolean Quiet = false; /* no 'Parsing X', guessed DTD or summary */
077: protected boolean IndentContent = false; /* indent content of appropriate tags */
078: protected boolean SmartIndent = false; /* does text/block level content effect indentation */
079: protected boolean HideEndTags = false; /* suppress optional end tags */
080: protected boolean XmlTags = false; /* treat input as XML */
081: protected boolean XmlOut = false; /* create output as XML */
082: protected boolean xHTML = false; /* output extensible HTML */
083: protected boolean XmlPi = false; /* add <?xml?> for XML docs */
084: protected boolean RawOut = false; /* avoid mapping values > 127 to entities */
085: protected boolean UpperCaseTags = false; /* output tags in upper not lower case */
086: protected boolean UpperCaseAttrs = false; /* output attributes in upper not lower case */
087: protected boolean MakeClean = false; /* remove presentational clutter */
088: protected boolean LogicalEmphasis = false; /* replace i by em and b by strong */
089: protected boolean DropFontTags = false; /* discard presentation tags */
090: protected boolean DropEmptyParas = true; /* discard empty p elements */
091: protected boolean FixComments = true; /* fix comments with adjacent hyphens */
092: protected boolean BreakBeforeBR = false; /* o/p newline before <br> or not? */
093: protected boolean BurstSlides = false; /* create slides on each h2 element */
094: protected boolean NumEntities = false; /* use numeric entities */
095: protected boolean QuoteMarks = false; /* output " marks as " */
096: protected boolean QuoteNbsp = true; /* output non-breaking space as entity */
097: protected boolean QuoteAmpersand = true; /* output naked ampersand as & */
098: protected boolean WrapAttVals = false; /* wrap within attribute values */
099: protected boolean WrapScriptlets = false; /* wrap within JavaScript string literals */
100: protected boolean WrapSection = true; /* wrap within <![ ... ]> section tags */
101: protected boolean WrapAsp = true; /* wrap within ASP pseudo elements */
102: protected boolean WrapJste = true; /* wrap within JSTE pseudo elements */
103: protected boolean WrapPhp = true; /* wrap within PHP pseudo elements */
104: protected boolean FixBackslash = true; /* fix URLs by replacing \ with / */
105: protected boolean IndentAttributes = false; /* newline+indent before each attribute */
106: protected boolean XmlPIs = false; /* if set to yes PIs must end with ?> */
107: protected boolean XmlSpace = false; /* if set to yes adds xml:space attr as needed */
108: protected boolean EncloseBodyText = false; /* if yes text at body is wrapped in <p>'s */
109: protected boolean EncloseBlockText = false; /* if yes text in blocks is wrapped in <p>'s */
110: protected boolean KeepFileTimes = true; /* if yes last modied time is preserved */
111: protected boolean Word2000 = false; /* draconian cleaning for Word2000 */
112: protected boolean TidyMark = true; /* add meta element indicating tidied doc */
113: protected boolean Emacs = false; /* if true format error output for GNU Emacs */
114: protected boolean LiteralAttribs = false; /* if true attributes may use newlines */
115:
116: protected TagTable tt; /* TagTable associated with this Configuration */
117:
118: private transient Properties _properties = new Properties();
119:
120: public Configuration() {
121: }
122:
123: public void addProps(Properties p) {
124: Enumeration propenum = p.propertyNames();
125: while (propenum.hasMoreElements()) {
126: String key = (String) propenum.nextElement();
127: String value = p.getProperty(key);
128: _properties.put(key, value);
129: }
130: parseProps();
131: }
132:
133: public void parseFile(String filename) {
134: try {
135: _properties.load(new FileInputStream(filename));
136: } catch (IOException e) {
137: System.err.println(filename + e.toString());
138: return;
139: }
140: parseProps();
141: }
142:
143: private void parseProps() {
144: String value;
145:
146: value = _properties.getProperty("indent-spaces");
147: if (value != null)
148: spaces = parseInt(value, "indent-spaces");
149:
150: value = _properties.getProperty("wrap");
151: if (value != null)
152: wraplen = parseInt(value, "wrap");
153:
154: value = _properties.getProperty("wrap-attributes");
155: if (value != null)
156: WrapAttVals = parseBool(value, "wrap-attributes");
157:
158: value = _properties.getProperty("wrap-script-literals");
159: if (value != null)
160: WrapScriptlets = parseBool(value, "wrap-script-literals");
161:
162: value = _properties.getProperty("wrap-sections");
163: if (value != null)
164: WrapSection = parseBool(value, "wrap-sections");
165:
166: value = _properties.getProperty("wrap-asp");
167: if (value != null)
168: WrapAsp = parseBool(value, "wrap-asp");
169:
170: value = _properties.getProperty("wrap-jste");
171: if (value != null)
172: WrapJste = parseBool(value, "wrap-jste");
173:
174: value = _properties.getProperty("wrap-php");
175: if (value != null)
176: WrapPhp = parseBool(value, "wrap-php");
177:
178: value = _properties.getProperty("literal-attributes");
179: if (value != null)
180: LiteralAttribs = parseBool(value, "literal-attributes");
181:
182: value = _properties.getProperty("tab-size");
183: if (value != null)
184: tabsize = parseInt(value, "tab-size");
185:
186: value = _properties.getProperty("markup");
187: if (value != null)
188: OnlyErrors = parseInvBool(value, "markup");
189:
190: value = _properties.getProperty("quiet");
191: if (value != null)
192: Quiet = parseBool(value, "quiet");
193:
194: value = _properties.getProperty("tidy-mark");
195: if (value != null)
196: TidyMark = parseBool(value, "tidy-mark");
197:
198: value = _properties.getProperty("indent");
199: if (value != null)
200: IndentContent = parseIndent(value, "indent");
201:
202: value = _properties.getProperty("indent-attributes");
203: if (value != null)
204: IndentAttributes = parseBool(value, "ident-attributes");
205:
206: value = _properties.getProperty("hide-endtags");
207: if (value != null)
208: HideEndTags = parseBool(value, "hide-endtags");
209:
210: value = _properties.getProperty("input-xml");
211: if (value != null)
212: XmlTags = parseBool(value, "input-xml");
213:
214: value = _properties.getProperty("output-xml");
215: if (value != null)
216: XmlOut = parseBool(value, "output-xml");
217:
218: value = _properties.getProperty("output-xhtml");
219: if (value != null)
220: xHTML = parseBool(value, "output-xhtml");
221:
222: value = _properties.getProperty("add-xml-pi");
223: if (value != null)
224: XmlPi = parseBool(value, "add-xml-pi");
225:
226: value = _properties.getProperty("add-xml-decl");
227: if (value != null)
228: XmlPi = parseBool(value, "add-xml-decl");
229:
230: value = _properties.getProperty("assume-xml-procins");
231: if (value != null)
232: XmlPIs = parseBool(value, "assume-xml-procins");
233:
234: value = _properties.getProperty("raw");
235: if (value != null)
236: RawOut = parseBool(value, "raw");
237:
238: value = _properties.getProperty("uppercase-tags");
239: if (value != null)
240: UpperCaseTags = parseBool(value, "uppercase-tags");
241:
242: value = _properties.getProperty("uppercase-attributes");
243: if (value != null)
244: UpperCaseAttrs = parseBool(value, "uppercase-attributes");
245:
246: value = _properties.getProperty("clean");
247: if (value != null)
248: MakeClean = parseBool(value, "clean");
249:
250: value = _properties.getProperty("logical-emphasis");
251: if (value != null)
252: LogicalEmphasis = parseBool(value, "logical-emphasis");
253:
254: value = _properties.getProperty("word-2000");
255: if (value != null)
256: Word2000 = parseBool(value, "word-2000");
257:
258: value = _properties.getProperty("drop-empty-paras");
259: if (value != null)
260: DropEmptyParas = parseBool(value, "drop-empty-paras");
261:
262: value = _properties.getProperty("drop-font-tags");
263: if (value != null)
264: DropFontTags = parseBool(value, "drop-font-tags");
265:
266: value = _properties.getProperty("enclose-text");
267: if (value != null)
268: EncloseBodyText = parseBool(value, "enclose-text");
269:
270: value = _properties.getProperty("enclose-block-text");
271: if (value != null)
272: EncloseBlockText = parseBool(value, "enclose-block-text");
273:
274: value = _properties.getProperty("alt-text");
275: if (value != null)
276: altText = value;
277:
278: value = _properties.getProperty("add-xml-space");
279: if (value != null)
280: XmlSpace = parseBool(value, "add-xml-space");
281:
282: value = _properties.getProperty("fix-bad-comments");
283: if (value != null)
284: FixComments = parseBool(value, "fix-bad-comments");
285:
286: value = _properties.getProperty("split");
287: if (value != null)
288: BurstSlides = parseBool(value, "split");
289:
290: value = _properties.getProperty("break-before-br");
291: if (value != null)
292: BreakBeforeBR = parseBool(value, "break-before-br");
293:
294: value = _properties.getProperty("numeric-entities");
295: if (value != null)
296: NumEntities = parseBool(value, "numeric-entities");
297:
298: value = _properties.getProperty("quote-marks");
299: if (value != null)
300: QuoteMarks = parseBool(value, "quote-marks");
301:
302: value = _properties.getProperty("quote-nbsp");
303: if (value != null)
304: QuoteNbsp = parseBool(value, "quote-nbsp");
305:
306: value = _properties.getProperty("quote-ampersand");
307: if (value != null)
308: QuoteAmpersand = parseBool(value, "quote-ampersand");
309:
310: value = _properties.getProperty("write-back");
311: if (value != null)
312: writeback = parseBool(value, "write-back");
313:
314: value = _properties.getProperty("keep-time");
315: if (value != null)
316: KeepFileTimes = parseBool(value, "keep-time");
317:
318: value = _properties.getProperty("show-warnings");
319: if (value != null)
320: ShowWarnings = parseBool(value, "show-warnings");
321:
322: value = _properties.getProperty("error-file");
323: if (value != null)
324: errfile = parseName(value, "error-file");
325:
326: value = _properties.getProperty("slide-style");
327: if (value != null)
328: slidestyle = parseName(value, "slide-style");
329:
330: value = _properties.getProperty("new-inline-tags");
331: if (value != null)
332: parseInlineTagNames(value, "new-inline-tags");
333:
334: value = _properties.getProperty("new-blocklevel-tags");
335: if (value != null)
336: parseBlockTagNames(value, "new-blocklevel-tags");
337:
338: value = _properties.getProperty("new-empty-tags");
339: if (value != null)
340: parseEmptyTagNames(value, "new-empty-tags");
341:
342: value = _properties.getProperty("new-pre-tags");
343: if (value != null)
344: parsePreTagNames(value, "new-pre-tags");
345:
346: value = _properties.getProperty("char-encoding");
347: if (value != null)
348: CharEncoding = parseCharEncoding(value, "char-encoding");
349:
350: value = _properties.getProperty("doctype");
351: if (value != null)
352: docTypeStr = parseDocType(value, "doctype");
353:
354: value = _properties.getProperty("fix-backslash");
355: if (value != null)
356: FixBackslash = parseBool(value, "fix-backslash");
357:
358: value = _properties.getProperty("gnu-emacs");
359: if (value != null)
360: Emacs = parseBool(value, "gnu-emacs");
361: }
362:
363: /* ensure that config is self consistent */
364: public void adjust() {
365: if (EncloseBlockText)
366: EncloseBodyText = true;
367:
368: /* avoid the need to set IndentContent when SmartIndent is set */
369:
370: if (SmartIndent)
371: IndentContent = true;
372:
373: /* disable wrapping */
374: if (wraplen == 0)
375: wraplen = 0x7FFFFFFF;
376:
377: /* Word 2000 needs o:p to be declared as inline */
378: if (Word2000) {
379: tt.defineInlineTag("o:p");
380: }
381:
382: /* XHTML is written in lower case */
383: if (xHTML) {
384: XmlOut = true;
385: UpperCaseTags = false;
386: UpperCaseAttrs = false;
387: }
388:
389: /* if XML in, then XML out */
390: if (XmlTags) {
391: XmlOut = true;
392: XmlPIs = true;
393: }
394:
395: /* XML requires end tags */
396: if (XmlOut) {
397: QuoteAmpersand = true;
398: HideEndTags = false;
399: }
400: }
401:
402: private static int parseInt(String s, String option) {
403: int i = 0;
404: try {
405: i = Integer.parseInt(s);
406: } catch (NumberFormatException e) {
407: Report.badArgument(option);
408: i = -1;
409: }
410: return i;
411: }
412:
413: private static boolean parseBool(String s, String option) {
414: boolean b = false;
415: if (s != null && s.length() > 0) {
416: char c = s.charAt(0);
417: if ((c == 't') || (c == 'T') || (c == 'Y') || (c == 'y')
418: || (c == '1'))
419: b = true;
420: else if ((c == 'f') || (c == 'F') || (c == 'N')
421: || (c == 'n') || (c == '0'))
422: b = false;
423: else
424: Report.badArgument(option);
425: }
426: return b;
427: }
428:
429: private static boolean parseInvBool(String s, String option) {
430: boolean b = false;
431: if (s != null && s.length() > 0) {
432: char c = s.charAt(0);
433: if ((c == 't') || (c == 'T') || (c == 'Y') || (c == 'y'))
434: b = true;
435: else if ((c == 'f') || (c == 'F') || (c == 'N')
436: || (c == 'n'))
437: b = false;
438: else
439: Report.badArgument(option);
440: }
441: return !b;
442: }
443:
444: private static String parseName(String s, String option) {
445: StringTokenizer t = new StringTokenizer(s);
446: String rs = null;
447: if (t.countTokens() >= 1)
448: rs = t.nextToken();
449: else
450: Report.badArgument(option);
451: return rs;
452: }
453:
454: private static int parseCharEncoding(String s, String option) {
455: int result = ASCII;
456:
457: if (Lexer.wstrcasecmp(s, "ascii") == 0)
458: result = ASCII;
459: else if (Lexer.wstrcasecmp(s, "latin1") == 0)
460: result = LATIN1;
461: else if (Lexer.wstrcasecmp(s, "raw") == 0)
462: result = RAW;
463: else if (Lexer.wstrcasecmp(s, "utf8") == 0)
464: result = UTF8;
465: else if (Lexer.wstrcasecmp(s, "iso2022") == 0)
466: result = ISO2022;
467: else if (Lexer.wstrcasecmp(s, "mac") == 0)
468: result = MACROMAN;
469: else
470: Report.badArgument(option);
471:
472: return result;
473: }
474:
475: /* slight hack to avoid changes to pprint.c */
476: private boolean parseIndent(String s, String option) {
477: boolean b = IndentContent;
478:
479: if (Lexer.wstrcasecmp(s, "yes") == 0) {
480: b = true;
481: SmartIndent = false;
482: } else if (Lexer.wstrcasecmp(s, "true") == 0) {
483: b = true;
484: SmartIndent = false;
485: } else if (Lexer.wstrcasecmp(s, "no") == 0) {
486: b = false;
487: SmartIndent = false;
488: } else if (Lexer.wstrcasecmp(s, "false") == 0) {
489: b = false;
490: SmartIndent = false;
491: } else if (Lexer.wstrcasecmp(s, "auto") == 0) {
492: b = true;
493: SmartIndent = true;
494: } else
495: Report.badArgument(option);
496: return b;
497: }
498:
499: private void parseInlineTagNames(String s, String option) {
500: StringTokenizer t = new StringTokenizer(s, " \t\n\r,");
501: while (t.hasMoreTokens()) {
502: tt.defineInlineTag(t.nextToken());
503: }
504: }
505:
506: private void parseBlockTagNames(String s, String option) {
507: StringTokenizer t = new StringTokenizer(s, " \t\n\r,");
508: while (t.hasMoreTokens()) {
509: tt.defineBlockTag(t.nextToken());
510: }
511: }
512:
513: private void parseEmptyTagNames(String s, String option) {
514: StringTokenizer t = new StringTokenizer(s, " \t\n\r,");
515: while (t.hasMoreTokens()) {
516: tt.defineEmptyTag(t.nextToken());
517: }
518: }
519:
520: private void parsePreTagNames(String s, String option) {
521: StringTokenizer t = new StringTokenizer(s, " \t\n\r,");
522: while (t.hasMoreTokens()) {
523: tt.definePreTag(t.nextToken());
524: }
525: }
526:
527: /*
528: doctype: omit | auto | strict | loose | <fpi>
529:
530: where the fpi is a string similar to
531:
532: "-//ACME//DTD HTML 3.14159//EN"
533: */
534: protected String parseDocType(String s, String option) {
535: s = s.trim();
536:
537: /* "-//ACME//DTD HTML 3.14159//EN" or similar */
538:
539: if (s.startsWith("\"")) {
540: docTypeMode = DOCTYPE_USER;
541: return s;
542: }
543:
544: /* read first word */
545: String word = "";
546: StringTokenizer t = new StringTokenizer(s, " \t\n\r,");
547: if (t.hasMoreTokens())
548: word = t.nextToken();
549:
550: if (Lexer.wstrcasecmp(word, "omit") == 0)
551: docTypeMode = DOCTYPE_OMIT;
552: else if (Lexer.wstrcasecmp(word, "strict") == 0)
553: docTypeMode = DOCTYPE_STRICT;
554: else if (Lexer.wstrcasecmp(word, "loose") == 0
555: || Lexer.wstrcasecmp(word, "transitional") == 0)
556: docTypeMode = DOCTYPE_LOOSE;
557: else if (Lexer.wstrcasecmp(word, "auto") == 0)
558: docTypeMode = DOCTYPE_AUTO;
559: else {
560: docTypeMode = DOCTYPE_AUTO;
561: Report.badArgument(option);
562: }
563: return null;
564: }
565:
566: }
|