001: /*
002: * @(#)Configuration.java 1.11 2000/08/16
003: *
004: */
005:
006: package org.w3c.tidy;
007:
008: /**
009: *
010: * Read configuration file and manage configuration properties.
011: *
012: * (c) 1998-2000 (W3C) MIT, INRIA, Keio University
013: * See Tidy.java for the copyright notice.
014: * Derived from <a href="http://www.w3.org/People/Raggett/tidy">
015: * HTML Tidy Release 4 Aug 2000</a>
016: *
017: * @author Dave Raggett <dsr@w3.org>
018: * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java)
019: * @version 1.0, 1999/05/22
020: * @version 1.0.1, 1999/05/29
021: * @version 1.1, 1999/06/18 Java Bean
022: * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999
023: * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999
024: * @version 1.4, 1999/09/04 DOM support
025: * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999
026: * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999
027: * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999
028: * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000
029: * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000
030: * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000
031: * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
032: */
033:
034: /*
035: Configuration files associate a property name with a value.
036: The format is that of a Java .properties file.
037: */
038:
039: import java.util.Enumeration;
040: import java.util.Properties;
041: import java.util.StringTokenizer;
042: import java.io.FileInputStream;
043: import java.io.IOException;
044:
045: public class Configuration implements java.io.Serializable {
046:
047: /* character encodings */
048: public static final int RAW = 0;
049: public static final int ASCII = 1;
050: public static final int LATIN1 = 2;
051: public static final int UTF8 = 3;
052: public static final int ISO2022 = 4;
053: public static final int MACROMAN = 5;
054:
055: /* mode controlling treatment of doctype */
056: public static final int DOCTYPE_OMIT = 0;
057: public static final int DOCTYPE_AUTO = 1;
058: public static final int DOCTYPE_STRICT = 2;
059: public static final int DOCTYPE_LOOSE = 3;
060: public static final int DOCTYPE_USER = 4;
061:
062: protected int spaces = 2; /* default indentation */
063: protected int wraplen = 68; /* default wrap margin */
064: protected int CharEncoding = ASCII;
065: protected int tabsize = 4;
066:
067: // BEGIN RAVE MODIFICATIONS
068: /** When set, treat the output as jsp source; e.g. entities should
069: * be escaped, there should be xhtml doctype, etc. */
070: public boolean outputJspMode = false;
071: /** When set, treat the input as jsp source, e.g. entities should be
072: * considered escaped, etc. */
073: public boolean inputJspMode = false;
074: // END RAVE MODIFICATIONS
075:
076: protected int docTypeMode = DOCTYPE_AUTO; /* see doctype property */
077: protected String altText = null; /* default text for alt attribute */
078: protected String slidestyle = null; /* style sheet for slides */
079: protected String docTypeStr = null; /* user specified doctype */
080: protected String errfile = null; /* file name to write errors to */
081: protected boolean writeback = false; /* if true then output tidied markup */
082:
083: protected boolean OnlyErrors = false; /* if true normal output is suppressed */
084: protected boolean ShowWarnings = true; /* however errors are always shown */
085: protected boolean Quiet = false; /* no 'Parsing X', guessed DTD or summary */
086: protected boolean IndentContent = false; /* indent content of appropriate tags */
087: protected boolean SmartIndent = false; /* does text/block level content effect indentation */
088: protected boolean HideEndTags = false; /* suppress optional end tags */
089: protected boolean XmlTags = false; /* treat input as XML */
090: protected boolean XmlOut = false; /* create output as XML */
091: protected boolean xHTML = false; /* output extensible HTML */
092: protected boolean XmlPi = false; /* add <?xml?> for XML docs */
093: protected boolean RawOut = false; /* avoid mapping values > 127 to entities */
094: protected boolean UpperCaseTags = false; /* output tags in upper not lower case */
095: protected boolean UpperCaseAttrs = false; /* output attributes in upper not lower case */
096: protected boolean MakeClean = false; /* remove presentational clutter */
097: protected boolean LogicalEmphasis = false; /* replace i by em and b by strong */
098: protected boolean DropFontTags = false; /* discard presentation tags */
099: protected boolean DropEmptyParas = true; /* discard empty p elements */
100: protected boolean FixComments = true; /* fix comments with adjacent hyphens */
101: protected boolean BreakBeforeBR = false; /* o/p newline before <br> or not? */
102: protected boolean BurstSlides = false; /* create slides on each h2 element */
103: protected boolean NumEntities = false; /* use numeric entities */
104: protected boolean QuoteMarks = false; /* output " marks as " */
105: protected boolean QuoteNbsp = true; /* output non-breaking space as entity */
106: protected boolean QuoteAmpersand = true; /* output naked ampersand as & */
107: protected boolean WrapAttVals = false; /* wrap within attribute values */
108: protected boolean WrapScriptlets = false; /* wrap within JavaScript string literals */
109: protected boolean WrapSection = true; /* wrap within <![ ... ]> section tags */
110: protected boolean WrapAsp = true; /* wrap within ASP pseudo elements */
111: protected boolean WrapJste = true; /* wrap within JSTE pseudo elements */
112: protected boolean WrapPhp = true; /* wrap within PHP pseudo elements */
113: protected boolean FixBackslash = true; /* fix URLs by replacing \ with / */
114: protected boolean IndentAttributes = false; /* newline+indent before each attribute */
115: protected boolean XmlPIs = false; /* if set to yes PIs must end with ?> */
116: protected boolean XmlSpace = false; /* if set to yes adds xml:space attr as needed */
117: protected boolean EncloseBodyText = false; /* if yes text at body is wrapped in <p>'s */
118: protected boolean EncloseBlockText = false; /* if yes text in blocks is wrapped in <p>'s */
119: protected boolean KeepFileTimes = true; /* if yes last modied time is preserved */
120: protected boolean Word2000 = false; /* draconian cleaning for Word2000 */
121: protected boolean TidyMark = true; /* add meta element indicating tidied doc */
122: protected boolean Emacs = false; /* if true format error output for GNU Emacs */
123: protected boolean LiteralAttribs = false; /* if true attributes may use newlines */
124:
125: protected TagTable tt; /* TagTable associated with this Configuration */
126:
127: private transient Properties _properties = new Properties();
128:
129: public Configuration() {
130: }
131:
132: public void addProps(Properties p) {
133: Enumeration enumeration = p.propertyNames();
134: while (enumeration.hasMoreElements()) {
135: String key = (String) enumeration.nextElement();
136: String value = p.getProperty(key);
137: _properties.put(key, value);
138: }
139: parseProps();
140: }
141:
142: public void parseFile(String filename) {
143: try {
144: _properties.load(new FileInputStream(filename));
145: } catch (IOException e) {
146: System.err.println(filename + e.toString());
147: return;
148: }
149: parseProps();
150: }
151:
152: private void parseProps() {
153: String value;
154:
155: value = _properties.getProperty("indent-spaces");
156: if (value != null)
157: spaces = parseInt(value, "indent-spaces");
158:
159: value = _properties.getProperty("wrap");
160: if (value != null)
161: wraplen = parseInt(value, "wrap");
162:
163: value = _properties.getProperty("wrap-attributes");
164: if (value != null)
165: WrapAttVals = parseBool(value, "wrap-attributes");
166:
167: value = _properties.getProperty("wrap-script-literals");
168: if (value != null)
169: WrapScriptlets = parseBool(value, "wrap-script-literals");
170:
171: value = _properties.getProperty("wrap-sections");
172: if (value != null)
173: WrapSection = parseBool(value, "wrap-sections");
174:
175: value = _properties.getProperty("wrap-asp");
176: if (value != null)
177: WrapAsp = parseBool(value, "wrap-asp");
178:
179: value = _properties.getProperty("wrap-jste");
180: if (value != null)
181: WrapJste = parseBool(value, "wrap-jste");
182:
183: value = _properties.getProperty("wrap-php");
184: if (value != null)
185: WrapPhp = parseBool(value, "wrap-php");
186:
187: value = _properties.getProperty("literal-attributes");
188: if (value != null)
189: LiteralAttribs = parseBool(value, "literal-attributes");
190:
191: value = _properties.getProperty("tab-size");
192: if (value != null)
193: tabsize = parseInt(value, "tab-size");
194:
195: value = _properties.getProperty("markup");
196: if (value != null)
197: OnlyErrors = parseInvBool(value, "markup");
198:
199: value = _properties.getProperty("quiet");
200: if (value != null)
201: Quiet = parseBool(value, "quiet");
202:
203: value = _properties.getProperty("tidy-mark");
204: if (value != null)
205: TidyMark = parseBool(value, "tidy-mark");
206:
207: value = _properties.getProperty("indent");
208: if (value != null)
209: IndentContent = parseIndent(value, "indent");
210:
211: value = _properties.getProperty("indent-attributes");
212: if (value != null)
213: IndentAttributes = parseBool(value, "ident-attributes");
214:
215: value = _properties.getProperty("hide-endtags");
216: if (value != null)
217: HideEndTags = parseBool(value, "hide-endtags");
218:
219: value = _properties.getProperty("input-xml");
220: if (value != null)
221: XmlTags = parseBool(value, "input-xml");
222:
223: value = _properties.getProperty("output-xml");
224: if (value != null)
225: XmlOut = parseBool(value, "output-xml");
226:
227: value = _properties.getProperty("output-xhtml");
228: if (value != null)
229: xHTML = parseBool(value, "output-xhtml");
230:
231: value = _properties.getProperty("add-xml-pi");
232: if (value != null)
233: XmlPi = parseBool(value, "add-xml-pi");
234:
235: value = _properties.getProperty("add-xml-decl");
236: if (value != null)
237: XmlPi = parseBool(value, "add-xml-decl");
238:
239: value = _properties.getProperty("assume-xml-procins");
240: if (value != null)
241: XmlPIs = parseBool(value, "assume-xml-procins");
242:
243: value = _properties.getProperty("raw");
244: if (value != null)
245: RawOut = parseBool(value, "raw");
246:
247: value = _properties.getProperty("uppercase-tags");
248: if (value != null)
249: UpperCaseTags = parseBool(value, "uppercase-tags");
250:
251: value = _properties.getProperty("uppercase-attributes");
252: if (value != null)
253: UpperCaseAttrs = parseBool(value, "uppercase-attributes");
254:
255: value = _properties.getProperty("clean");
256: if (value != null)
257: MakeClean = parseBool(value, "clean");
258:
259: value = _properties.getProperty("logical-emphasis");
260: if (value != null)
261: LogicalEmphasis = parseBool(value, "logical-emphasis");
262:
263: value = _properties.getProperty("word-2000");
264: if (value != null)
265: Word2000 = parseBool(value, "word-2000");
266:
267: value = _properties.getProperty("drop-empty-paras");
268: if (value != null)
269: DropEmptyParas = parseBool(value, "drop-empty-paras");
270:
271: value = _properties.getProperty("drop-font-tags");
272: if (value != null)
273: DropFontTags = parseBool(value, "drop-font-tags");
274:
275: value = _properties.getProperty("enclose-text");
276: if (value != null)
277: EncloseBodyText = parseBool(value, "enclose-text");
278:
279: value = _properties.getProperty("enclose-block-text");
280: if (value != null)
281: EncloseBlockText = parseBool(value, "enclose-block-text");
282:
283: value = _properties.getProperty("alt-text");
284: if (value != null)
285: altText = value;
286:
287: value = _properties.getProperty("add-xml-space");
288: if (value != null)
289: XmlSpace = parseBool(value, "add-xml-space");
290:
291: value = _properties.getProperty("fix-bad-comments");
292: if (value != null)
293: FixComments = parseBool(value, "fix-bad-comments");
294:
295: value = _properties.getProperty("split");
296: if (value != null)
297: BurstSlides = parseBool(value, "split");
298:
299: value = _properties.getProperty("break-before-br");
300: if (value != null)
301: BreakBeforeBR = parseBool(value, "break-before-br");
302:
303: value = _properties.getProperty("numeric-entities");
304: if (value != null)
305: NumEntities = parseBool(value, "numeric-entities");
306:
307: value = _properties.getProperty("quote-marks");
308: if (value != null)
309: QuoteMarks = parseBool(value, "quote-marks");
310:
311: value = _properties.getProperty("quote-nbsp");
312: if (value != null)
313: QuoteNbsp = parseBool(value, "quote-nbsp");
314:
315: value = _properties.getProperty("quote-ampersand");
316: if (value != null)
317: QuoteAmpersand = parseBool(value, "quote-ampersand");
318:
319: value = _properties.getProperty("write-back");
320: if (value != null)
321: writeback = parseBool(value, "write-back");
322:
323: value = _properties.getProperty("keep-time");
324: if (value != null)
325: KeepFileTimes = parseBool(value, "keep-time");
326:
327: value = _properties.getProperty("show-warnings");
328: if (value != null)
329: ShowWarnings = parseBool(value, "show-warnings");
330:
331: value = _properties.getProperty("error-file");
332: if (value != null)
333: errfile = parseName(value, "error-file");
334:
335: value = _properties.getProperty("slide-style");
336: if (value != null)
337: slidestyle = parseName(value, "slide-style");
338:
339: value = _properties.getProperty("new-inline-tags");
340: if (value != null)
341: parseInlineTagNames(value, "new-inline-tags");
342:
343: value = _properties.getProperty("new-blocklevel-tags");
344: if (value != null)
345: parseBlockTagNames(value, "new-blocklevel-tags");
346:
347: value = _properties.getProperty("new-empty-tags");
348: if (value != null)
349: parseEmptyTagNames(value, "new-empty-tags");
350:
351: value = _properties.getProperty("new-pre-tags");
352: if (value != null)
353: parsePreTagNames(value, "new-pre-tags");
354:
355: value = _properties.getProperty("char-encoding");
356: if (value != null)
357: CharEncoding = parseCharEncoding(value, "char-encoding");
358:
359: value = _properties.getProperty("doctype");
360: if (value != null)
361: docTypeStr = parseDocType(value, "doctype");
362:
363: value = _properties.getProperty("fix-backslash");
364: if (value != null)
365: FixBackslash = parseBool(value, "fix-backslash");
366:
367: value = _properties.getProperty("gnu-emacs");
368: if (value != null)
369: Emacs = parseBool(value, "gnu-emacs");
370: }
371:
372: /* ensure that config is self consistent */
373: public void adjust() {
374: if (EncloseBlockText)
375: EncloseBodyText = true;
376:
377: /* avoid the need to set IndentContent when SmartIndent is set */
378:
379: if (SmartIndent)
380: IndentContent = true;
381:
382: /* disable wrapping */
383: if (wraplen == 0)
384: wraplen = 0x7FFFFFFF;
385:
386: /* Word 2000 needs o:p to be declared as inline */
387: if (Word2000) {
388: tt.defineInlineTag("o:p");
389: }
390:
391: /* XHTML is written in lower case */
392: if (xHTML) {
393: XmlOut = true;
394: UpperCaseTags = false;
395: UpperCaseAttrs = false;
396: }
397:
398: /* if XML in, then XML out */
399: if (XmlTags) {
400: XmlOut = true;
401: XmlPIs = true;
402: }
403:
404: /* XML requires end tags */
405: if (XmlOut) {
406: QuoteAmpersand = true;
407: HideEndTags = false;
408: }
409: }
410:
411: private static int parseInt(String s, String option) {
412: int i = 0;
413: try {
414: i = Integer.parseInt(s);
415: } catch (NumberFormatException e) {
416: Report.badArgument(option);
417: i = -1;
418: }
419: return i;
420: }
421:
422: private static boolean parseBool(String s, String option) {
423: boolean b = false;
424: if (s != null && s.length() > 0) {
425: char c = s.charAt(0);
426: if ((c == 't') || (c == 'T') || (c == 'Y') || (c == 'y')
427: || (c == '1'))
428: b = true;
429: else if ((c == 'f') || (c == 'F') || (c == 'N')
430: || (c == 'n') || (c == '0'))
431: b = false;
432: else
433: Report.badArgument(option);
434: }
435: return b;
436: }
437:
438: private static boolean parseInvBool(String s, String option) {
439: boolean b = false;
440: if (s != null && s.length() > 0) {
441: char c = s.charAt(0);
442: if ((c == 't') || (c == 'T') || (c == 'Y') || (c == 'y'))
443: b = true;
444: else if ((c == 'f') || (c == 'F') || (c == 'N')
445: || (c == 'n'))
446: b = false;
447: else
448: Report.badArgument(option);
449: }
450: return !b;
451: }
452:
453: private static String parseName(String s, String option) {
454: StringTokenizer t = new StringTokenizer(s);
455: String rs = null;
456: if (t.countTokens() >= 1)
457: rs = t.nextToken();
458: else
459: Report.badArgument(option);
460: return rs;
461: }
462:
463: private static int parseCharEncoding(String s, String option) {
464: int result = ASCII;
465:
466: if (Lexer.wstrcasecmp(s, "ascii") == 0)
467: result = ASCII;
468: else if (Lexer.wstrcasecmp(s, "latin1") == 0)
469: result = LATIN1;
470: else if (Lexer.wstrcasecmp(s, "raw") == 0)
471: result = RAW;
472: else if (Lexer.wstrcasecmp(s, "utf8") == 0)
473: result = UTF8;
474: else if (Lexer.wstrcasecmp(s, "iso2022") == 0)
475: result = ISO2022;
476: else if (Lexer.wstrcasecmp(s, "mac") == 0)
477: result = MACROMAN;
478: else
479: Report.badArgument(option);
480:
481: return result;
482: }
483:
484: /* slight hack to avoid changes to pprint.c */
485: private boolean parseIndent(String s, String option) {
486: boolean b = IndentContent;
487:
488: if (Lexer.wstrcasecmp(s, "yes") == 0) {
489: b = true;
490: SmartIndent = false;
491: } else if (Lexer.wstrcasecmp(s, "true") == 0) {
492: b = true;
493: SmartIndent = false;
494: } else if (Lexer.wstrcasecmp(s, "no") == 0) {
495: b = false;
496: SmartIndent = false;
497: } else if (Lexer.wstrcasecmp(s, "false") == 0) {
498: b = false;
499: SmartIndent = false;
500: } else if (Lexer.wstrcasecmp(s, "auto") == 0) {
501: b = true;
502: SmartIndent = true;
503: } else
504: Report.badArgument(option);
505: return b;
506: }
507:
508: private void parseInlineTagNames(String s, String option) {
509: StringTokenizer t = new StringTokenizer(s, " \t\n\r,");
510: while (t.hasMoreTokens()) {
511: tt.defineInlineTag(t.nextToken());
512: }
513: }
514:
515: private void parseBlockTagNames(String s, String option) {
516: StringTokenizer t = new StringTokenizer(s, " \t\n\r,");
517: while (t.hasMoreTokens()) {
518: tt.defineBlockTag(t.nextToken());
519: }
520: }
521:
522: private void parseEmptyTagNames(String s, String option) {
523: StringTokenizer t = new StringTokenizer(s, " \t\n\r,");
524: while (t.hasMoreTokens()) {
525: tt.defineEmptyTag(t.nextToken());
526: }
527: }
528:
529: private void parsePreTagNames(String s, String option) {
530: StringTokenizer t = new StringTokenizer(s, " \t\n\r,");
531: while (t.hasMoreTokens()) {
532: tt.definePreTag(t.nextToken());
533: }
534: }
535:
536: /*
537: doctype: omit | auto | strict | loose | <fpi>
538:
539: where the fpi is a string similar to
540:
541: "-//ACME//DTD HTML 3.14159//EN"
542: */
543: protected String parseDocType(String s, String option) {
544: s = s.trim();
545:
546: /* "-//ACME//DTD HTML 3.14159//EN" or similar */
547:
548: if (s.startsWith("\"")) {
549: docTypeMode = DOCTYPE_USER;
550: return s;
551: }
552:
553: /* read first word */
554: String word = "";
555: StringTokenizer t = new StringTokenizer(s, " \t\n\r,");
556: if (t.hasMoreTokens())
557: word = t.nextToken();
558:
559: if (Lexer.wstrcasecmp(word, "omit") == 0)
560: docTypeMode = DOCTYPE_OMIT;
561: else if (Lexer.wstrcasecmp(word, "strict") == 0)
562: docTypeMode = DOCTYPE_STRICT;
563: else if (Lexer.wstrcasecmp(word, "loose") == 0
564: || Lexer.wstrcasecmp(word, "transitional") == 0)
565: docTypeMode = DOCTYPE_LOOSE;
566: else if (Lexer.wstrcasecmp(word, "auto") == 0)
567: docTypeMode = DOCTYPE_AUTO;
568: else {
569: docTypeMode = DOCTYPE_AUTO;
570: Report.badArgument(option);
571: }
572: return null;
573: }
574:
575: }
|