001: /*
002: * Java HTML Tidy - JTidy
003: * HTML parser and pretty printer
004: *
005: * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
006: * Institute of Technology, Institut National de Recherche en
007: * Informatique et en Automatique, Keio University). All Rights
008: * Reserved.
009: *
010: * Contributing Author(s):
011: *
012: * Dave Raggett <dsr@w3.org>
013: * Andy Quick <ac.quick@sympatico.ca> (translation to Java)
014: * Gary L Peskin <garyp@firstech.com> (Java development)
015: * Sami Lempinen <sami@lempinen.net> (release management)
016: * Fabrizio Giustina <fgiust at users.sourceforge.net>
017: *
018: * The contributing author(s) would like to thank all those who
019: * helped with testing, bug fixes, and patience. This wouldn't
020: * have been possible without all of you.
021: *
022: * COPYRIGHT NOTICE:
023: *
024: * This software and documentation is provided "as is," and
025: * the copyright holders and contributing author(s) make no
026: * representations or warranties, express or implied, including
027: * but not limited to, warranties of merchantability or fitness
028: * for any particular purpose or that the use of the software or
029: * documentation will not infringe any third party patents,
030: * copyrights, trademarks or other rights.
031: *
032: * The copyright holders and contributing author(s) will not be
033: * liable for any direct, indirect, special or consequential damages
034: * arising out of any use of the software or documentation, even if
035: * advised of the possibility of such damage.
036: *
037: * Permission is hereby granted to use, copy, modify, and distribute
038: * this source code, or portions hereof, documentation and executables,
039: * for any purpose, without fee, subject to the following restrictions:
040: *
041: * 1. The origin of this source code must not be misrepresented.
042: * 2. Altered versions must be plainly marked as such and must
043: * not be misrepresented as being the original source.
044: * 3. This Copyright notice may not be removed or altered from any
045: * source or altered source distribution.
046: *
047: * The copyright holders and contributing author(s) specifically
048: * permit, without fee, and encourage the use of this source code
049: * as a component for supporting the Hypertext Markup Language in
050: * commercial products. If you use this source code in a product,
051: * acknowledgment is not required but would be appreciated.
052: *
053: */
054: package org.w3c.tidy;
055:
056: import java.util.Iterator;
057: import java.util.List;
058: import java.util.StringTokenizer;
059:
060: /**
061: * Property parser instances.
062: * @author Fabrizio Giustina
063: * @version $Revision $ ($Author $)
064: */
065: public final class ParsePropertyImpl {
066:
/**
 * Shared parser for int values (see {@link ParseInt}).
 */
static final ParseProperty INT = new ParseInt();

/**
 * Shared parser for boolean values (see {@link ParseBoolean}).
 */
static final ParseProperty BOOL = new ParseBoolean();

/**
 * Shared parser for inverted boolean values: the parsed boolean is negated (see {@link ParseInvBoolean}).
 */
static final ParseProperty INVBOOL = new ParseInvBoolean();

/**
 * Shared parser for char encoding values (see {@link ParseCharEncoding}).
 */
static final ParseProperty CHAR_ENCODING = new ParseCharEncoding();

/**
 * Shared parser for name values, i.e. the first whitespace-delimited token (see {@link ParseName}).
 */
static final ParseProperty NAME = new ParseName();

/**
 * Shared parser for lists of tag names (see {@link ParseTagNames}).
 */
static final ParseProperty TAGNAMES = new ParseTagNames();

/**
 * Shared parser for the doctype property (see {@link ParseDocType}).
 */
static final ParseProperty DOCTYPE = new ParseDocType();

/**
 * Shared parser for the repeated attribute property (see {@link ParseRepeatedAttribute}).
 */
static final ParseProperty REPEATED_ATTRIBUTES = new ParseRepeatedAttribute();

/**
 * Shared parser for String values, returned unchanged (see {@link ParseString}).
 */
static final ParseProperty STRING = new ParseString();

/**
 * Shared parser for the indent property (see {@link ParseIndent}).
 */
static final ParseProperty INDENT = new ParseIndent();

/**
 * Shared parser for css selectors (see {@link ParseCSS1Selector}).
 */
static final ParseProperty CSS1SELECTOR = new ParseCSS1Selector();

/**
 * Shared parser for new line bytes: lf, cr or crlf (see {@link ParseNewLine}).
 */
static final ParseProperty NEWLINE = new ParseNewLine();
126:
/**
 * Don't instantiate: this class only holds the static parser instances and the nested parser classes.
 */
private ParsePropertyImpl() {
    // unused
}
133:
134: /**
135: * parser for integer values.
136: */
137: static class ParseInt implements ParseProperty {
138:
139: /**
140: * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
141: */
142: public Object parse(String value, String option,
143: Configuration configuration) {
144: int i = 0;
145: try {
146: i = Integer.parseInt(value);
147: } catch (NumberFormatException e) {
148: configuration.report.badArgument(value, option);
149: i = -1;
150: }
151: return new Integer(i);
152: }
153:
154: /**
155: * @see org.w3c.tidy.ParseProperty#getType()
156: */
157: public String getType() {
158: return "Integer";
159: }
160:
161: /**
162: * @see org.w3c.tidy.ParseProperty#getOptionValues()
163: */
164: public String getOptionValues() {
165: return "0, 1, 2, ...";
166: }
167:
168: /**
169: * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
170: */
171: public String getFriendlyName(String option, Object value,
172: Configuration configuration) {
173: return value == null ? "" : value.toString();
174: }
175: }
176:
177: /**
178: * parser for boolean values.
179: */
180: static class ParseBoolean implements ParseProperty {
181:
182: /**
183: * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
184: */
185: public Object parse(String value, String option,
186: Configuration configuration) {
187: Boolean b = Boolean.TRUE;
188: if (value != null && value.length() > 0) {
189: char c = value.charAt(0);
190: if ((c == 't') || (c == 'T') || (c == 'Y')
191: || (c == 'y') || (c == '1')) {
192: b = Boolean.TRUE;
193: } else if ((c == 'f') || (c == 'F') || (c == 'N')
194: || (c == 'n') || (c == '0')) {
195: b = Boolean.FALSE;
196: } else {
197: configuration.report.badArgument(value, option);
198: }
199: }
200: return b;
201: }
202:
203: /**
204: * @see org.w3c.tidy.ParseProperty#getType()
205: */
206: public String getType() {
207: return "Boolean";
208: }
209:
210: /**
211: * @see org.w3c.tidy.ParseProperty#getOptionValues()
212: */
213: public String getOptionValues() {
214: return "y/n, yes/no, t/f, true/false, 1/0";
215: }
216:
217: /**
218: * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
219: */
220: public String getFriendlyName(String option, Object value,
221: Configuration configuration) {
222: if (value == null) {
223: return "";
224: }
225:
226: return ((Boolean) value).booleanValue() ? "yes" : "no";
227: }
228: }
229:
230: /**
231: * parser for boolean values.
232: */
233: static class ParseInvBoolean implements ParseProperty {
234:
235: /**
236: * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
237: */
238: public Object parse(String value, String option,
239: Configuration configuration) {
240: return (((Boolean) BOOL.parse(value, option, configuration))
241: .booleanValue() ? Boolean.FALSE : Boolean.TRUE);
242: }
243:
244: /**
245: * @see org.w3c.tidy.ParseProperty#getType()
246: */
247: public String getType() {
248: return "Boolean";
249: }
250:
251: /**
252: * @see org.w3c.tidy.ParseProperty#getOptionValues()
253: */
254: public String getOptionValues() {
255: return "yes, no, true, false";
256: }
257:
258: /**
259: * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
260: */
261: public String getFriendlyName(String option, Object value,
262: Configuration configuration) {
263: if (value == null) {
264: return "";
265: }
266:
267: return ((Boolean) value).booleanValue() ? "no" : "yes";
268: }
269: }
270:
271: /**
272: * parse character encoding option. Can be RAW, ASCII, LATIN1, UTF8, ISO2022, MACROMAN, UTF16LE, UTF16BE, UTF16,
273: * WIN1252, BIG5, SHIFTJIS
274: */
275: static class ParseCharEncoding implements ParseProperty {
276:
277: /**
278: * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
279: */
280: public Object parse(String value, String option,
281: Configuration configuration) {
282:
283: if ("raw".equalsIgnoreCase(value)) {
284: // special value for compatibility with tidy c
285: configuration.rawOut = true;
286: } else if (!TidyUtils.isCharEncodingSupported(value)) {
287: configuration.report.badArgument(value, option);
288: } else if ("input-encoding".equalsIgnoreCase(option)) {
289: configuration.setInCharEncodingName(value);
290: } else if ("output-encoding".equalsIgnoreCase(option)) {
291: configuration.setOutCharEncodingName(value);
292: } else if ("char-encoding".equalsIgnoreCase(option)) {
293: configuration.setInCharEncodingName(value);
294: configuration.setOutCharEncodingName(value);
295: }
296:
297: return null;
298: }
299:
300: /**
301: * @see org.w3c.tidy.ParseProperty#getType()
302: */
303: public String getType() {
304: return "Encoding";
305: }
306:
307: /**
308: * @see org.w3c.tidy.ParseProperty#getOptionValues()
309: */
310: public String getOptionValues() {
311: // ascii, latin1, raw, utf-8, iso2022, mac, utf-16, utf-16be, utf-16le, big5, shiftjis
312: return "Any valid java char encoding name";
313: }
314:
315: /**
316: * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
317: */
318: public String getFriendlyName(String option, Object value,
319: Configuration configuration) {
320: if ("output-encoding".equalsIgnoreCase(option)) {
321: return configuration.getOutCharEncodingName();
322: }
323:
324: // for input-encoding or char-encoding
325: return configuration.getInCharEncodingName();
326: }
327: }
328:
329: /**
330: * parser for name values (a string excluding whitespace).
331: */
332: static class ParseName implements ParseProperty {
333:
334: /**
335: * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
336: */
337: public Object parse(String value, String option,
338: Configuration configuration) {
339: StringTokenizer t = new StringTokenizer(value);
340: String rs = null;
341: if (t.countTokens() >= 1) {
342: rs = t.nextToken();
343: } else {
344: configuration.report.badArgument(value, option);
345: }
346: return rs;
347: }
348:
349: /**
350: * @see org.w3c.tidy.ParseProperty#getType()
351: */
352: public String getType() {
353: return "Name";
354: }
355:
356: /**
357: * @see org.w3c.tidy.ParseProperty#getOptionValues()
358: */
359: public String getOptionValues() {
360: return "-";
361: }
362:
363: /**
364: * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
365: */
366: public String getFriendlyName(String option, Object value,
367: Configuration configuration) {
368: return value == null ? "" : value.toString();
369: }
370: }
371:
372: /**
373: * parser for name values.
374: */
375: static class ParseTagNames implements ParseProperty {
376:
377: /**
378: * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
379: */
380: public Object parse(String value, String option,
381: Configuration configuration) {
382: short tagType = Dict.TAGTYPE_INLINE;
383:
384: if ("new-inline-tags".equals(option)) {
385: tagType = Dict.TAGTYPE_INLINE;
386: } else if ("new-blocklevel-tags".equals(option)) {
387: tagType = Dict.TAGTYPE_BLOCK;
388: } else if ("new-empty-tags".equals(option)) {
389: tagType = Dict.TAGTYPE_EMPTY;
390: } else if ("new-pre-tags".equals(option)) {
391: tagType = Dict.TAGTYPE_PRE;
392: }
393:
394: StringTokenizer t = new StringTokenizer(value, " \t\n\r,");
395: while (t.hasMoreTokens()) {
396: configuration.definedTags |= tagType;
397: configuration.tt.defineTag(tagType, t.nextToken());
398: }
399: return null;
400: }
401:
402: /**
403: * @see org.w3c.tidy.ParseProperty#getType()
404: */
405: public String getType() {
406: return "Tag names";
407: }
408:
409: /**
410: * @see org.w3c.tidy.ParseProperty#getOptionValues()
411: */
412: public String getOptionValues() {
413: return "tagX, tagY, ...";
414: }
415:
416: /**
417: * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
418: */
419: public String getFriendlyName(String option, Object value,
420: Configuration configuration) {
421: short tagType;
422: if ("new-inline-tags".equals(option)) {
423: tagType = Dict.TAGTYPE_INLINE;
424: } else if ("new-blocklevel-tags".equals(option)) {
425: tagType = Dict.TAGTYPE_BLOCK;
426: } else if ("new-empty-tags".equals(option)) {
427: tagType = Dict.TAGTYPE_EMPTY;
428: } else if ("new-pre-tags".equals(option)) {
429: tagType = Dict.TAGTYPE_PRE;
430: } else {
431: return "";
432: }
433:
434: List tagList = configuration.tt.findAllDefinedTag(tagType);
435: if (tagList.isEmpty()) {
436: return "";
437: }
438:
439: StringBuffer buffer = new StringBuffer();
440: Iterator iterator = tagList.iterator();
441: while (iterator.hasNext()) {
442: buffer.append(iterator.next());
443: buffer.append(" ");
444: }
445:
446: return buffer.toString();
447: }
448: }
449:
450: /**
451: * Parse doctype preference. doctype: <code>omit | auto | strict | loose | [fpi]</code> where the fpi is a string
452: * similar to <code>"-//ACME//DTD HTML 3.14159//EN"</code>.
453: */
454: static class ParseDocType implements ParseProperty {
455:
456: /**
457: * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
458: */
459: public Object parse(String value, String option,
460: Configuration configuration) {
461: value = value.trim();
462:
463: /* "-//ACME//DTD HTML 3.14159//EN" or similar */
464:
465: if (value.startsWith("\"")) {
466: configuration.docTypeMode = Configuration.DOCTYPE_USER;
467: return value;
468: }
469:
470: /* read first word */
471: String word = "";
472: StringTokenizer t = new StringTokenizer(value, " \t\n\r,");
473: if (t.hasMoreTokens()) {
474: word = t.nextToken();
475: }
476: // #443663 - fix by Terry Teague 23 Jul 01
477: if ("auto".equalsIgnoreCase(word)) {
478: configuration.docTypeMode = Configuration.DOCTYPE_AUTO;
479: } else if ("omit".equalsIgnoreCase(word)) {
480: configuration.docTypeMode = Configuration.DOCTYPE_OMIT;
481: } else if ("strict".equalsIgnoreCase(word)) {
482: configuration.docTypeMode = Configuration.DOCTYPE_STRICT;
483: } else if ("loose".equalsIgnoreCase(word)
484: || "transitional".equalsIgnoreCase(word)) {
485: configuration.docTypeMode = Configuration.DOCTYPE_LOOSE;
486: } else {
487: configuration.report.badArgument(value, option);
488: }
489: return null;
490: }
491:
492: /**
493: * @see org.w3c.tidy.ParseProperty#getType()
494: */
495: public String getType() {
496: return "DocType";
497: }
498:
499: /**
500: * @see org.w3c.tidy.ParseProperty#getOptionValues()
501: */
502: public String getOptionValues() {
503: return "omit | auto | strict | loose | [fpi]";
504: }
505:
506: /**
507: * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
508: */
509: public String getFriendlyName(String option, Object value,
510: Configuration configuration) {
511:
512: String stringValue;
513:
514: switch (configuration.docTypeMode) {
515: case Configuration.DOCTYPE_AUTO:
516: stringValue = "auto";
517: break;
518:
519: case Configuration.DOCTYPE_OMIT:
520: stringValue = "omit";
521: break;
522:
523: case Configuration.DOCTYPE_STRICT:
524: stringValue = "strict";
525: break;
526:
527: case Configuration.DOCTYPE_LOOSE:
528: stringValue = "transitional";
529: break;
530:
531: case Configuration.DOCTYPE_USER:
532: stringValue = configuration.docTypeStr;
533: break;
534:
535: default:
536: stringValue = "unknown";
537: break;
538: }
539:
540: return stringValue;
541: }
542: }
543:
544: /**
545: * keep-first or keep-last?
546: */
547: static class ParseRepeatedAttribute implements ParseProperty {
548:
549: /**
550: * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
551: */
552: public Object parse(String value, String option,
553: Configuration configuration) {
554: int dupAttr;
555:
556: if ("keep-first".equalsIgnoreCase(value)) {
557: dupAttr = Configuration.KEEP_FIRST;
558: } else if ("keep-last".equalsIgnoreCase(value)) {
559: dupAttr = Configuration.KEEP_LAST;
560: } else {
561: configuration.report.badArgument(value, option);
562: dupAttr = -1;
563: }
564: return new Integer(dupAttr);
565: }
566:
567: /**
568: * @see org.w3c.tidy.ParseProperty#getType()
569: */
570: public String getType() {
571: return "Enum";
572: }
573:
574: /**
575: * @see org.w3c.tidy.ParseProperty#getOptionValues()
576: */
577: public String getOptionValues() {
578: return "keep-first, keep-last";
579: }
580:
581: /**
582: * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
583: */
584: public String getFriendlyName(String option, Object value,
585: Configuration configuration) {
586: if (value == null) {
587: return "";
588: }
589:
590: int intValue = ((Integer) value).intValue();
591: String stringValue;
592:
593: switch (intValue) {
594: case Configuration.KEEP_FIRST:
595: stringValue = "keep-first";
596: break;
597:
598: case Configuration.KEEP_LAST:
599: stringValue = "keep-last";
600: break;
601:
602: default:
603: stringValue = "unknown";
604: break;
605: }
606:
607: return stringValue;
608: }
609: }
610:
611: /**
612: * Parser for String values.
613: */
614: static class ParseString implements ParseProperty {
615:
616: /**
617: * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
618: */
619: public Object parse(String value, String option,
620: Configuration configuration) {
621: return value;
622: }
623:
624: /**
625: * @see org.w3c.tidy.ParseProperty#getType()
626: */
627: public String getType() {
628: return "String";
629: }
630:
631: /**
632: * @see org.w3c.tidy.ParseProperty#getOptionValues()
633: */
634: public String getOptionValues() {
635: return "-";
636: }
637:
638: /**
639: * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
640: */
641: public String getFriendlyName(String option, Object value,
642: Configuration configuration) {
643: return value == null ? "" : (String) value;
644: }
645: }
646:
647: /**
648: * Parser for indent values.
649: */
650: static class ParseIndent implements ParseProperty {
651:
652: /**
653: * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
654: */
655: public Object parse(String value, String option,
656: Configuration configuration) {
657: boolean b = configuration.indentContent;
658:
659: if ("yes".equalsIgnoreCase(value)) {
660: b = true;
661: configuration.smartIndent = false;
662: } else if ("true".equalsIgnoreCase(value)) {
663: b = true;
664: configuration.smartIndent = false;
665: } else if ("no".equalsIgnoreCase(value)) {
666: b = false;
667: configuration.smartIndent = false;
668: } else if ("false".equalsIgnoreCase(value)) {
669: b = false;
670: configuration.smartIndent = false;
671: } else if ("auto".equalsIgnoreCase(value)) {
672: b = true;
673: configuration.smartIndent = true;
674: } else {
675: configuration.report.badArgument(value, option);
676: }
677: return b ? Boolean.TRUE : Boolean.FALSE;
678: }
679:
680: /**
681: * @see org.w3c.tidy.ParseProperty#getType()
682: */
683: public String getType() {
684: return "Indent";
685: }
686:
687: /**
688: * @see org.w3c.tidy.ParseProperty#getOptionValues()
689: */
690: public String getOptionValues() {
691: return "auto, y/n, yes/no, t/f, true/false, 1/0";
692: }
693:
694: /**
695: * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
696: */
697: public String getFriendlyName(String option, Object value,
698: Configuration configuration) {
699: return value == null ? "" : value.toString();
700: }
701: }
702:
703: /**
704: * Parser for css selectors.
705: */
706: static class ParseCSS1Selector implements ParseProperty {
707:
708: /**
709: * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
710: */
711: public Object parse(String value, String option,
712: Configuration configuration) {
713: StringTokenizer t = new StringTokenizer(value);
714: String buf = null;
715: if (t.countTokens() >= 1) {
716: buf = t.nextToken() + "-"; // Make sure any escaped Unicode is terminated so valid class names are
717: // generated after Tidy appends last digits.
718: } else {
719: configuration.report.badArgument(value, option);
720: }
721:
722: if (!Lexer.isCSS1Selector(value)) {
723: configuration.report.badArgument(value, option);
724: }
725:
726: return buf;
727: }
728:
729: /**
730: * @see org.w3c.tidy.ParseProperty#getType()
731: */
732: public String getType() {
733: return "Name";
734: }
735:
736: /**
737: * @see org.w3c.tidy.ParseProperty#getOptionValues()
738: */
739: public String getOptionValues() {
740: return "CSS1 selector";
741: }
742:
743: /**
744: * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
745: */
746: public String getFriendlyName(String option, Object value,
747: Configuration configuration) {
748: return value == null ? "" : (String) value;
749: }
750: }
751:
752: /**
753: * Parser for newline bytes. Allows lf|crlf|cr.
754: */
755: static class ParseNewLine implements ParseProperty {
756:
757: /**
758: * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
759: */
760: public Object parse(String value, String option,
761: Configuration configuration) {
762: // lf|crlf|cr
763: if ("lf".equalsIgnoreCase(value)) {
764: configuration.newline = new char[] { '\n' };
765: } else if ("cr".equalsIgnoreCase(value)) {
766: configuration.newline = new char[] { '\r' };
767: } else if ("crlf".equalsIgnoreCase(value)) {
768: configuration.newline = new char[] { '\r', '\n' };
769: } else {
770: configuration.report.badArgument(value, option);
771: }
772: return null;
773: }
774:
775: /**
776: * @see org.w3c.tidy.ParseProperty#getType()
777: */
778: public String getType() {
779: return "Enum";
780: }
781:
782: /**
783: * @see org.w3c.tidy.ParseProperty#getOptionValues()
784: */
785: public String getOptionValues() {
786: return "lf, crlf, cr";
787: }
788:
789: /**
790: * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
791: */
792: public String getFriendlyName(String option, Object value,
793: Configuration configuration) {
794: if (configuration.newline.length == 1) {
795: return (configuration.newline[0] == '\n') ? "lf" : "cr";
796: }
797: return "crlf";
798: }
799: }
800:
801: }
|