0001: /*
0002: * Java HTML Tidy - JTidy
0003: * HTML parser and pretty printer
0004: *
0005: * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
0006: * Institute of Technology, Institut National de Recherche en
0007: * Informatique et en Automatique, Keio University). All Rights
0008: * Reserved.
0009: *
0010: * Contributing Author(s):
0011: *
0012: * Dave Raggett <dsr@w3.org>
0013: * Andy Quick <ac.quick@sympatico.ca> (translation to Java)
0014: * Gary L Peskin <garyp@firstech.com> (Java development)
0015: * Sami Lempinen <sami@lempinen.net> (release management)
0016: * Fabrizio Giustina <fgiust at users.sourceforge.net>
0017: *
0018: * The contributing author(s) would like to thank all those who
0019: * helped with testing, bug fixes, and patience. This wouldn't
0020: * have been possible without all of you.
0021: *
0022: * COPYRIGHT NOTICE:
0023: *
0024: * This software and documentation is provided "as is," and
0025: * the copyright holders and contributing author(s) make no
0026: * representations or warranties, express or implied, including
0027: * but not limited to, warranties of merchantability or fitness
0028: * for any particular purpose or that the use of the software or
0029: * documentation will not infringe any third party patents,
0030: * copyrights, trademarks or other rights.
0031: *
0032: * The copyright holders and contributing author(s) will not be
0033: * liable for any direct, indirect, special or consequential damages
0034: * arising out of any use of the software or documentation, even if
0035: * advised of the possibility of such damage.
0036: *
0037: * Permission is hereby granted to use, copy, modify, and distribute
0038: * this source code, or portions hereof, documentation and executables,
0039: * for any purpose, without fee, subject to the following restrictions:
0040: *
0041: * 1. The origin of this source code must not be misrepresented.
0042: * 2. Altered versions must be plainly marked as such and must
0043: * not be misrepresented as being the original source.
0044: * 3. This Copyright notice may not be removed or altered from any
0045: * source or altered source distribution.
0046: *
0047: * The copyright holders and contributing author(s) specifically
0048: * permit, without fee, and encourage the use of this source code
0049: * as a component for supporting the Hypertext Markup Language in
0050: * commercial products. If you use this source code in a product,
0051: * acknowledgment is not required but would be appreciated.
0052: *
0053: */
0054: package org.w3c.tidy;
0055:
0056: import java.util.HashMap;
0057: import java.util.Iterator;
0058: import java.util.Map;
0059:
0060: /**
0061: * Check attribute values implementations.
0062: * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
0063: * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
0064: * @author Fabrizio Giustina
0065: * @version $Revision: 1.36 $ ($Author: fgiust $)
0066: */
0067: public final class AttrCheckImpl {
0068:
0069: /**
0070: * checker for URLs.
0071: */
0072: public static final AttrCheck URL = new CheckUrl();
0073:
0074: /**
0075: * checker for scripts.
0076: */
0077: public static final AttrCheck SCRIPT = new CheckScript();
0078:
0079: /**
0080: * checker for "name" attribute.
0081: */
0082: public static final AttrCheck NAME = new CheckName();
0083:
0084: /**
0085: * checker for ids.
0086: */
0087: public static final AttrCheck ID = new CheckId();
0088:
0089: /**
0090: * checker for "align" attribute.
0091: */
0092: public static final AttrCheck ALIGN = new CheckAlign();
0093:
0094: /**
0095: * checker for "valign" attribute.
0096: */
0097: public static final AttrCheck VALIGN = new CheckValign();
0098:
0099: /**
0100: * checker for boolean attributes.
0101: */
0102: public static final AttrCheck BOOL = new CheckBool();
0103:
0104: /**
0105: * checker for "lenght" attribute.
0106: */
0107: public static final AttrCheck LENGTH = new CheckLength();
0108:
0109: /**
0110: * checker for "target" attribute.
0111: */
0112: public static final AttrCheck TARGET = new CheckTarget();
0113:
0114: /**
0115: * checker for "submit" attribute.
0116: */
0117: public static final AttrCheck FSUBMIT = new CheckFsubmit();
0118:
0119: /**
0120: * checker for "clear" attribute.
0121: */
0122: public static final AttrCheck CLEAR = new CheckClear();
0123:
0124: /**
0125: * checker for "shape" attribute.
0126: */
0127: public static final AttrCheck SHAPE = new CheckShape();
0128:
0129: /**
0130: * checker for "number" attribute.
0131: */
0132: public static final AttrCheck NUMBER = new CheckNumber();
0133:
0134: /**
0135: * checker for "scope" attribute.
0136: */
0137: public static final AttrCheck SCOPE = new CheckScope();
0138:
0139: /**
0140: * checker for "color" attribute.
0141: */
0142: public static final AttrCheck COLOR = new CheckColor();
0143:
0144: /**
0145: * checker for "vtype" attribute.
0146: */
0147: public static final AttrCheck VTYPE = new CheckVType();
0148:
0149: /**
0150: * checker for "scroll" attribute.
0151: */
0152: public static final AttrCheck SCROLL = new CheckScroll();
0153:
0154: /**
0155: * checker for "dir" attribute.
0156: */
0157: public static final AttrCheck TEXTDIR = new CheckTextDir();
0158:
0159: /**
0160: * checker for "lang" and "xml:lang" attributes.
0161: */
0162: public static final AttrCheck LANG = new CheckLang();
0163:
0164: /**
0165: * checker for text attributes. Actually null (no validation).
0166: */
0167: public static final AttrCheck TEXT = null;
0168:
0169: /**
0170: * checker for "charset" attribute. Actually null (no validation).
0171: */
0172: public static final AttrCheck CHARSET = null;
0173:
0174: /**
0175: * checker for "type" attribute. Actually null (no validation).
0176: */
0177: public static final AttrCheck TYPE = null;
0178:
0179: /**
0180: * checker for attributes that can contain a single character. Actually null (no validation).
0181: */
0182: public static final AttrCheck CHARACTER = null;
0183:
0184: /**
0185: * checker for attributes which contain a list of urls. Actually null (no validation).
0186: */
0187: public static final AttrCheck URLS = null;
0188:
0189: /**
0190: * checker for "cols" attribute. Actually null (no validation).
0191: */
0192: public static final AttrCheck COLS = null;
0193:
0194: /**
0195: * checker for "coords" attribute. Actually null (no validation).
0196: */
0197: public static final AttrCheck COORDS = null;
0198:
0199: /**
0200: * checker for attributes containing dates. Actually null (no validation).
0201: */
0202: public static final AttrCheck DATE = null;
0203:
0204: /**
0205: * checker for attributes referencng an id. Actually null (no validation).
0206: */
0207: public static final AttrCheck IDREF = null;
0208:
0209: /**
0210: * checker for table "frame" attribute. Actually null (no validation).
0211: */
0212: public static final AttrCheck TFRAME = null;
0213:
0214: /**
0215: * checker for "frameborder" attribute. Actually null (no validation).
0216: */
0217: public static final AttrCheck FBORDER = null;
0218:
0219: /**
0220: * checker for "media" attribute. Actually null (no validation).
0221: */
0222: public static final AttrCheck MEDIA = null;
0223:
0224: /**
0225: * checker for "rel" and "rev" attributes. Actually null (no validation).
0226: */
0227: public static final AttrCheck LINKTYPES = null;
0228:
0229: /**
0230: * checker for table "rules" attribute. Actually null (no validation).
0231: */
0232: public static final AttrCheck TRULES = null;
0233:
/**
 * Hidden constructor: this class only hosts checker constants and their implementations and is never instantiated.
 */
private AttrCheckImpl() {
    // intentionally empty
}
0240:
0241: /**
0242: * AttrCheck implementation for checking URLs.
0243: */
0244: public static class CheckUrl implements AttrCheck {
0245:
0246: /**
0247: * @see AttrCheck#check(Lexer, Node, AttVal)
0248: */
0249: public void check(Lexer lexer, Node node, AttVal attval) {
0250: char c;
0251: StringBuffer dest;
0252: boolean escapeFound = false;
0253: boolean backslashFound = false;
0254: int i = 0;
0255:
0256: if (attval.value == null) {
0257: lexer.report.attrError(lexer, node, attval,
0258: Report.MISSING_ATTR_VALUE);
0259: return;
0260: }
0261:
0262: String p = attval.value;
0263:
0264: for (i = 0; i < p.length(); ++i) {
0265: c = p.charAt(i);
0266: // find \
0267: if (c == '\\') {
0268: backslashFound = true;
0269: }
0270: // find non-ascii chars
0271: else if ((c > 0x7e) || (c <= 0x20) || (c == '<')
0272: || (c == '>')) {
0273: escapeFound = true;
0274: }
0275: }
0276:
0277: // backslashes found, fix them
0278: if (lexer.configuration.fixBackslash && backslashFound) {
0279: attval.value = attval.value.replace('\\', '/');
0280: p = attval.value;
0281: }
0282:
0283: // non-ascii chars found, fix them
0284: if (lexer.configuration.fixUri && escapeFound) {
0285: dest = new StringBuffer();
0286:
0287: for (i = 0; i < p.length(); ++i) {
0288: c = p.charAt(i);
0289: if ((c > 0x7e) || (c <= 0x20) || (c == '<')
0290: || (c == '>')) {
0291: dest.append('%');
0292: dest.append(Integer.toHexString(c)
0293: .toUpperCase());
0294: } else {
0295: dest.append(c);
0296: }
0297: }
0298:
0299: attval.value = dest.toString();
0300: }
0301: if (backslashFound) {
0302: if (lexer.configuration.fixBackslash) {
0303: lexer.report.attrError(lexer, node, attval,
0304: Report.FIXED_BACKSLASH);
0305: } else {
0306: lexer.report.attrError(lexer, node, attval,
0307: Report.BACKSLASH_IN_URI);
0308: }
0309: }
0310: if (escapeFound) {
0311: if (lexer.configuration.fixUri) {
0312: lexer.report.attrError(lexer, node, attval,
0313: Report.ESCAPED_ILLEGAL_URI);
0314: } else {
0315: lexer.report.attrError(lexer, node, attval,
0316: Report.ILLEGAL_URI_REFERENCE);
0317: }
0318:
0319: lexer.badChars |= Report.INVALID_URI;
0320: }
0321:
0322: }
0323: }
0324:
0325: /**
0326: * AttrCheck implementation for checking scripts.
0327: */
0328: public static class CheckScript implements AttrCheck {
0329:
0330: /**
0331: * @see AttrCheck#check(Lexer, Node, AttVal)
0332: */
0333: public void check(Lexer lexer, Node node, AttVal attval) {
0334: // not implemented
0335: }
0336:
0337: }
0338:
0339: /**
0340: * AttrCheck implementation for checking the "align" attribute.
0341: */
0342: public static class CheckAlign implements AttrCheck {
0343:
0344: /**
0345: * valid values for this attribute.
0346: */
0347: private static final String[] VALID_VALUES = new String[] {
0348: "left", "center", "right", "justify" };
0349:
0350: /**
0351: * @see AttrCheck#check(Lexer, Node, AttVal)
0352: */
0353: public void check(Lexer lexer, Node node, AttVal attval) {
0354: // IMG, OBJECT, APPLET and EMBED use align for vertical position
0355: if (node.tag != null
0356: && ((node.tag.model & Dict.CM_IMG) != 0)) {
0357: VALIGN.check(lexer, node, attval);
0358: return;
0359: }
0360:
0361: if (attval.value == null) {
0362: lexer.report.attrError(lexer, node, attval,
0363: Report.MISSING_ATTR_VALUE);
0364: return;
0365: }
0366:
0367: attval.checkLowerCaseAttrValue(lexer, node);
0368:
0369: if (!TidyUtils.isInValuesIgnoreCase(VALID_VALUES,
0370: attval.value)) {
0371: lexer.report.attrError(lexer, node, attval,
0372: Report.BAD_ATTRIBUTE_VALUE);
0373: }
0374: }
0375:
0376: }
0377:
0378: /**
0379: * AttrCheck implementation for checking the "valign" attribute.
0380: */
0381: public static class CheckValign implements AttrCheck {
0382:
0383: /**
0384: * valid values for this attribute.
0385: */
0386: private static final String[] VALID_VALUES = new String[] {
0387: "top", "middle", "bottom", "baseline" };
0388:
0389: /**
0390: * valid values for this attribute (only for img tag).
0391: */
0392: private static final String[] VALID_VALUES_IMG = new String[] {
0393: "left", "right" };
0394:
0395: /**
0396: * proprietary values for this attribute.
0397: */
0398: private static final String[] VALID_VALUES_PROPRIETARY = new String[] {
0399: "texttop", "absmiddle", "absbottom", "textbottom" };
0400:
0401: /**
0402: * @see AttrCheck#check(Lexer, Node, AttVal)
0403: */
0404: public void check(Lexer lexer, Node node, AttVal attval) {
0405: String value;
0406:
0407: if (attval.value == null) {
0408: lexer.report.attrError(lexer, node, attval,
0409: Report.MISSING_ATTR_VALUE);
0410: return;
0411: }
0412:
0413: attval.checkLowerCaseAttrValue(lexer, node);
0414:
0415: value = attval.value;
0416:
0417: if (TidyUtils.isInValuesIgnoreCase(VALID_VALUES, value)) {
0418: // all is fine
0419: return;
0420: }
0421:
0422: if (TidyUtils.isInValuesIgnoreCase(VALID_VALUES_IMG, value)) {
0423: if (!(node.tag != null && ((node.tag.model & Dict.CM_IMG) != 0))) {
0424: lexer.report.attrError(lexer, node, attval,
0425: Report.BAD_ATTRIBUTE_VALUE);
0426: }
0427: } else if (TidyUtils.isInValuesIgnoreCase(
0428: VALID_VALUES_PROPRIETARY, value)) {
0429: lexer.constrainVersion(Dict.VERS_PROPRIETARY);
0430: lexer.report.attrError(lexer, node, attval,
0431: Report.PROPRIETARY_ATTR_VALUE);
0432: } else {
0433: lexer.report.attrError(lexer, node, attval,
0434: Report.BAD_ATTRIBUTE_VALUE);
0435: }
0436: }
0437:
0438: }
0439:
0440: /**
0441: * AttrCheck implementation for checking boolean attributes.
0442: */
0443: public static class CheckBool implements AttrCheck {
0444:
0445: /**
0446: * @see AttrCheck#check(Lexer, Node, AttVal)
0447: */
0448: public void check(Lexer lexer, Node node, AttVal attval) {
0449: if (attval.value == null) {
0450: return;
0451: }
0452:
0453: attval.checkLowerCaseAttrValue(lexer, node);
0454: }
0455:
0456: }
0457:
0458: /**
0459: * AttrCheck implementation for checking the "length" attribute.
0460: */
0461: public static class CheckLength implements AttrCheck {
0462:
0463: /**
0464: * @see AttrCheck#check(Lexer, Node, AttVal)
0465: */
0466: public void check(Lexer lexer, Node node, AttVal attval) {
0467:
0468: if (attval.value == null) {
0469: lexer.report.attrError(lexer, node, attval,
0470: Report.MISSING_ATTR_VALUE);
0471: return;
0472: }
0473:
0474: // don't check for <col width=...> and <colgroup width=...>
0475: if ("width".equalsIgnoreCase(attval.attribute)
0476: && (node.tag == lexer.configuration.tt.tagCol || node.tag == lexer.configuration.tt.tagColgroup)) {
0477: return;
0478: }
0479:
0480: String p = attval.value;
0481:
0482: if (p.length() == 0
0483: || (!Character.isDigit(p.charAt(0)) && !('%' == p
0484: .charAt(0)))) {
0485: lexer.report.attrError(lexer, node, attval,
0486: Report.BAD_ATTRIBUTE_VALUE);
0487: } else {
0488:
0489: TagTable tt = lexer.configuration.tt;
0490:
0491: for (int j = 1; j < p.length(); j++) {
0492: // elements th and td must not use percentages
0493: if ((!Character.isDigit(p.charAt(j)) && (node.tag == tt.tagTd || node.tag == tt.tagTh))
0494: || (!Character.isDigit(p.charAt(j)) && p
0495: .charAt(j) != '%')) {
0496: lexer.report.attrError(lexer, node, attval,
0497: Report.BAD_ATTRIBUTE_VALUE);
0498: break;
0499: }
0500: }
0501: }
0502: }
0503: }
0504:
0505: /**
0506: * AttrCheck implementation for checking the "target" attribute.
0507: */
0508: public static class CheckTarget implements AttrCheck {
0509:
0510: /**
0511: * valid values for this attribute.
0512: */
0513: private static final String[] VALID_VALUES = new String[] {
0514: "_blank", "_self", "_parent", "_top" };
0515:
0516: /**
0517: * @see AttrCheck#check(Lexer, Node, AttVal)
0518: */
0519: public void check(Lexer lexer, Node node, AttVal attval) {
0520:
0521: // No target attribute in strict HTML versions
0522: lexer.constrainVersion(~Dict.VERS_HTML40_STRICT);
0523:
0524: if (attval.value == null || attval.value.length() == 0) {
0525: lexer.report.attrError(lexer, node, attval,
0526: Report.MISSING_ATTR_VALUE);
0527: return;
0528: }
0529:
0530: String value = attval.value;
0531:
0532: // target names must begin with A-Za-z ...
0533: if (Character.isLetter(value.charAt(0))) {
0534: return;
0535: }
0536:
0537: // or be one of _blank, _self, _parent and _top
0538: if (!TidyUtils.isInValuesIgnoreCase(VALID_VALUES, value)) {
0539: lexer.report.attrError(lexer, node, attval,
0540: Report.BAD_ATTRIBUTE_VALUE);
0541: }
0542:
0543: }
0544: }
0545:
0546: /**
0547: * AttrCheck implementation for checking the "submit" attribute.
0548: */
0549: public static class CheckFsubmit implements AttrCheck {
0550:
0551: /**
0552: * valid values for this attribute.
0553: */
0554: private static final String[] VALID_VALUES = new String[] {
0555: "get", "post" };
0556:
0557: /**
0558: * @see AttrCheck#check(Lexer, Node, AttVal)
0559: */
0560: public void check(Lexer lexer, Node node, AttVal attval) {
0561: if (attval.value == null) {
0562: lexer.report.attrError(lexer, node, attval,
0563: Report.MISSING_ATTR_VALUE);
0564: return;
0565: }
0566:
0567: attval.checkLowerCaseAttrValue(lexer, node);
0568:
0569: if (!TidyUtils.isInValuesIgnoreCase(VALID_VALUES,
0570: attval.value)) {
0571: lexer.report.attrError(lexer, node, attval,
0572: Report.BAD_ATTRIBUTE_VALUE);
0573: }
0574: }
0575: }
0576:
0577: /**
0578: * AttrCheck implementation for checking the "clear" attribute.
0579: */
0580: public static class CheckClear implements AttrCheck {
0581:
0582: /**
0583: * valid values for this attribute.
0584: */
0585: private static final String[] VALID_VALUES = new String[] {
0586: "none", "left", "right", "all" };
0587:
0588: /**
0589: * @see AttrCheck#check(Lexer, Node, AttVal)
0590: */
0591: public void check(Lexer lexer, Node node, AttVal attval) {
0592: if (attval.value == null) {
0593: lexer.report.attrError(lexer, node, attval,
0594: Report.MISSING_ATTR_VALUE);
0595: attval.value = VALID_VALUES[0];
0596: return;
0597: }
0598:
0599: attval.checkLowerCaseAttrValue(lexer, node);
0600:
0601: if (!TidyUtils.isInValuesIgnoreCase(VALID_VALUES,
0602: attval.value)) {
0603: lexer.report.attrError(lexer, node, attval,
0604: Report.BAD_ATTRIBUTE_VALUE);
0605: }
0606:
0607: }
0608: }
0609:
0610: /**
0611: * AttrCheck implementation for checking the "shape" attribute.
0612: */
0613: public static class CheckShape implements AttrCheck {
0614:
0615: /**
0616: * valid values for this attribute.
0617: */
0618: private static final String[] VALID_VALUES = new String[] {
0619: "rect", "default", "circle", "poly" };
0620:
0621: /**
0622: * @see AttrCheck#check(Lexer, Node, AttVal)
0623: */
0624: public void check(Lexer lexer, Node node, AttVal attval) {
0625: if (attval.value == null) {
0626: lexer.report.attrError(lexer, node, attval,
0627: Report.MISSING_ATTR_VALUE);
0628: return;
0629: }
0630:
0631: attval.checkLowerCaseAttrValue(lexer, node);
0632:
0633: if (!TidyUtils.isInValuesIgnoreCase(VALID_VALUES,
0634: attval.value)) {
0635: lexer.report.attrError(lexer, node, attval,
0636: Report.BAD_ATTRIBUTE_VALUE);
0637: }
0638:
0639: }
0640: }
0641:
0642: /**
0643: * AttrCheck implementation for checking Scope.
0644: */
0645: public static class CheckScope implements AttrCheck {
0646:
0647: /**
0648: * valid values for this attribute.
0649: */
0650: private static final String[] VALID_VALUES = new String[] {
0651: "row", "rowgroup", "col", "colgroup" };
0652:
0653: /**
0654: * @see AttrCheck#check(Lexer, Node, AttVal)
0655: */
0656: public void check(Lexer lexer, Node node, AttVal attval) {
0657:
0658: if (attval.value == null) {
0659: lexer.report.attrError(lexer, node, attval,
0660: Report.MISSING_ATTR_VALUE);
0661: return;
0662: }
0663:
0664: attval.checkLowerCaseAttrValue(lexer, node);
0665:
0666: if (!TidyUtils.isInValuesIgnoreCase(VALID_VALUES,
0667: attval.value)) {
0668: lexer.report.attrError(lexer, node, attval,
0669: Report.BAD_ATTRIBUTE_VALUE);
0670: }
0671: }
0672: }
0673:
0674: /**
0675: * AttrCheck implementation for checking numbers.
0676: */
0677: public static class CheckNumber implements AttrCheck {
0678:
0679: /**
0680: * @see AttrCheck#check(Lexer, Node, AttVal)
0681: */
0682: public void check(Lexer lexer, Node node, AttVal attval) {
0683:
0684: if (attval.value == null) {
0685: lexer.report.attrError(lexer, node, attval,
0686: Report.MISSING_ATTR_VALUE);
0687: return;
0688: }
0689:
0690: // don't check <frameset cols=... rows=...>
0691: if (("cols".equalsIgnoreCase(attval.attribute) || "rows"
0692: .equalsIgnoreCase(attval.attribute))
0693: && node.tag == lexer.configuration.tt.tagFrameset) {
0694: return;
0695: }
0696:
0697: String value = attval.value;
0698:
0699: int j = 0;
0700:
0701: // font size may be preceded by + or -
0702: if (node.tag == lexer.configuration.tt.tagFont
0703: && (value.startsWith("+") || value.startsWith("-"))) {
0704: ++j;
0705: }
0706:
0707: for (; j < value.length(); j++) {
0708: char p = value.charAt(j);
0709: if (!Character.isDigit(p)) {
0710: lexer.report.attrError(lexer, node, attval,
0711: Report.BAD_ATTRIBUTE_VALUE);
0712: break;
0713: }
0714: }
0715: }
0716: }
0717:
0718: /**
0719: * AttrCheck implementation for checking ids.
0720: */
0721: public static class CheckId implements AttrCheck {
0722:
0723: /**
0724: * @see AttrCheck#check(Lexer, Node, AttVal)
0725: */
0726: public void check(Lexer lexer, Node node, AttVal attval) {
0727: Node old;
0728:
0729: if (attval.value == null || attval.value.length() == 0) {
0730: lexer.report.attrError(lexer, node, attval,
0731: Report.MISSING_ATTR_VALUE);
0732: return;
0733: }
0734:
0735: String p = attval.value;
0736: char s = p.charAt(0);
0737:
0738: if (p.length() == 0 || !Character.isLetter(p.charAt(0))) {
0739: if (lexer.isvoyager
0740: && (TidyUtils.isXMLLetter(s) || s == '_' || s == ':')) {
0741: lexer.report.attrError(lexer, node, attval,
0742: Report.XML_ID_SYNTAX);
0743: } else {
0744: lexer.report.attrError(lexer, node, attval,
0745: Report.BAD_ATTRIBUTE_VALUE);
0746: }
0747: } else {
0748:
0749: for (int j = 1; j < p.length(); j++) {
0750: s = p.charAt(j);
0751:
0752: if (!TidyUtils.isNamechar(s)) {
0753: if (lexer.isvoyager
0754: && TidyUtils.isXMLNamechar(s)) {
0755: lexer.report.attrError(lexer, node, attval,
0756: Report.XML_ID_SYNTAX);
0757: } else {
0758: lexer.report.attrError(lexer, node, attval,
0759: Report.BAD_ATTRIBUTE_VALUE);
0760: }
0761: break;
0762: }
0763: }
0764: }
0765:
0766: if (((old = lexer.configuration.tt
0767: .getNodeByAnchor(attval.value)) != null)
0768: && old != node) {
0769: lexer.report.attrError(lexer, node, attval,
0770: Report.ANCHOR_NOT_UNIQUE);
0771: } else {
0772: lexer.configuration.tt.anchorList = lexer.configuration.tt
0773: .addAnchor(attval.value, node);
0774: }
0775: }
0776:
0777: }
0778:
0779: /**
0780: * AttrCheck implementation for checking the "name" attribute.
0781: */
0782: public static class CheckName implements AttrCheck {
0783:
0784: /**
0785: * @see AttrCheck#check(Lexer, Node, AttVal)
0786: */
0787: public void check(Lexer lexer, Node node, AttVal attval) {
0788: Node old;
0789:
0790: if (attval.value == null) {
0791: lexer.report.attrError(lexer, node, attval,
0792: Report.MISSING_ATTR_VALUE);
0793: return;
0794: } else if (lexer.configuration.tt.isAnchorElement(node)) {
0795: lexer.constrainVersion(~Dict.VERS_XHTML11);
0796:
0797: if (((old = lexer.configuration.tt
0798: .getNodeByAnchor(attval.value)) != null)
0799: && old != node) {
0800: lexer.report.attrError(lexer, node, attval,
0801: Report.ANCHOR_NOT_UNIQUE);
0802: } else {
0803: lexer.configuration.tt.anchorList = lexer.configuration.tt
0804: .addAnchor(attval.value, node);
0805: }
0806: }
0807: }
0808:
0809: }
0810:
0811: /**
0812: * AttrCheck implementation for checking colors.
0813: */
0814: public static class CheckColor implements AttrCheck {
0815:
0816: /**
0817: * valid html colors.
0818: */
0819: private static final Map COLORS = new HashMap();
0820:
0821: static {
0822: COLORS.put("black", "#000000");
0823: COLORS.put("green", "#008000");
0824: COLORS.put("silver", "#C0C0C0");
0825: COLORS.put("lime", "#00FF00");
0826: COLORS.put("gray", "#808080");
0827: COLORS.put("olive", "#808000");
0828: COLORS.put("white", "#FFFFFF");
0829: COLORS.put("yellow", "#FFFF00");
0830: COLORS.put("maroon", "#800000");
0831: COLORS.put("navy", "#000080");
0832: COLORS.put("red", "#FF0000");
0833: COLORS.put("blue", "#0000FF");
0834: COLORS.put("purple", "#800080");
0835: COLORS.put("teal", "#008080");
0836: COLORS.put("fuchsia", "#FF00FF");
0837: COLORS.put("aqua", "#00FFFF");
0838: }
0839:
0840: /**
0841: * @see AttrCheck#check(Lexer, Node, AttVal)
0842: */
0843: public void check(Lexer lexer, Node node, AttVal attval) {
0844: boolean hexUppercase = true;
0845: boolean invalid = false;
0846: boolean found = false;
0847:
0848: if (attval.value == null || attval.value.length() == 0) {
0849: lexer.report.attrError(lexer, node, attval,
0850: Report.MISSING_ATTR_VALUE);
0851: return;
0852: }
0853:
0854: String given = attval.value;
0855:
0856: Iterator colorIter = COLORS.entrySet().iterator();
0857:
0858: while (colorIter.hasNext()) {
0859: Map.Entry color = (Map.Entry) colorIter.next();
0860:
0861: if (given.charAt(0) == '#') {
0862: if (given.length() != 7) {
0863: lexer.report.attrError(lexer, node, attval,
0864: Report.BAD_ATTRIBUTE_VALUE);
0865: invalid = true;
0866: break;
0867: } else if (given.equalsIgnoreCase((String) color
0868: .getValue())) {
0869: if (lexer.configuration.replaceColor) {
0870: attval.value = (String) color.getKey();
0871: }
0872: found = true;
0873: break;
0874: }
0875: } else if (TidyUtils.isLetter(given.charAt(0))) {
0876: if (given.equalsIgnoreCase((String) color.getKey())) {
0877: if (lexer.configuration.replaceColor) {
0878: attval.value = (String) color.getKey();
0879: }
0880: found = true;
0881: break;
0882: }
0883: } else {
0884:
0885: lexer.report.attrError(lexer, node, attval,
0886: Report.BAD_ATTRIBUTE_VALUE);
0887:
0888: invalid = true;
0889: break;
0890: }
0891: }
0892: if (!found && !invalid) {
0893: if (given.charAt(0) == '#') {
0894: // check if valid hex digits and letters
0895:
0896: for (int i = 1; i < 7; ++i) {
0897: if (!TidyUtils.isDigit(given.charAt(i))
0898: && ("abcdef".indexOf(Character
0899: .toLowerCase(given.charAt(i))) == -1)) {
0900: lexer.report.attrError(lexer, node, attval,
0901: Report.BAD_ATTRIBUTE_VALUE);
0902: invalid = true;
0903: break;
0904: }
0905: }
0906: // convert hex letters to uppercase
0907: if (!invalid && hexUppercase) {
0908: for (int i = 1; i < 7; ++i) {
0909: attval.value = given.toUpperCase();
0910: }
0911: }
0912: }
0913:
0914: else {
0915: // we could search for more colors and mark the file as HTML Proprietary, but I don't thinks
0916: // it's worth the effort, so values not in HTML 4.01 are invalid
0917: lexer.report.attrError(lexer, node, attval,
0918: Report.BAD_ATTRIBUTE_VALUE);
0919: invalid = true;
0920: }
0921: }
0922: }
0923: }
0924:
0925: /**
0926: * AttrCheck implementation for checking valuetype.
0927: */
0928: public static class CheckVType implements AttrCheck {
0929:
0930: /**
0931: * valid values for this attribute.
0932: */
0933: private static final String[] VALID_VALUES = new String[] {
0934: "data", "object", "ref" };
0935:
0936: /**
0937: * @see AttrCheck#check(Lexer, Node, AttVal)
0938: */
0939: public void check(Lexer lexer, Node node, AttVal attval) {
0940: if (attval.value == null) {
0941: lexer.report.attrError(lexer, node, attval,
0942: Report.MISSING_ATTR_VALUE);
0943: return;
0944: }
0945:
0946: attval.checkLowerCaseAttrValue(lexer, node);
0947:
0948: if (!TidyUtils.isInValuesIgnoreCase(VALID_VALUES,
0949: attval.value)) {
0950: lexer.report.attrError(lexer, node, attval,
0951: Report.BAD_ATTRIBUTE_VALUE);
0952: }
0953: }
0954: }
0955:
0956: /**
0957: * AttrCheck implementation for checking scroll.
0958: */
0959: public static class CheckScroll implements AttrCheck {
0960:
0961: /**
0962: * valid values for this attribute.
0963: */
0964: private static final String[] VALID_VALUES = new String[] {
0965: "no", "yes", "auto" };
0966:
0967: /**
0968: * @see AttrCheck#check(Lexer, Node, AttVal)
0969: */
0970: public void check(Lexer lexer, Node node, AttVal attval) {
0971:
0972: if (attval.value == null) {
0973: lexer.report.attrError(lexer, node, attval,
0974: Report.MISSING_ATTR_VALUE);
0975: return;
0976: }
0977:
0978: attval.checkLowerCaseAttrValue(lexer, node);
0979:
0980: if (!TidyUtils.isInValuesIgnoreCase(VALID_VALUES,
0981: attval.value)) {
0982: lexer.report.attrError(lexer, node, attval,
0983: Report.BAD_ATTRIBUTE_VALUE);
0984: }
0985: }
0986: }
0987:
0988: /**
0989: * AttrCheck implementation for checking dir.
0990: */
0991: public static class CheckTextDir implements AttrCheck {
0992:
0993: /**
0994: * valid values for this attribute.
0995: */
0996: private static final String[] VALID_VALUES = new String[] {
0997: "rtl", "ltr" };
0998:
0999: /**
1000: * @see AttrCheck#check(Lexer, Node, AttVal)
1001: */
1002: public void check(Lexer lexer, Node node, AttVal attval) {
1003:
1004: if (attval.value == null) {
1005: lexer.report.attrError(lexer, node, attval,
1006: Report.MISSING_ATTR_VALUE);
1007: return;
1008: }
1009:
1010: attval.checkLowerCaseAttrValue(lexer, node);
1011:
1012: if (!TidyUtils.isInValuesIgnoreCase(VALID_VALUES,
1013: attval.value)) {
1014: lexer.report.attrError(lexer, node, attval,
1015: Report.BAD_ATTRIBUTE_VALUE);
1016: }
1017: }
1018: }
1019:
1020: /**
1021: * AttrCheck implementation for checking lang and xml:lang.
1022: */
1023: public static class CheckLang implements AttrCheck {
1024:
1025: /**
1026: * @see AttrCheck#check(Lexer, Node, AttVal)
1027: */
1028: public void check(Lexer lexer, Node node, AttVal attval) {
1029:
1030: if ("lang".equals(attval.attribute)) {
1031: lexer.constrainVersion(~Dict.VERS_XHTML11);
1032: }
1033:
1034: if (attval.value == null) {
1035: lexer.report.attrError(lexer, node, attval,
1036: Report.MISSING_ATTR_VALUE);
1037: return;
1038: }
1039: }
1040: }
1041:
1042: }
|