0001: /*
0002: * Copyright 2001-2004 The Apache Software Foundation.
0003: *
0004: * Licensed under the Apache License, Version 2.0 (the "License");
0005: * you may not use this file except in compliance with the License.
0006: * You may obtain a copy of the License at
0007: *
0008: * http://www.apache.org/licenses/LICENSE-2.0
0009: *
0010: * Unless required by applicable law or agreed to in writing, software
0011: * distributed under the License is distributed on an "AS IS" BASIS,
0012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0013: * See the License for the specific language governing permissions and
0014: * limitations under the License.
0015: */
0016: /*
0017: * $Id: ToHTMLStream.java,v 1.43 2005/08/03 19:20:31 minchau Exp $
0018: */
0019: package org.apache.xml.serializer;
0020:
0021: import java.io.IOException;
0022: import java.io.OutputStream;
0023: import java.io.UnsupportedEncodingException;
0024: import java.util.Properties;
0025:
0026: import javax.xml.transform.Result;
0027:
0028: import org.apache.xml.serializer.utils.MsgKey;
0029: import org.apache.xml.serializer.utils.Utils;
0030: import org.xml.sax.Attributes;
0031: import org.xml.sax.SAXException;
0032:
0033: /**
0034: * This serializer takes a series of SAX or
0035: * SAX-like events and writes its output
0036: * to the given stream.
0037: *
0038: * This class is not a public API, it is public
0039: * because it is used from another package.
0040: *
0041: * @xsl.usage internal
0042: */
0043: public final class ToHTMLStream extends ToStream {
0044:
/** Set while events originating from the DTD are being received. */
protected boolean m_inDTD = false;

/**
 * Indentation bookkeeping flag that is updated by the element start/end
 * handlers. The original author noted that this probably ought to be a
 * stack rather than a single flag.
 */
private boolean m_inBlockElem = false;

/**
 * Character information for HTML output: tells which characters need
 * special treatment and maps characters to entity names.
 */
private static final CharInfo m_htmlcharInfo = CharInfo.getCharInfo(
        CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML);

/**
 * Digital search trie giving fast, case insensitive lookup of the
 * {@link ElemDesc} registered for an HTML element name.
 */
static final Trie m_elementFlags = new Trie();

// Fill the shared trie once, when the class is loaded.
static {
    initTagReference(m_elementFlags);
}
0066:
0067: static void initTagReference(Trie m_elementFlags) {
0068:
0069: // HTML 4.0 loose DTD
0070: m_elementFlags
0071: .put("BASEFONT", new ElemDesc(0 | ElemDesc.EMPTY));
0072: m_elementFlags.put("FRAME", new ElemDesc(0 | ElemDesc.EMPTY
0073: | ElemDesc.BLOCK));
0074: m_elementFlags
0075: .put("FRAMESET", new ElemDesc(0 | ElemDesc.BLOCK));
0076: m_elementFlags
0077: .put("NOFRAMES", new ElemDesc(0 | ElemDesc.BLOCK));
0078: m_elementFlags.put("ISINDEX", new ElemDesc(0 | ElemDesc.EMPTY
0079: | ElemDesc.BLOCK));
0080: m_elementFlags.put("APPLET", new ElemDesc(
0081: 0 | ElemDesc.WHITESPACESENSITIVE));
0082: m_elementFlags.put("CENTER", new ElemDesc(0 | ElemDesc.BLOCK));
0083: m_elementFlags.put("DIR", new ElemDesc(0 | ElemDesc.BLOCK));
0084: m_elementFlags.put("MENU", new ElemDesc(0 | ElemDesc.BLOCK));
0085:
0086: // HTML 4.0 strict DTD
0087: m_elementFlags.put("TT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
0088: m_elementFlags.put("I", new ElemDesc(0 | ElemDesc.FONTSTYLE));
0089: m_elementFlags.put("B", new ElemDesc(0 | ElemDesc.FONTSTYLE));
0090: m_elementFlags.put("BIG", new ElemDesc(0 | ElemDesc.FONTSTYLE));
0091: m_elementFlags.put("SMALL",
0092: new ElemDesc(0 | ElemDesc.FONTSTYLE));
0093: m_elementFlags.put("EM", new ElemDesc(0 | ElemDesc.PHRASE));
0094: m_elementFlags.put("STRONG", new ElemDesc(0 | ElemDesc.PHRASE));
0095: m_elementFlags.put("DFN", new ElemDesc(0 | ElemDesc.PHRASE));
0096: m_elementFlags.put("CODE", new ElemDesc(0 | ElemDesc.PHRASE));
0097: m_elementFlags.put("SAMP", new ElemDesc(0 | ElemDesc.PHRASE));
0098: m_elementFlags.put("KBD", new ElemDesc(0 | ElemDesc.PHRASE));
0099: m_elementFlags.put("VAR", new ElemDesc(0 | ElemDesc.PHRASE));
0100: m_elementFlags.put("CITE", new ElemDesc(0 | ElemDesc.PHRASE));
0101: m_elementFlags.put("ABBR", new ElemDesc(0 | ElemDesc.PHRASE));
0102: m_elementFlags
0103: .put("ACRONYM", new ElemDesc(0 | ElemDesc.PHRASE));
0104: m_elementFlags.put("SUP", new ElemDesc(0 | ElemDesc.SPECIAL
0105: | ElemDesc.ASPECIAL));
0106: m_elementFlags.put("SUB", new ElemDesc(0 | ElemDesc.SPECIAL
0107: | ElemDesc.ASPECIAL));
0108: m_elementFlags.put("SPAN", new ElemDesc(0 | ElemDesc.SPECIAL
0109: | ElemDesc.ASPECIAL));
0110: m_elementFlags.put("BDO", new ElemDesc(0 | ElemDesc.SPECIAL
0111: | ElemDesc.ASPECIAL));
0112: m_elementFlags.put("BR", new ElemDesc(0 | ElemDesc.SPECIAL
0113: | ElemDesc.ASPECIAL | ElemDesc.EMPTY | ElemDesc.BLOCK));
0114: m_elementFlags.put("BODY", new ElemDesc(0 | ElemDesc.BLOCK));
0115: m_elementFlags.put("ADDRESS", new ElemDesc(0 | ElemDesc.BLOCK
0116: | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
0117: m_elementFlags.put("DIV", new ElemDesc(0 | ElemDesc.BLOCK
0118: | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
0119: m_elementFlags.put("A", new ElemDesc(0 | ElemDesc.SPECIAL));
0120: m_elementFlags.put("MAP", new ElemDesc(0 | ElemDesc.SPECIAL
0121: | ElemDesc.ASPECIAL | ElemDesc.BLOCK));
0122: m_elementFlags.put("AREA", new ElemDesc(0 | ElemDesc.EMPTY
0123: | ElemDesc.BLOCK));
0124: m_elementFlags.put("LINK", new ElemDesc(0 | ElemDesc.HEADMISC
0125: | ElemDesc.EMPTY | ElemDesc.BLOCK));
0126: m_elementFlags.put("IMG", new ElemDesc(0 | ElemDesc.SPECIAL
0127: | ElemDesc.ASPECIAL | ElemDesc.EMPTY
0128: | ElemDesc.WHITESPACESENSITIVE));
0129: m_elementFlags.put("OBJECT", new ElemDesc(0 | ElemDesc.SPECIAL
0130: | ElemDesc.ASPECIAL | ElemDesc.HEADMISC
0131: | ElemDesc.WHITESPACESENSITIVE));
0132: m_elementFlags.put("PARAM", new ElemDesc(0 | ElemDesc.EMPTY));
0133: m_elementFlags.put("HR", new ElemDesc(0 | ElemDesc.BLOCK
0134: | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET
0135: | ElemDesc.EMPTY));
0136: m_elementFlags.put("P", new ElemDesc(0 | ElemDesc.BLOCK
0137: | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
0138: m_elementFlags.put("H1", new ElemDesc(0 | ElemDesc.HEAD
0139: | ElemDesc.BLOCK));
0140: m_elementFlags.put("H2", new ElemDesc(0 | ElemDesc.HEAD
0141: | ElemDesc.BLOCK));
0142: m_elementFlags.put("H3", new ElemDesc(0 | ElemDesc.HEAD
0143: | ElemDesc.BLOCK));
0144: m_elementFlags.put("H4", new ElemDesc(0 | ElemDesc.HEAD
0145: | ElemDesc.BLOCK));
0146: m_elementFlags.put("H5", new ElemDesc(0 | ElemDesc.HEAD
0147: | ElemDesc.BLOCK));
0148: m_elementFlags.put("H6", new ElemDesc(0 | ElemDesc.HEAD
0149: | ElemDesc.BLOCK));
0150: m_elementFlags.put("PRE", new ElemDesc(0
0151: | ElemDesc.PREFORMATTED | ElemDesc.BLOCK));
0152: m_elementFlags.put("Q", new ElemDesc(0 | ElemDesc.SPECIAL
0153: | ElemDesc.ASPECIAL));
0154: m_elementFlags.put("BLOCKQUOTE", new ElemDesc(0
0155: | ElemDesc.BLOCK | ElemDesc.BLOCKFORM
0156: | ElemDesc.BLOCKFORMFIELDSET));
0157: m_elementFlags.put("INS", new ElemDesc(0));
0158: m_elementFlags.put("DEL", new ElemDesc(0));
0159: m_elementFlags.put("DL", new ElemDesc(0 | ElemDesc.BLOCK
0160: | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
0161: m_elementFlags.put("DT", new ElemDesc(0 | ElemDesc.BLOCK));
0162: m_elementFlags.put("DD", new ElemDesc(0 | ElemDesc.BLOCK));
0163: m_elementFlags.put("OL", new ElemDesc(0 | ElemDesc.LIST
0164: | ElemDesc.BLOCK));
0165: m_elementFlags.put("UL", new ElemDesc(0 | ElemDesc.LIST
0166: | ElemDesc.BLOCK));
0167: m_elementFlags.put("LI", new ElemDesc(0 | ElemDesc.BLOCK));
0168: m_elementFlags.put("FORM", new ElemDesc(0 | ElemDesc.BLOCK));
0169: m_elementFlags
0170: .put("LABEL", new ElemDesc(0 | ElemDesc.FORMCTRL));
0171: m_elementFlags.put("INPUT", new ElemDesc(0 | ElemDesc.FORMCTRL
0172: | ElemDesc.INLINELABEL | ElemDesc.EMPTY));
0173: m_elementFlags.put("SELECT", new ElemDesc(0 | ElemDesc.FORMCTRL
0174: | ElemDesc.INLINELABEL));
0175: m_elementFlags.put("OPTGROUP", new ElemDesc(0));
0176: m_elementFlags.put("OPTION", new ElemDesc(0));
0177: m_elementFlags.put("TEXTAREA", new ElemDesc(0
0178: | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
0179: m_elementFlags.put("FIELDSET", new ElemDesc(0 | ElemDesc.BLOCK
0180: | ElemDesc.BLOCKFORM));
0181: m_elementFlags.put("LEGEND", new ElemDesc(0));
0182: m_elementFlags.put("BUTTON", new ElemDesc(0 | ElemDesc.FORMCTRL
0183: | ElemDesc.INLINELABEL));
0184: m_elementFlags.put("TABLE", new ElemDesc(0 | ElemDesc.BLOCK
0185: | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
0186: m_elementFlags.put("CAPTION", new ElemDesc(0 | ElemDesc.BLOCK));
0187: m_elementFlags.put("THEAD", new ElemDesc(0 | ElemDesc.BLOCK));
0188: m_elementFlags.put("TFOOT", new ElemDesc(0 | ElemDesc.BLOCK));
0189: m_elementFlags.put("TBODY", new ElemDesc(0 | ElemDesc.BLOCK));
0190: m_elementFlags
0191: .put("COLGROUP", new ElemDesc(0 | ElemDesc.BLOCK));
0192: m_elementFlags.put("COL", new ElemDesc(0 | ElemDesc.EMPTY
0193: | ElemDesc.BLOCK));
0194: m_elementFlags.put("TR", new ElemDesc(0 | ElemDesc.BLOCK));
0195: m_elementFlags.put("TH", new ElemDesc(0));
0196: m_elementFlags.put("TD", new ElemDesc(0));
0197: m_elementFlags.put("HEAD", new ElemDesc(0 | ElemDesc.BLOCK
0198: | ElemDesc.HEADELEM));
0199: m_elementFlags.put("TITLE", new ElemDesc(0 | ElemDesc.BLOCK));
0200: m_elementFlags.put("BASE", new ElemDesc(0 | ElemDesc.EMPTY
0201: | ElemDesc.BLOCK));
0202: m_elementFlags.put("META", new ElemDesc(0 | ElemDesc.HEADMISC
0203: | ElemDesc.EMPTY | ElemDesc.BLOCK));
0204: m_elementFlags.put("STYLE", new ElemDesc(0 | ElemDesc.HEADMISC
0205: | ElemDesc.RAW | ElemDesc.BLOCK));
0206: m_elementFlags
0207: .put("SCRIPT", new ElemDesc(0 | ElemDesc.SPECIAL
0208: | ElemDesc.ASPECIAL | ElemDesc.HEADMISC
0209: | ElemDesc.RAW));
0210: m_elementFlags.put("NOSCRIPT", new ElemDesc(0 | ElemDesc.BLOCK
0211: | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
0212: m_elementFlags.put("HTML", new ElemDesc(0 | ElemDesc.BLOCK));
0213:
0214: // From "John Ky" <hand@syd.speednet.com.au
0215: // Transitional Document Type Definition ()
0216: // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/sgml/loosedtd.html#basefont
0217: m_elementFlags
0218: .put("FONT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
0219:
0220: // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-STRIKE
0221: m_elementFlags.put("S", new ElemDesc(0 | ElemDesc.FONTSTYLE));
0222: m_elementFlags.put("STRIKE", new ElemDesc(
0223: 0 | ElemDesc.FONTSTYLE));
0224:
0225: // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-U
0226: m_elementFlags.put("U", new ElemDesc(0 | ElemDesc.FONTSTYLE));
0227:
0228: // From "John Ky" <hand@syd.speednet.com.au
0229: m_elementFlags
0230: .put("NOBR", new ElemDesc(0 | ElemDesc.FONTSTYLE));
0231:
0232: // HTML 4.0, section 16.5
0233: m_elementFlags.put("IFRAME", new ElemDesc(0 | ElemDesc.BLOCK
0234: | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
0235:
0236: // Netscape 4 extension
0237: m_elementFlags.put("LAYER", new ElemDesc(0 | ElemDesc.BLOCK
0238: | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
0239: // Netscape 4 extension
0240: m_elementFlags.put("ILAYER", new ElemDesc(0 | ElemDesc.BLOCK
0241: | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET));
0242:
0243: // NOW FOR ATTRIBUTE INFORMATION . . .
0244: ElemDesc elemDesc;
0245:
0246: // ----------------------------------------------
0247: elemDesc = (ElemDesc) m_elementFlags.get("A");
0248: elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
0249: elemDesc.setAttr("NAME", ElemDesc.ATTRURL);
0250:
0251: // ----------------------------------------------
0252: elemDesc = (ElemDesc) m_elementFlags.get("AREA");
0253: elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
0254: elemDesc.setAttr("NOHREF", ElemDesc.ATTREMPTY);
0255:
0256: // ----------------------------------------------
0257: elemDesc = (ElemDesc) m_elementFlags.get("BASE");
0258: elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
0259:
0260: // ----------------------------------------------
0261: elemDesc = (ElemDesc) m_elementFlags.get("BUTTON");
0262: elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
0263:
0264: // ----------------------------------------------
0265: elemDesc = (ElemDesc) m_elementFlags.get("BLOCKQUOTE");
0266: elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
0267:
0268: // ----------------------------------------------
0269: elemDesc = (ElemDesc) m_elementFlags.get("DEL");
0270: elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
0271:
0272: // ----------------------------------------------
0273: elemDesc = (ElemDesc) m_elementFlags.get("DIR");
0274: elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
0275:
0276: // ----------------------------------------------
0277:
0278: elemDesc = (ElemDesc) m_elementFlags.get("DIV");
0279: elemDesc.setAttr("SRC", ElemDesc.ATTRURL); // Netscape 4 extension
0280: elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
0281:
0282: // ----------------------------------------------
0283: elemDesc = (ElemDesc) m_elementFlags.get("DL");
0284: elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
0285:
0286: // ----------------------------------------------
0287: elemDesc = (ElemDesc) m_elementFlags.get("FORM");
0288: elemDesc.setAttr("ACTION", ElemDesc.ATTRURL);
0289:
0290: // ----------------------------------------------
0291: // Attribution to: "Voytenko, Dimitry" <DVoytenko@SECTORBASE.COM>
0292: elemDesc = (ElemDesc) m_elementFlags.get("FRAME");
0293: elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
0294: elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
0295: elemDesc.setAttr("NORESIZE", ElemDesc.ATTREMPTY);
0296:
0297: // ----------------------------------------------
0298: elemDesc = (ElemDesc) m_elementFlags.get("HEAD");
0299: elemDesc.setAttr("PROFILE", ElemDesc.ATTRURL);
0300:
0301: // ----------------------------------------------
0302: elemDesc = (ElemDesc) m_elementFlags.get("HR");
0303: elemDesc.setAttr("NOSHADE", ElemDesc.ATTREMPTY);
0304:
0305: // ----------------------------------------------
0306: // HTML 4.0, section 16.5
0307: elemDesc = (ElemDesc) m_elementFlags.get("IFRAME");
0308: elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
0309: elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
0310:
0311: // ----------------------------------------------
0312: // Netscape 4 extension
0313: elemDesc = (ElemDesc) m_elementFlags.get("ILAYER");
0314: elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
0315:
0316: // ----------------------------------------------
0317: elemDesc = (ElemDesc) m_elementFlags.get("IMG");
0318: elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
0319: elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
0320: elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
0321: elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
0322:
0323: // ----------------------------------------------
0324: elemDesc = (ElemDesc) m_elementFlags.get("INPUT");
0325: elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
0326: elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
0327: elemDesc.setAttr("CHECKED", ElemDesc.ATTREMPTY);
0328: elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
0329: elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
0330: elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
0331:
0332: // ----------------------------------------------
0333: elemDesc = (ElemDesc) m_elementFlags.get("INS");
0334: elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
0335:
0336: // ----------------------------------------------
0337: // Netscape 4 extension
0338: elemDesc = (ElemDesc) m_elementFlags.get("LAYER");
0339: elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
0340:
0341: // ----------------------------------------------
0342: elemDesc = (ElemDesc) m_elementFlags.get("LINK");
0343: elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
0344:
0345: // ----------------------------------------------
0346: elemDesc = (ElemDesc) m_elementFlags.get("MENU");
0347: elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
0348:
0349: // ----------------------------------------------
0350: elemDesc = (ElemDesc) m_elementFlags.get("OBJECT");
0351: elemDesc.setAttr("CLASSID", ElemDesc.ATTRURL);
0352: elemDesc.setAttr("CODEBASE", ElemDesc.ATTRURL);
0353: elemDesc.setAttr("DATA", ElemDesc.ATTRURL);
0354: elemDesc.setAttr("ARCHIVE", ElemDesc.ATTRURL);
0355: elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
0356: elemDesc.setAttr("DECLARE", ElemDesc.ATTREMPTY);
0357:
0358: // ----------------------------------------------
0359: elemDesc = (ElemDesc) m_elementFlags.get("OL");
0360: elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
0361:
0362: // ----------------------------------------------
0363: elemDesc = (ElemDesc) m_elementFlags.get("OPTGROUP");
0364: elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
0365:
0366: // ----------------------------------------------
0367: elemDesc = (ElemDesc) m_elementFlags.get("OPTION");
0368: elemDesc.setAttr("SELECTED", ElemDesc.ATTREMPTY);
0369: elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
0370:
0371: // ----------------------------------------------
0372: elemDesc = (ElemDesc) m_elementFlags.get("Q");
0373: elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
0374:
0375: // ----------------------------------------------
0376: elemDesc = (ElemDesc) m_elementFlags.get("SCRIPT");
0377: elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
0378: elemDesc.setAttr("FOR", ElemDesc.ATTRURL);
0379: elemDesc.setAttr("DEFER", ElemDesc.ATTREMPTY);
0380:
0381: // ----------------------------------------------
0382: elemDesc = (ElemDesc) m_elementFlags.get("SELECT");
0383: elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
0384: elemDesc.setAttr("MULTIPLE", ElemDesc.ATTREMPTY);
0385:
0386: // ----------------------------------------------
0387: elemDesc = (ElemDesc) m_elementFlags.get("TABLE");
0388: elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
0389:
0390: // ----------------------------------------------
0391: elemDesc = (ElemDesc) m_elementFlags.get("TD");
0392: elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
0393:
0394: // ----------------------------------------------
0395: elemDesc = (ElemDesc) m_elementFlags.get("TEXTAREA");
0396: elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
0397: elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
0398:
0399: // ----------------------------------------------
0400: elemDesc = (ElemDesc) m_elementFlags.get("TH");
0401: elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
0402:
0403: // ----------------------------------------------
0404: // The nowrap attribute of a tr element is both
0405: // a Netscape and Internet-Explorer extension
0406: elemDesc = (ElemDesc) m_elementFlags.get("TR");
0407: elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
0408:
0409: // ----------------------------------------------
0410: elemDesc = (ElemDesc) m_elementFlags.get("UL");
0411: elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
0412: }
0413:
0414: /**
0415: * Dummy element for elements not found.
0416: */
0417: static private final ElemDesc m_dummy = new ElemDesc(
0418: 0 | ElemDesc.BLOCK);
0419:
0420: /** True if URLs should be specially escaped with the %xx form. */
0421: private boolean m_specialEscapeURLs = true;
0422:
0423: /** True if the META tag should be omitted. */
0424: private boolean m_omitMetaTag = false;
0425:
0426: /**
0427: * Tells if the formatter should use special URL escaping.
0428: *
0429: * @param bool True if URLs should be specially escaped with the %xx form.
0430: */
0431: public void setSpecialEscapeURLs(boolean bool) {
0432: m_specialEscapeURLs = bool;
0433: }
0434:
0435: /**
0436: * Tells if the formatter should omit the META tag.
0437: *
0438: * @param bool True if the META tag should be omitted.
0439: */
0440: public void setOmitMetaTag(boolean bool) {
0441: m_omitMetaTag = bool;
0442: }
0443:
0444: /**
0445: * Specifies an output format for this serializer. It the
0446: * serializer has already been associated with an output format,
0447: * it will switch to the new format. This method should not be
0448: * called while the serializer is in the process of serializing
0449: * a document.
0450: *
0451: * This method can be called multiple times before starting
0452: * the serialization of a particular result-tree. In principle
0453: * all serialization parameters can be changed, with the exception
0454: * of method="html" (it must be method="html" otherwise we
0455: * shouldn't even have a ToHTMLStream object here!)
0456: *
0457: * @param format The output format or serialzation parameters
0458: * to use.
0459: */
0460: public void setOutputFormat(Properties format) {
0461:
0462: m_specialEscapeURLs = OutputPropertyUtils.getBooleanProperty(
0463: OutputPropertiesFactory.S_USE_URL_ESCAPING, format);
0464:
0465: m_omitMetaTag = OutputPropertyUtils.getBooleanProperty(
0466: OutputPropertiesFactory.S_OMIT_META_TAG, format);
0467:
0468: super .setOutputFormat(format);
0469: }
0470:
0471: /**
0472: * Tells if the formatter should use special URL escaping.
0473: *
0474: * @return True if URLs should be specially escaped with the %xx form.
0475: */
0476: private final boolean getSpecialEscapeURLs() {
0477: return m_specialEscapeURLs;
0478: }
0479:
0480: /**
0481: * Tells if the formatter should omit the META tag.
0482: *
0483: * @return True if the META tag should be omitted.
0484: */
0485: private final boolean getOmitMetaTag() {
0486: return m_omitMetaTag;
0487: }
0488:
0489: /**
0490: * Get a description of the given element.
0491: *
0492: * @param name non-null name of element, case insensitive.
0493: *
0494: * @return non-null reference to ElemDesc, which may be m_dummy if no
0495: * element description matches the given name.
0496: */
0497: public static final ElemDesc getElemDesc(String name) {
0498: /* this method used to return m_dummy when name was null
0499: * but now it doesn't check and and requires non-null name.
0500: */
0501: Object obj = m_elementFlags.get(name);
0502: if (null != obj)
0503: return (ElemDesc) obj;
0504: return m_dummy;
0505: }
0506:
0507: /**
0508: * A Trie that is just a copy of the "static" one.
0509: * We need this one to be able to use the faster, but not thread-safe
0510: * method Trie.get2(name)
0511: */
0512: private Trie m_htmlInfo = new Trie(m_elementFlags);
0513:
0514: /**
0515: * Calls to this method could be replaced with calls to
0516: * getElemDesc(name), but this one should be faster.
0517: */
0518: private ElemDesc getElemDesc2(String name) {
0519: Object obj = m_htmlInfo.get2(name);
0520: if (null != obj)
0521: return (ElemDesc) obj;
0522: return m_dummy;
0523: }
0524:
/**
 * Creates a serializer that uses the HTML character information and a
 * fresh set of namespace mappings.
 */
public ToHTMLStream() {
    super();
    this.m_charInfo = m_htmlcharInfo;
    this.m_prefixMap = new NamespaceMappings(); // start with clean namespaces
}
0536:
0537: /** The name of the current element. */
0538: // private String m_currentElementName = null;
0539: /**
0540: * Receive notification of the beginning of a document.
0541: *
0542: * @throws org.xml.sax.SAXException Any SAX exception, possibly
0543: * wrapping another exception.
0544: *
0545: * @throws org.xml.sax.SAXException
0546: */
0547: protected void startDocumentInternal()
0548: throws org.xml.sax.SAXException {
0549: super .startDocumentInternal();
0550:
0551: m_needToCallStartDocument = false;
0552: m_needToOutputDocTypeDecl = true;
0553: m_startNewLine = false;
0554: setOmitXMLDeclaration(true);
0555:
0556: if (true == m_needToOutputDocTypeDecl) {
0557: String doctypeSystem = getDoctypeSystem();
0558: String doctypePublic = getDoctypePublic();
0559: if ((null != doctypeSystem) || (null != doctypePublic)) {
0560: final java.io.Writer writer = m_writer;
0561: try {
0562: writer.write("<!DOCTYPE html");
0563:
0564: if (null != doctypePublic) {
0565: writer.write(" PUBLIC \"");
0566: writer.write(doctypePublic);
0567: writer.write('"');
0568: }
0569:
0570: if (null != doctypeSystem) {
0571: if (null == doctypePublic)
0572: writer.write(" SYSTEM \"");
0573: else
0574: writer.write(" \"");
0575:
0576: writer.write(doctypeSystem);
0577: writer.write('"');
0578: }
0579:
0580: writer.write('>');
0581: outputLineSep();
0582: } catch (IOException e) {
0583: throw new SAXException(e);
0584: }
0585: }
0586: }
0587:
0588: m_needToOutputDocTypeDecl = false;
0589: }
0590:
0591: /**
0592: * Receive notification of the end of a document.
0593: *
0594: * @throws org.xml.sax.SAXException Any SAX exception, possibly
0595: * wrapping another exception.
0596: *
0597: * @throws org.xml.sax.SAXException
0598: */
0599: public final void endDocument() throws org.xml.sax.SAXException {
0600:
0601: flushPending();
0602: if (m_doIndent && !m_isprevtext) {
0603: try {
0604: outputLineSep();
0605: } catch (IOException e) {
0606: throw new SAXException(e);
0607: }
0608: }
0609:
0610: flushWriter();
0611: if (m_tracer != null)
0612: super .fireEndDoc();
0613: }
0614:
0615: /**
0616: * Receive notification of the beginning of an element.
0617: *
0618: *
0619: * @param namespaceURI
0620: * @param localName
0621: * @param name The element type name.
0622: * @param atts The attributes attached to the element, if any.
0623: * @throws org.xml.sax.SAXException Any SAX exception, possibly
0624: * wrapping another exception.
0625: * @see #endElement
0626: * @see org.xml.sax.AttributeList
0627: */
0628: public void startElement(String namespaceURI, String localName,
0629: String name, Attributes atts)
0630: throws org.xml.sax.SAXException {
0631:
0632: ElemContext elemContext = m_elemContext;
0633:
0634: // clean up any pending things first
0635: if (elemContext.m_startTagOpen) {
0636: closeStartTag();
0637: elemContext.m_startTagOpen = false;
0638: } else if (m_cdataTagOpen) {
0639: closeCDATA();
0640: m_cdataTagOpen = false;
0641: } else if (m_needToCallStartDocument) {
0642: startDocumentInternal();
0643: m_needToCallStartDocument = false;
0644: }
0645:
0646: // if this element has a namespace then treat it like XML
0647: if (null != namespaceURI && namespaceURI.length() > 0) {
0648: super .startElement(namespaceURI, localName, name, atts);
0649:
0650: return;
0651: }
0652:
0653: try {
0654: // getElemDesc2(name) is faster than getElemDesc(name)
0655: ElemDesc elemDesc = getElemDesc2(name);
0656: int elemFlags = elemDesc.getFlags();
0657:
0658: // deal with indentation issues first
0659: if (m_doIndent) {
0660:
0661: boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
0662: if (m_ispreserve)
0663: m_ispreserve = false;
0664: else if ((null != elemContext.m_elementName)
0665: && (!m_inBlockElem || isBlockElement) /* && !isWhiteSpaceSensitive */
0666: ) {
0667: m_startNewLine = true;
0668:
0669: indent();
0670:
0671: }
0672: m_inBlockElem = !isBlockElement;
0673: }
0674:
0675: // save any attributes for later processing
0676: if (atts != null)
0677: addAttributes(atts);
0678:
0679: m_isprevtext = false;
0680: final java.io.Writer writer = m_writer;
0681: writer.write('<');
0682: writer.write(name);
0683:
0684: if (m_tracer != null)
0685: firePseudoAttributes();
0686:
0687: if ((elemFlags & ElemDesc.EMPTY) != 0) {
0688: // an optimization for elements which are expected
0689: // to be empty.
0690: m_elemContext = elemContext.push();
0691: /* XSLTC sometimes calls namespaceAfterStartElement()
0692: * so we need to remember the name
0693: */
0694: m_elemContext.m_elementName = name;
0695: m_elemContext.m_elementDesc = elemDesc;
0696: return;
0697: } else {
0698: elemContext = elemContext.push(namespaceURI, localName,
0699: name);
0700: m_elemContext = elemContext;
0701: elemContext.m_elementDesc = elemDesc;
0702: elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0;
0703: }
0704:
0705: if ((elemFlags & ElemDesc.HEADELEM) != 0) {
0706: // This is the <HEAD> element, do some special processing
0707: closeStartTag();
0708: elemContext.m_startTagOpen = false;
0709: if (!m_omitMetaTag) {
0710: if (m_doIndent)
0711: indent();
0712: writer
0713: .write("<META http-equiv=\"Content-Type\" content=\"text/html; charset=");
0714: String encoding = getEncoding();
0715: String encode = Encodings.getMimeEncoding(encoding);
0716: writer.write(encode);
0717: writer.write("\">");
0718: }
0719: }
0720: } catch (IOException e) {
0721: throw new SAXException(e);
0722: }
0723: }
0724:
0725: /**
0726: * Receive notification of the end of an element.
0727: *
0728: *
0729: * @param namespaceURI
0730: * @param localName
0731: * @param name The element type name
0732: * @throws org.xml.sax.SAXException Any SAX exception, possibly
0733: * wrapping another exception.
0734: */
0735: public final void endElement(final String namespaceURI,
0736: final String localName, final String name)
0737: throws org.xml.sax.SAXException {
0738: // deal with any pending issues
0739: if (m_cdataTagOpen)
0740: closeCDATA();
0741:
0742: // if the element has a namespace, treat it like XML, not HTML
0743: if (null != namespaceURI && namespaceURI.length() > 0) {
0744: super .endElement(namespaceURI, localName, name);
0745:
0746: return;
0747: }
0748:
0749: try {
0750:
0751: ElemContext elemContext = m_elemContext;
0752: final ElemDesc elemDesc = elemContext.m_elementDesc;
0753: final int elemFlags = elemDesc.getFlags();
0754: final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0;
0755:
0756: // deal with any indentation issues
0757: if (m_doIndent) {
0758: final boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
0759: boolean shouldIndent = false;
0760:
0761: if (m_ispreserve) {
0762: m_ispreserve = false;
0763: } else if (m_doIndent
0764: && (!m_inBlockElem || isBlockElement)) {
0765: m_startNewLine = true;
0766: shouldIndent = true;
0767: }
0768: if (!elemContext.m_startTagOpen && shouldIndent)
0769: indent(elemContext.m_currentElemDepth - 1);
0770: m_inBlockElem = !isBlockElement;
0771: }
0772:
0773: final java.io.Writer writer = m_writer;
0774: if (!elemContext.m_startTagOpen) {
0775: writer.write("</");
0776: writer.write(name);
0777: writer.write('>');
0778: } else {
0779: // the start-tag open when this method was called,
0780: // so we need to process it now.
0781:
0782: if (m_tracer != null)
0783: super .fireStartElem(name);
0784:
0785: // the starting tag was still open when we received this endElement() call
0786: // so we need to process any gathered attributes NOW, before they go away.
0787: int nAttrs = m_attributes.getLength();
0788: if (nAttrs > 0) {
0789: processAttributes(m_writer, nAttrs);
0790: // clear attributes object for re-use with next element
0791: m_attributes.clear();
0792: }
0793: if (!elemEmpty) {
0794: // As per Dave/Paul recommendation 12/06/2000
0795: // if (shouldIndent)
0796: // writer.write('>');
0797: // indent(m_currentIndent);
0798:
0799: writer.write("></");
0800: writer.write(name);
0801: writer.write('>');
0802: } else {
0803: writer.write('>');
0804: }
0805: }
0806:
0807: // clean up because the element has ended
0808: if ((elemFlags & ElemDesc.WHITESPACESENSITIVE) != 0)
0809: m_ispreserve = true;
0810: m_isprevtext = false;
0811:
0812: // fire off the end element event
0813: if (m_tracer != null)
0814: super .fireEndElem(name);
0815:
0816: // OPTIMIZE-EMPTY
0817: if (elemEmpty) {
0818: // a quick exit if the HTML element had no children.
0819: // This block of code can be removed if the corresponding block of code
0820: // in startElement() also labeled with "OPTIMIZE-EMPTY" is also removed
0821: m_elemContext = elemContext.m_prev;
0822: return;
0823: }
0824:
0825: // some more clean because the element has ended.
0826: if (!elemContext.m_startTagOpen) {
0827: if (m_doIndent && !m_preserves.isEmpty())
0828: m_preserves.pop();
0829: }
0830: m_elemContext = elemContext.m_prev;
0831: // m_isRawStack.pop();
0832: } catch (IOException e) {
0833: throw new SAXException(e);
0834: }
0835: }
0836:
0837: /**
0838: * Process an attribute.
0839: * @param writer The writer to write the processed output to.
0840: * @param name The name of the attribute.
0841: * @param value The value of the attribute.
0842: * @param elemDesc The description of the HTML element
0843: * that has this attribute.
0844: *
* @throws java.io.IOException if writing to the writer fails.
0846: */
0847: protected void processAttribute(java.io.Writer writer, String name,
0848: String value, ElemDesc elemDesc) throws IOException {
0849: writer.write(' ');
0850:
0851: if (((value.length() == 0) || value.equalsIgnoreCase(name))
0852: && elemDesc != null
0853: && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY)) {
0854: writer.write(name);
0855: } else {
0856: // %REVIEW% %OPT%
0857: // Two calls to single-char write may NOT
0858: // be more efficient than one to string-write...
0859: writer.write(name);
0860: writer.write("=\"");
0861: if (elemDesc != null
0862: && elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL))
0863: writeAttrURI(writer, value, m_specialEscapeURLs);
0864: else
0865: writeAttrString(writer, value, this .getEncoding());
0866: writer.write('"');
0867:
0868: }
0869: }
0870:
0871: /**
0872: * Tell if a character is an ASCII digit.
0873: */
0874: private boolean isASCIIDigit(char c) {
0875: return (c >= '0' && c <= '9');
0876: }
0877:
0878: /**
0879: * Make an integer into an HH hex value.
0880: * Does no checking on the size of the input, since this
0881: * is only meant to be used locally by writeAttrURI.
0882: *
0883: * @param i must be a value less than 255.
0884: *
0885: * @return should be a two character string.
0886: */
0887: private static String makeHHString(int i) {
0888: String s = Integer.toHexString(i).toUpperCase();
0889: if (s.length() == 1) {
0890: s = "0" + s;
0891: }
0892: return s;
0893: }
0894:
0895: /**
0896: * Dmitri Ilyin: Makes sure if the String is HH encoded sign.
0897: * @param str must be 2 characters long
0898: *
0899: * @return true or false
0900: */
0901: private boolean isHHSign(String str) {
0902: boolean sign = true;
0903: try {
0904: char r = (char) Integer.parseInt(str, 16);
0905: } catch (NumberFormatException e) {
0906: sign = false;
0907: }
0908: return sign;
0909: }
0910:
0911: /**
0912: * Write the specified <var>string</var> after substituting non ASCII characters,
0913: * with <CODE>%HH</CODE>, where HH is the hex of the byte value.
0914: *
0915: * @param string String to convert to XML format.
0916: * @param doURLEscaping True if we should try to encode as
0917: * per http://www.ietf.org/rfc/rfc2396.txt.
0918: *
* @throws java.io.IOException if writing to the writer fails.
0920: */
0921: public void writeAttrURI(final java.io.Writer writer,
0922: String string, boolean doURLEscaping) throws IOException {
0923: // http://www.ietf.org/rfc/rfc2396.txt says:
0924: // A URI is always in an "escaped" form, since escaping or unescaping a
0925: // completed URI might change its semantics. Normally, the only time
0926: // escape encodings can safely be made is when the URI is being created
0927: // from its component parts; each component may have its own set of
0928: // characters that are reserved, so only the mechanism responsible for
0929: // generating or interpreting that component can determine whether or
0930: // not escaping a character will change its semantics. Likewise, a URI
0931: // must be separated into its components before the escaped characters
0932: // within those components can be safely decoded.
0933: //
0934: // ...So we do our best to do limited escaping of the URL, without
0935: // causing damage. If the URL is already properly escaped, in theory, this
0936: // function should not change the string value.
0937:
0938: final int end = string.length();
0939: if (end > m_attrBuff.length) {
0940: m_attrBuff = new char[end * 2 + 1];
0941: }
0942: string.getChars(0, end, m_attrBuff, 0);
0943: final char[] chars = m_attrBuff;
0944:
0945: int cleanStart = 0;
0946: int cleanLength = 0;
0947:
0948: char ch = 0;
0949: for (int i = 0; i < end; i++) {
0950: ch = chars[i];
0951:
0952: if ((ch < 32) || (ch > 126)) {
0953: if (cleanLength > 0) {
0954: writer.write(chars, cleanStart, cleanLength);
0955: cleanLength = 0;
0956: }
0957: if (doURLEscaping) {
0958: // Encode UTF16 to UTF8.
0959: // Reference is Unicode, A Primer, by Tony Graham.
0960: // Page 92.
0961:
0962: // Note that Kay doesn't escape 0x20...
0963: // if(ch == 0x20) // Not sure about this... -sb
0964: // {
0965: // writer.write(ch);
0966: // }
0967: // else
0968: if (ch <= 0x7F) {
0969: writer.write('%');
0970: writer.write(makeHHString(ch));
0971: } else if (ch <= 0x7FF) {
0972: // Clear low 6 bits before rotate, put high 4 bits in low byte,
0973: // and set two high bits.
0974: int high = (ch >> 6) | 0xC0;
0975: int low = (ch & 0x3F) | 0x80;
0976: // First 6 bits, + high bit
0977: writer.write('%');
0978: writer.write(makeHHString(high));
0979: writer.write('%');
0980: writer.write(makeHHString(low));
0981: } else if (Encodings.isHighUTF16Surrogate(ch)) // high surrogate
0982: {
0983: // I'm sure this can be done in 3 instructions, but I choose
0984: // to try and do it exactly like it is done in the book, at least
0985: // until we are sure this is totally clean. I don't think performance
0986: // is a big issue with this particular function, though I could be
0987: // wrong. Also, the stuff below clearly does more masking than
0988: // it needs to do.
0989:
0990: // Clear high 6 bits.
0991: int highSurrogate = ((int) ch) & 0x03FF;
0992:
0993: // Middle 4 bits (wwww) + 1
0994: // "Note that the value of wwww from the high surrogate bit pattern
0995: // is incremented to make the uuuuu bit pattern in the scalar value
0996: // so the surrogate pair don't address the BMP."
0997: int wwww = ((highSurrogate & 0x03C0) >> 6);
0998: int uuuuu = wwww + 1;
0999:
1000: // next 4 bits
1001: int zzzz = (highSurrogate & 0x003C) >> 2;
1002:
1003: // low 2 bits
1004: int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30;
1005:
1006: // Get low surrogate character.
1007: ch = chars[++i];
1008:
1009: // Clear high 6 bits.
1010: int lowSurrogate = ((int) ch) & 0x03FF;
1011:
1012: // put the middle 4 bits into the bottom of yyyyyy (byte 3)
1013: yyyyyy = yyyyyy
1014: | ((lowSurrogate & 0x03C0) >> 6);
1015:
1016: // bottom 6 bits.
1017: int xxxxxx = (lowSurrogate & 0x003F);
1018:
1019: int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu
1020: int byte2 = 0x80
1021: | (((uuuuu & 0x03) << 4) & 0x30) | zzzz;
1022: int byte3 = 0x80 | yyyyyy;
1023: int byte4 = 0x80 | xxxxxx;
1024:
1025: writer.write('%');
1026: writer.write(makeHHString(byte1));
1027: writer.write('%');
1028: writer.write(makeHHString(byte2));
1029: writer.write('%');
1030: writer.write(makeHHString(byte3));
1031: writer.write('%');
1032: writer.write(makeHHString(byte4));
1033: } else {
1034: int high = (ch >> 12) | 0xE0; // top 4 bits
1035: int middle = ((ch & 0x0FC0) >> 6) | 0x80;
1036: // middle 6 bits
1037: int low = (ch & 0x3F) | 0x80;
1038: // First 6 bits, + high bit
1039: writer.write('%');
1040: writer.write(makeHHString(high));
1041: writer.write('%');
1042: writer.write(makeHHString(middle));
1043: writer.write('%');
1044: writer.write(makeHHString(low));
1045: }
1046:
1047: } else if (escapingNotNeeded(ch)) {
1048: writer.write(ch);
1049: } else {
1050: writer.write("&#");
1051: writer.write(Integer.toString(ch));
1052: writer.write(';');
1053: }
1054: // In this character range we have first written out any previously accumulated
1055: // "clean" characters, then processed the current more complicated character,
1056: // which may have incremented "i".
1057: // We now we reset the next possible clean character.
1058: cleanStart = i + 1;
1059: }
1060: // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as
1061: // not allowing quotes in the URI proper syntax, nor in the fragment
1062: // identifier, we believe that it's OK to double escape quotes.
1063: else if (ch == '"') {
1064: // If the character is a '%' number number, try to avoid double-escaping.
1065: // There is a question if this is legal behavior.
1066:
1067: // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded
1068: // The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little.
1069:
1070: // if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) )
1071:
1072: // We are no longer escaping '%'
1073:
1074: if (cleanLength > 0) {
1075: writer.write(chars, cleanStart, cleanLength);
1076: cleanLength = 0;
1077: }
1078:
1079: // Mike Kay encodes this as ", so he may know something I don't?
1080: if (doURLEscaping)
1081: writer.write("%22");
1082: else
1083: writer.write("""); // we have to escape this, I guess.
1084:
1085: // We have written out any clean characters, then the escaped '%' and now we
1086: // We now we reset the next possible clean character.
1087: cleanStart = i + 1;
1088: } else {
1089: // no processing for this character, just count how
1090: // many characters in a row that we have that need no processing
1091: cleanLength++;
1092: }
1093: }
1094:
1095: // are there any clean characters at the end of the array
1096: // that we haven't processed yet?
1097: if (cleanLength > 1) {
1098: // if the whole string can be written out as-is do so
1099: // otherwise write out the clean chars at the end of the
1100: // array
1101: if (cleanStart == 0)
1102: writer.write(string);
1103: else
1104: writer.write(chars, cleanStart, cleanLength);
1105: } else if (cleanLength == 1) {
1106: // a little optimization for 1 clean character
1107: // (we could have let the previous if(...) handle them all)
1108: writer.write(ch);
1109: }
1110: }
1111:
1112: /**
1113: * Writes the specified <var>string</var> after substituting <VAR>specials</VAR>,
1114: * and UTF-16 surrogates for character references <CODE>&#xnn</CODE>.
1115: *
1116: * @param string String to convert to XML format.
1117: * @param encoding CURRENTLY NOT IMPLEMENTED.
1118: *
* @throws java.io.IOException if writing to the writer fails.
1120: */
1121: public void writeAttrString(final java.io.Writer writer,
1122: String string, String encoding) throws IOException {
1123: final int end = string.length();
1124: if (end > m_attrBuff.length) {
1125: m_attrBuff = new char[end * 2 + 1];
1126: }
1127: string.getChars(0, end, m_attrBuff, 0);
1128: final char[] chars = m_attrBuff;
1129:
1130: int cleanStart = 0;
1131: int cleanLength = 0;
1132:
1133: char ch = 0;
1134: for (int i = 0; i < end; i++) {
1135: ch = chars[i];
1136:
1137: // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE);
1138: // System.out.println("ch: "+(int)ch);
1139: // System.out.println("m_maxCharacter: "+(int)m_maxCharacter);
1140: // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]);
1141: if (escapingNotNeeded(ch)
1142: && (!m_charInfo.isSpecialAttrChar(ch))) {
1143: cleanLength++;
1144: } else if ('<' == ch || '>' == ch) {
1145: cleanLength++; // no escaping in this case, as specified in 15.2
1146: } else if (('&' == ch) && ((i + 1) < end)
1147: && ('{' == chars[i + 1])) {
1148: cleanLength++; // no escaping in this case, as specified in 15.2
1149: } else {
1150: if (cleanLength > 0) {
1151: writer.write(chars, cleanStart, cleanLength);
1152: cleanLength = 0;
1153: }
1154: int pos = accumDefaultEntity(writer, ch, i, chars, end,
1155: false, true);
1156:
1157: if (i != pos) {
1158: i = pos - 1;
1159: } else {
1160: if (Encodings.isHighUTF16Surrogate(ch)) {
1161:
1162: writeUTF16Surrogate(ch, chars, i, end);
1163: i++; // two input characters processed
1164: // this increments by one and the for()
1165: // loop itself increments by another one.
1166: }
1167:
1168: // The next is kind of a hack to keep from escaping in the case
1169: // of Shift_JIS and the like.
1170:
1171: /*
1172: else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF)
1173: && (ch != 160))
1174: {
1175: writer.write(ch); // no escaping in this case
1176: }
1177: else
1178: */
1179: String outputStringForChar = m_charInfo
1180: .getOutputStringForChar(ch);
1181: if (null != outputStringForChar) {
1182: writer.write(outputStringForChar);
1183: } else if (escapingNotNeeded(ch)) {
1184: writer.write(ch); // no escaping in this case
1185: } else {
1186: writer.write("&#");
1187: writer.write(Integer.toString(ch));
1188: writer.write(';');
1189: }
1190: }
1191: cleanStart = i + 1;
1192: }
1193: } // end of for()
1194:
1195: // are there any clean characters at the end of the array
1196: // that we haven't processed yet?
1197: if (cleanLength > 1) {
1198: // if the whole string can be written out as-is do so
1199: // otherwise write out the clean chars at the end of the
1200: // array
1201: if (cleanStart == 0)
1202: writer.write(string);
1203: else
1204: writer.write(chars, cleanStart, cleanLength);
1205: } else if (cleanLength == 1) {
1206: // a little optimization for 1 clean character
1207: // (we could have let the previous if(...) handle them all)
1208: writer.write(ch);
1209: }
1210: }
1211:
1212: /**
1213: * Receive notification of character data.
1214: *
1215: * <p>The Parser will call this method to report each chunk of
1216: * character data. SAX parsers may return all contiguous character
1217: * data in a single chunk, or they may split it into several
1218: * chunks; however, all of the characters in any single event
1219: * must come from the same external entity, so that the Locator
1220: * provides useful information.</p>
1221: *
1222: * <p>The application must not attempt to read from the array
1223: * outside of the specified range.</p>
1224: *
1225: * <p>Note that some parsers will report whitespace using the
1226: * ignorableWhitespace() method rather than this one (validating
1227: * parsers must do so).</p>
1228: *
1229: * @param chars The characters from the XML document.
1230: * @param start The start position in the array.
1231: * @param length The number of characters to read from the array.
1232: * @throws org.xml.sax.SAXException Any SAX exception, possibly
1233: * wrapping another exception.
1234: * @see #ignorableWhitespace
1235: * @see org.xml.sax.Locator
1236: *
1237: * @throws org.xml.sax.SAXException
1238: */
1239: public final void characters(char chars[], int start, int length)
1240: throws org.xml.sax.SAXException {
1241:
1242: if (m_elemContext.m_isRaw) {
1243: try {
1244: if (m_elemContext.m_startTagOpen) {
1245: closeStartTag();
1246: m_elemContext.m_startTagOpen = false;
1247: }
1248: m_ispreserve = true;
1249:
1250: // With m_ispreserve just set true it looks like shouldIndent()
1251: // will always return false, so drop any possible indentation.
1252: // if (shouldIndent())
1253: // indent();
1254:
1255: // writer.write("<![CDATA[");
1256: // writer.write(chars, start, length);
1257: writeNormalizedChars(chars, start, length, false,
1258: m_lineSepUse);
1259:
1260: // writer.write("]]>");
1261:
1262: // time to generate characters event
1263: if (m_tracer != null)
1264: super .fireCharEvent(chars, start, length);
1265:
1266: return;
1267: } catch (IOException ioe) {
1268: throw new org.xml.sax.SAXException(Utils.messages
1269: .createMessage(MsgKey.ER_OIERROR, null), ioe);
1270: //"IO error", ioe);
1271: }
1272: } else {
1273: super .characters(chars, start, length);
1274: }
1275: }
1276:
1277: /**
1278: * Receive notification of cdata.
1279: *
1280: * <p>The Parser will call this method to report each chunk of
1281: * character data. SAX parsers may return all contiguous character
1282: * data in a single chunk, or they may split it into several
1283: * chunks; however, all of the characters in any single event
1284: * must come from the same external entity, so that the Locator
1285: * provides useful information.</p>
1286: *
1287: * <p>The application must not attempt to read from the array
1288: * outside of the specified range.</p>
1289: *
1290: * <p>Note that some parsers will report whitespace using the
1291: * ignorableWhitespace() method rather than this one (validating
1292: * parsers must do so).</p>
1293: *
1294: * @param ch The characters from the XML document.
1295: * @param start The start position in the array.
1296: * @param length The number of characters to read from the array.
1297: * @throws org.xml.sax.SAXException Any SAX exception, possibly
1298: * wrapping another exception.
1299: * @see #ignorableWhitespace
1300: * @see org.xml.sax.Locator
1301: *
1302: * @throws org.xml.sax.SAXException
1303: */
1304: public final void cdata(char ch[], int start, int length)
1305: throws org.xml.sax.SAXException {
1306:
1307: if ((null != m_elemContext.m_elementName)
1308: && (m_elemContext.m_elementName
1309: .equalsIgnoreCase("SCRIPT") || m_elemContext.m_elementName
1310: .equalsIgnoreCase("STYLE"))) {
1311: try {
1312: if (m_elemContext.m_startTagOpen) {
1313: closeStartTag();
1314: m_elemContext.m_startTagOpen = false;
1315: }
1316:
1317: m_ispreserve = true;
1318:
1319: if (shouldIndent())
1320: indent();
1321:
1322: // writer.write(ch, start, length);
1323: writeNormalizedChars(ch, start, length, true,
1324: m_lineSepUse);
1325: } catch (IOException ioe) {
1326: throw new org.xml.sax.SAXException(Utils.messages
1327: .createMessage(MsgKey.ER_OIERROR, null), ioe);
1328: //"IO error", ioe);
1329: }
1330: } else {
1331: super .cdata(ch, start, length);
1332: }
1333: }
1334:
1335: /**
1336: * Receive notification of a processing instruction.
1337: *
1338: * @param target The processing instruction target.
1339: * @param data The processing instruction data, or null if
1340: * none was supplied.
1341: * @throws org.xml.sax.SAXException Any SAX exception, possibly
1342: * wrapping another exception.
1343: *
1344: * @throws org.xml.sax.SAXException
1345: */
1346: public void processingInstruction(String target, String data)
1347: throws org.xml.sax.SAXException {
1348:
1349: // Process any pending starDocument and startElement first.
1350: flushPending();
1351:
1352: // Use a fairly nasty hack to tell if the next node is supposed to be
1353: // unescaped text.
1354: if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING)) {
1355: startNonEscaping();
1356: } else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING)) {
1357: endNonEscaping();
1358: } else {
1359: try {
1360: if (m_elemContext.m_startTagOpen) {
1361: closeStartTag();
1362: m_elemContext.m_startTagOpen = false;
1363: } else if (m_needToCallStartDocument)
1364: startDocumentInternal();
1365:
1366: if (shouldIndent())
1367: indent();
1368:
1369: final java.io.Writer writer = m_writer;
1370: //writer.write("<?" + target);
1371: writer.write("<?");
1372: writer.write(target);
1373:
1374: if (data.length() > 0
1375: && !Character.isSpaceChar(data.charAt(0)))
1376: writer.write(' ');
1377:
1378: //writer.write(data + ">"); // different from XML
1379: writer.write(data); // different from XML
1380: writer.write('>'); // different from XML
1381:
1382: // Always output a newline char if not inside of an
1383: // element. The whitespace is not significant in that
1384: // case.
1385: if (m_elemContext.m_currentElemDepth <= 0)
1386: outputLineSep();
1387:
1388: m_startNewLine = true;
1389: } catch (IOException e) {
1390: throw new SAXException(e);
1391: }
1392: }
1393:
1394: // now generate the PI event
1395: if (m_tracer != null)
1396: super .fireEscapingEvent(target, data);
1397: }
1398:
1399: /**
* Receive notification of an entity reference.
1401: *
1402: * @param name non-null reference to entity name string.
1403: *
1404: * @throws org.xml.sax.SAXException
1405: */
1406: public final void entityReference(String name)
1407: throws org.xml.sax.SAXException {
1408: try {
1409:
1410: final java.io.Writer writer = m_writer;
1411: writer.write('&');
1412: writer.write(name);
1413: writer.write(';');
1414:
1415: } catch (IOException e) {
1416: throw new SAXException(e);
1417: }
1418: }
1419:
1420: /**
1421: * @see ExtendedContentHandler#endElement(String)
1422: */
1423: public final void endElement(String elemName) throws SAXException {
1424: endElement(null, null, elemName);
1425: }
1426:
1427: /**
1428: * Process the attributes, which means to write out the currently
1429: * collected attributes to the writer. The attributes are not
1430: * cleared by this method
1431: *
1432: * @param writer the writer to write processed attributes to.
1433: * @param nAttrs the number of attributes in m_attributes
1434: * to be processed
1435: *
1436: * @throws org.xml.sax.SAXException
1437: */
1438: public void processAttributes(java.io.Writer writer, int nAttrs)
1439: throws IOException, SAXException {
1440: /*
1441: * process the collected attributes
1442: */
1443: for (int i = 0; i < nAttrs; i++) {
1444: processAttribute(writer, m_attributes.getQName(i),
1445: m_attributes.getValue(i),
1446: m_elemContext.m_elementDesc);
1447: }
1448: }
1449:
1450: /**
1451: * For the enclosing elements starting tag write out out any attributes
1452: * followed by ">"
1453: *
1454: *@throws org.xml.sax.SAXException
1455: */
1456: protected void closeStartTag() throws SAXException {
1457: try {
1458:
1459: // finish processing attributes, time to fire off the start element event
1460: if (m_tracer != null)
1461: super .fireStartElem(m_elemContext.m_elementName);
1462:
1463: int nAttrs = m_attributes.getLength();
1464: if (nAttrs > 0) {
1465: processAttributes(m_writer, nAttrs);
1466: // clear attributes object for re-use with next element
1467: m_attributes.clear();
1468: }
1469:
1470: m_writer.write('>');
1471:
1472: /* whether Xalan or XSLTC, we have the prefix mappings now, so
1473: * lets determine if the current element is specified in the cdata-
1474: * section-elements list.
1475: */
1476: if (m_cdataSectionElements != null)
1477: m_elemContext.m_isCdataSection = isCdataSection();
1478: if (m_doIndent) {
1479: m_isprevtext = false;
1480: m_preserves.push(m_ispreserve);
1481: }
1482:
1483: } catch (IOException e) {
1484: throw new SAXException(e);
1485: }
1486: }
1487:
1488: /**
1489: * Initialize the serializer with the specified output stream and output
1490: * format. Must be called before calling any of the serialize methods.
1491: *
1492: * @param output The output stream to use
1493: * @param format The output format
1494: * @throws UnsupportedEncodingException The encoding specified in the
1495: * output format is not supported
1496: */
1497: protected synchronized void init(OutputStream output,
1498: Properties format) throws UnsupportedEncodingException {
1499: if (null == format) {
1500: format = OutputPropertiesFactory
1501: .getDefaultMethodProperties(Method.HTML);
1502: }
1503: super .init(output, format, false);
1504: }
1505:
1506: /**
1507: * Specifies an output stream to which the document should be
1508: * serialized. This method should not be called while the
1509: * serializer is in the process of serializing a document.
1510: * <p>
1511: * The encoding specified in the output properties is used, or
1512: * if no encoding was specified, the default for the selected
1513: * output method.
1514: *
1515: * @param output The output stream
1516: */
1517: public void setOutputStream(OutputStream output) {
1518:
1519: try {
1520: Properties format;
1521: if (null == m_format)
1522: format = OutputPropertiesFactory
1523: .getDefaultMethodProperties(Method.HTML);
1524: else
1525: format = m_format;
1526: init(output, format, true);
1527: } catch (UnsupportedEncodingException uee) {
1528:
1529: // Should have been warned in init, I guess...
1530: }
1531: }
1532:
1533: /**
1534: * This method is used when a prefix/uri namespace mapping
1535: * is indicated after the element was started with a
1536: * startElement() and before and endElement().
1537: * startPrefixMapping(prefix,uri) would be used before the
1538: * startElement() call.
1539: * @param uri the URI of the namespace
1540: * @param prefix the prefix associated with the given URI.
1541: *
1542: * @see ExtendedContentHandler#namespaceAfterStartElement(String, String)
1543: */
1544: public void namespaceAfterStartElement(String prefix, String uri)
1545: throws SAXException {
1546: // hack for XSLTC with finding URI for default namespace
1547: if (m_elemContext.m_elementURI == null) {
1548: String prefix1 = getPrefixPart(m_elemContext.m_elementName);
1549: if (prefix1 == null && EMPTYSTRING.equals(prefix)) {
1550: // the elements URI is not known yet, and it
1551: // doesn't have a prefix, and we are currently
1552: // setting the uri for prefix "", so we have
1553: // the uri for the element... lets remember it
1554: m_elemContext.m_elementURI = uri;
1555: }
1556: }
1557: startPrefixMapping(prefix, uri, false);
1558: }
1559:
1560: public void startDTD(String name, String publicId, String systemId)
1561: throws SAXException {
1562: m_inDTD = true;
1563: super .startDTD(name, publicId, systemId);
1564: }
1565:
1566: /**
1567: * Report the end of DTD declarations.
1568: * @throws org.xml.sax.SAXException The application may raise an exception.
1569: * @see #startDTD
1570: */
1571: public void endDTD() throws org.xml.sax.SAXException {
1572: m_inDTD = false;
1573: /* for ToHTMLStream the DOCTYPE is entirely output in the
1574: * startDocumentInternal() method, so don't do anything here
1575: */
1576: }
1577:
1578: /**
1579: * This method does nothing.
1580: */
1581: public void attributeDecl(String eName, String aName, String type,
1582: String valueDefault, String value) throws SAXException {
1583: // The internal DTD subset is not serialized by the ToHTMLStream serializer
1584: }
1585:
1586: /**
1587: * This method does nothing.
1588: */
1589: public void elementDecl(String name, String model)
1590: throws SAXException {
1591: // The internal DTD subset is not serialized by the ToHTMLStream serializer
1592: }
1593:
1594: /**
1595: * This method does nothing.
1596: */
1597: public void internalEntityDecl(String name, String value)
1598: throws SAXException {
1599: // The internal DTD subset is not serialized by the ToHTMLStream serializer
1600: }
1601:
1602: /**
1603: * This method does nothing.
1604: */
1605: public void externalEntityDecl(String name, String publicId,
1606: String systemId) throws SAXException {
1607: // The internal DTD subset is not serialized by the ToHTMLStream serializer
1608: }
1609:
1610: /**
1611: * This method is used to add an attribute to the currently open element.
* The caller has guaranteed that this attribute is unique, which means that it
* has not been seen before and will not be seen again.
1614: *
1615: * @param name the qualified name of the attribute
* @param value the value of the attribute which can contain only
* ASCII printable characters in the range 32 to 127 inclusive.
1618: * @param flags the bit values of this integer give optimization information.
1619: */
1620: public void addUniqueAttribute(String name, String value, int flags)
1621: throws SAXException {
1622: try {
1623: final java.io.Writer writer = m_writer;
1624: if ((flags & NO_BAD_CHARS) > 0
1625: && m_htmlcharInfo.onlyQuotAmpLtGt) {
1626: // "flags" has indicated that the characters
1627: // '>' '<' '&' and '"' are not in the value and
1628: // m_htmlcharInfo has recorded that there are no other
1629: // entities in the range 0 to 127 so we write out the
1630: // value directly
1631: writer.write(' ');
1632: writer.write(name);
1633: writer.write("=\"");
1634: writer.write(value);
1635: writer.write('"');
1636: } else if ((flags & HTML_ATTREMPTY) > 0
1637: && (value.length() == 0 || value
1638: .equalsIgnoreCase(name))) {
1639: writer.write(' ');
1640: writer.write(name);
1641: } else {
1642: writer.write(' ');
1643: writer.write(name);
1644: writer.write("=\"");
1645: if ((flags & HTML_ATTRURL) > 0) {
1646: writeAttrURI(writer, value, m_specialEscapeURLs);
1647: } else {
1648: writeAttrString(writer, value, this .getEncoding());
1649: }
1650: writer.write('"');
1651: }
1652: } catch (IOException e) {
1653: throw new SAXException(e);
1654: }
1655: }
1656:
1657: public void comment(char ch[], int start, int length)
1658: throws SAXException {
1659: // The internal DTD subset is not serialized by the ToHTMLStream serializer
1660: if (m_inDTD)
1661: return;
1662: super .comment(ch, start, length);
1663: }
1664:
1665: public boolean reset() {
1666: boolean ret = super .reset();
1667: if (!ret)
1668: return false;
1669: initToHTMLStream();
1670: return true;
1671: }
1672:
1673: private void initToHTMLStream() {
1674: // m_elementDesc = null;
1675: m_inBlockElem = false;
1676: m_inDTD = false;
1677: // m_isRawStack.clear();
1678: m_omitMetaTag = false;
1679: m_specialEscapeURLs = true;
1680: }
1681:
1682: static class Trie {
1683: /**
1684: * A digital search trie for 7-bit ASCII text
1685: * The API is a subset of java.util.Hashtable
1686: * The key must be a 7-bit ASCII string
1687: * The value may be any Java Object
1688: * One can get an object stored in a trie from its key,
1689: * but the search is either case sensitive or case
1690: * insensitive to the characters in the key, and this
1691: * choice of sensitivity or insensitivity is made when
1692: * the Trie is created, before any objects are put in it.
1693: *
1694: * This class is a copy of the one in org.apache.xml.utils.
1695: * It exists to cut the serializers dependancy on that package.
1696: *
1697: * @xsl.usage internal
1698: */
1699:
1700: /** Size of the m_nextChar array. */
1701: public static final int ALPHA_SIZE = 128;
1702:
1703: /** The root node of the tree. */
1704: final Node m_Root;
1705:
1706: /** helper buffer to convert Strings to char arrays */
1707: private char[] m_charBuffer = new char[0];
1708:
1709: /** true if the search for an object is lower case only with the key */
1710: private final boolean m_lowerCaseOnly;
1711:
1712: /**
1713: * Construct the trie that has a case insensitive search.
1714: */
public Trie() {
    // Same as the one-argument constructor with lowerCaseOnly == false:
    // a fresh root node and case insensitive keys.
    this(false);
}
1719:
1720: /**
1721: * Construct the trie given the desired case sensitivity with the key.
* @param lowerCaseOnly true if the search keys are to be lower case only,
* not case insensitive.
1724: */
public Trie(boolean lowerCaseOnly) {
    // Remember the key-case policy and start with an empty root node.
    this.m_lowerCaseOnly = lowerCaseOnly;
    this.m_Root = new Node();
}
1729:
1730: /**
1731: * Put an object into the trie for lookup.
1732: *
1733: * @param key must be a 7-bit ASCII string
1734: * @param value any java object.
1735: *
1736: * @return The old object that matched key, or null.
1737: */
1738: public Object put(String key, Object value) {
1739:
1740: final int len = key.length();
1741: if (len > m_charBuffer.length) {
1742: // make the biggest buffer ever needed in get(String)
1743: m_charBuffer = new char[len];
1744: }
1745:
1746: Node node = m_Root;
1747:
1748: for (int i = 0; i < len; i++) {
1749: Node nextNode = node.m_nextChar[Character
1750: .toLowerCase(key.charAt(i))];
1751:
1752: if (nextNode != null) {
1753: node = nextNode;
1754: } else {
1755: for (; i < len; i++) {
1756: Node newNode = new Node();
1757: if (m_lowerCaseOnly) {
1758: // put this value into the tree only with a lower case key
1759: node.m_nextChar[Character.toLowerCase(key
1760: .charAt(i))] = newNode;
1761: } else {
1762: // put this value into the tree with a case insensitive key
1763: node.m_nextChar[Character.toUpperCase(key
1764: .charAt(i))] = newNode;
1765: node.m_nextChar[Character.toLowerCase(key
1766: .charAt(i))] = newNode;
1767: }
1768: node = newNode;
1769: }
1770: break;
1771: }
1772: }
1773:
1774: Object ret = node.m_Value;
1775:
1776: node.m_Value = value;
1777:
1778: return ret;
1779: }
1780:
1781: /**
1782: * Get an object that matches the key.
1783: *
1784: * @param key must be a 7-bit ASCII string
1785: *
1786: * @return The object that matches the key, or null.
1787: */
1788: public Object get(final String key) {
1789:
1790: final int len = key.length();
1791:
1792: /* If the name is too long, we won't find it, this also keeps us
1793: * from overflowing m_charBuffer
1794: */
1795: if (m_charBuffer.length < len)
1796: return null;
1797:
1798: Node node = m_Root;
1799: switch (len) // optimize the look up based on the number of chars
1800: {
1801: // case 0 looks silly, but the generated bytecode runs
1802: // faster for lookup of elements of length 2 with this in
1803: // and a fair bit faster. Don't know why.
1804: case 0: {
1805: return null;
1806: }
1807:
1808: case 1: {
1809: final char ch = key.charAt(0);
1810: if (ch < ALPHA_SIZE) {
1811: node = node.m_nextChar[ch];
1812: if (node != null)
1813: return node.m_Value;
1814: }
1815: return null;
1816: }
1817: // comment out case 2 because the default is faster
1818: // case 2 :
1819: // {
1820: // final char ch0 = key.charAt(0);
1821: // final char ch1 = key.charAt(1);
1822: // if (ch0 < ALPHA_SIZE && ch1 < ALPHA_SIZE)
1823: // {
1824: // node = node.m_nextChar[ch0];
1825: // if (node != null)
1826: // {
1827: //
1828: // if (ch1 < ALPHA_SIZE)
1829: // {
1830: // node = node.m_nextChar[ch1];
1831: // if (node != null)
1832: // return node.m_Value;
1833: // }
1834: // }
1835: // }
1836: // return null;
1837: // }
1838: default: {
1839: for (int i = 0; i < len; i++) {
1840: // A thread-safe way to loop over the characters
1841: final char ch = key.charAt(i);
1842: if (ALPHA_SIZE <= ch) {
1843: // the key is not 7-bit ASCII so we won't find it here
1844: return null;
1845: }
1846:
1847: node = node.m_nextChar[ch];
1848: if (node == null)
1849: return null;
1850: }
1851:
1852: return node.m_Value;
1853: }
1854: }
1855: }
1856:
1857: /**
1858: * The node representation for the trie.
1859: * @xsl.usage internal
1860: */
1861: private class Node {
1862:
1863: /**
1864: * Constructor, creates a Node[ALPHA_SIZE].
1865: */
1866: Node() {
1867: m_nextChar = new Node[ALPHA_SIZE];
1868: m_Value = null;
1869: }
1870:
1871: /** The next nodes. */
1872: final Node m_nextChar[];
1873:
1874: /** The value. */
1875: Object m_Value;
1876: }
1877:
/**
 * Construct a trie that shares its node table with another Trie.
 * The root node (and therefore every node below it) is the same object
 * in both tries, so the table is assumed to be fully populated and no
 * longer changing. The new trie gets a character buffer of its own.
 *
 * @param existingTrie the Trie whose table this one shares.
 */
public Trie(Trie existingTrie) {
    // same case handling and same node table as the source trie
    m_lowerCaseOnly = existingTrie.m_lowerCaseOnly;
    m_Root = existingTrie.m_Root;

    // a private buffer, just big enough for the longest key in the table
    m_charBuffer = new char[existingTrie.getLongestKeyLength()];
}
1895:
1896: /**
1897: * Get an object that matches the key.
1898: * This method is faster than get(), but is not thread-safe.
1899: *
1900: * @param key must be a 7-bit ASCII string
1901: *
1902: * @return The object that matches the key, or null.
1903: */
1904: public Object get2(final String key) {
1905:
1906: final int len = key.length();
1907:
1908: /* If the name is too long, we won't find it, this also keeps us
1909: * from overflowing m_charBuffer
1910: */
1911: if (m_charBuffer.length < len)
1912: return null;
1913:
1914: Node node = m_Root;
1915: switch (len) // optimize the look up based on the number of chars
1916: {
1917: // case 0 looks silly, but the generated bytecode runs
1918: // faster for lookup of elements of length 2 with this in
1919: // and a fair bit faster. Don't know why.
1920: case 0: {
1921: return null;
1922: }
1923:
1924: case 1: {
1925: final char ch = key.charAt(0);
1926: if (ch < ALPHA_SIZE) {
1927: node = node.m_nextChar[ch];
1928: if (node != null)
1929: return node.m_Value;
1930: }
1931: return null;
1932: }
1933: default: {
1934: /* Copy string into array. This is not thread-safe because
1935: * it modifies the contents of m_charBuffer. If multiple
1936: * threads were to use this Trie they all would be
1937: * using this same array (not good). So this
1938: * method is not thread-safe, but it is faster because
1939: * converting to a char[] and looping over elements of
1940: * the array is faster than a String's charAt(i).
1941: */
1942: key.getChars(0, len, m_charBuffer, 0);
1943:
1944: for (int i = 0; i < len; i++) {
1945: final char ch = m_charBuffer[i];
1946: if (ALPHA_SIZE <= ch) {
1947: // the key is not 7-bit ASCII so we won't find it here
1948: return null;
1949: }
1950:
1951: node = node.m_nextChar[ch];
1952: if (node == null)
1953: return null;
1954: }
1955:
1956: return node.m_Value;
1957: }
1958: }
1959: }
1960:
1961: /**
1962: * Get the length of the longest key used in the table.
1963: */
1964: public int getLongestKeyLength() {
1965: return m_charBuffer.length;
1966: }
1967: }
1968: }
|