0001: /*
0002: * $Id: XMLEntityCodec.java 8077 2007-08-27 20:15:25Z aperepel $
0003: * --------------------------------------------------------------------------------------
0004: * Copyright (c) MuleSource, Inc. All rights reserved. http://www.mulesource.com
0005: *
0006: * The software in this package is published under the terms of the CPAL v1.0
0007: * license, a copy of which has been included with this distribution in the
0008: * LICENSE.txt file.
0009: */
0010:
0011: package org.mule.util;
0012:
0013: import java.io.IOException;
0014: import java.io.Writer;
0015: import java.util.HashMap;
0016: import java.util.Map;
0017:
0018: /**
0019: * This encoder contains methods that convert characters to Character entities as
0020: * defined by http://www.w3.org/TR/REC-html40/sgml/entities.html. More precisely it
0021: * combines the functionality of {@link org.apache.commons.lang.StringEscapeUtils#escapeXml(String)} and
0022: * {@link org.apache.commons.lang.StringEscapeUtils#escapeHtml(String)} into a single pass.
0023: */
0024: // @ThreadSafe
0025: public final class XMLEntityCodec {
/** Entity table used by the static helpers of this class; filled by the static initializer below. */
private static final Entities MuleEntities = new Entities();

static {
    // Registration order: apostrophe, basic XML, ISO-8859-1, HTML 4.0.
    final String[][][] entityTables = {
        Entities.APOS_ARRAY,
        Entities.BASIC_ARRAY,
        Entities.ISO8859_1_ARRAY,
        Entities.HTML40_ARRAY,
    };
    for (int t = 0; t < entityTables.length; t++) {
        MuleEntities.addEntities(entityTables[t]);
    }
}
0034:
protected XMLEntityCodec() {
    // nothing to initialise
}
0038:
0039: public static String encodeString(String str) {
0040: if (StringUtils.isEmpty(str)) {
0041: return str;
0042: }
0043:
0044: return MuleEntities.escape(str);
0045: }
0046:
0047: public static String decodeString(String str) {
0048: if (StringUtils.isEmpty(str)) {
0049: return str;
0050: }
0051:
0052: return MuleEntities.unescape(str);
0053: }
0054:
0055: /**
0056: * <p>
0057: * Returns the name of the entity identified by the specified value.
0058: * </p>
0059: *
0060: * @param value the value to locate
0061: * @return entity name associated with the specified value
0062: */
0063: public static String entityName(int value) {
0064: return MuleEntities.map.name(value);
0065: }
0066:
0067: /**
0068: * <p>
0069: * Returns the value of the entity identified by the specified name.
0070: * </p>
0071: *
0072: * @param name the name to locate
0073: * @return entity value associated with the specified name
0074: */
0075: public static int entityValue(String name) {
0076: return MuleEntities.map.value(name);
0077: }
0078:
0079: //
0080: // everything from here on is copied from commons-lang 2.2 + svn since it is not
0081: // extensible and referencing the package-private class can lead to classloader
0082: // problems :-(
0083: //
0084:
0085: /*
0086: * Licensed to the Apache Software Foundation (ASF) under one or more
0087: * contributor license agreements. See the NOTICE file distributed with
0088: * this work for additional information regarding copyright ownership.
0089: * The ASF licenses this file to You under the Apache License, Version 2.0
0090: * (the "License"); you may not use this file except in compliance with
0091: * the License. You may obtain a copy of the License at
0092: *
0093: * http://www.apache.org/licenses/LICENSE-2.0
0094: *
0095: * Unless required by applicable law or agreed to in writing, software
0096: * distributed under the License is distributed on an "AS IS" BASIS,
0097: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0098: * See the License for the specific language governing permissions and
0099: * limitations under the License.
0100: */
0101:
0102: /**
0103: * <p>Provides HTML and XML entity utilities.</p>
0104: *
0105: * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
0106: * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
0107: * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
0108: * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
0109: * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
0110: *
0111: * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a>
0112: * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a>
0113: * @since 2.0
0114: * @version $Id: XMLEntityCodec.java 8077 2007-08-27 20:15:25Z aperepel $
0115: */
0116: private static class Entities {
0117:
0118: private static final String[][] BASIC_ARRAY = {
0119: { "quot", "34" }, // " - double-quote
0120: { "amp", "38" }, // & - ampersand
0121: { "lt", "60" }, // < - less-than
0122: { "gt", "62" }, // > - greater-than
0123: };
0124:
0125: private static final String[][] APOS_ARRAY = { { "apos", "39" }, // XML apostrophe
0126: };
0127:
0128: // package scoped for testing
0129: static final String[][] ISO8859_1_ARRAY = { { "nbsp", "160" }, // non-breaking space
0130: { "iexcl", "161" }, //inverted exclamation mark
0131: { "cent", "162" }, //cent sign
0132: { "pound", "163" }, //pound sign
0133: { "curren", "164" }, //currency sign
0134: { "yen", "165" }, //yen sign = yuan sign
0135: { "brvbar", "166" }, //broken bar = broken vertical bar
0136: { "sect", "167" }, //section sign
0137: { "uml", "168" }, //diaeresis = spacing diaeresis
0138: { "copy", "169" }, // © - copyright sign
0139: { "ordf", "170" }, //feminine ordinal indicator
0140: { "laquo", "171" }, //left-pointing double angle quotation mark = left pointing guillemet
0141: { "not", "172" }, //not sign
0142: { "shy", "173" }, //soft hyphen = discretionary hyphen
0143: { "reg", "174" }, // ® - registered trademark sign
0144: { "macr", "175" }, //macron = spacing macron = overline = APL overbar
0145: { "deg", "176" }, //degree sign
0146: { "plusmn", "177" }, //plus-minus sign = plus-or-minus sign
0147: { "sup2", "178" }, //superscript two = superscript digit two = squared
0148: { "sup3", "179" }, //superscript three = superscript digit three = cubed
0149: { "acute", "180" }, //acute accent = spacing acute
0150: { "micro", "181" }, //micro sign
0151: { "para", "182" }, //pilcrow sign = paragraph sign
0152: { "middot", "183" }, //middle dot = Georgian comma = Greek middle dot
0153: { "cedil", "184" }, //cedilla = spacing cedilla
0154: { "sup1", "185" }, //superscript one = superscript digit one
0155: { "ordm", "186" }, //masculine ordinal indicator
0156: { "raquo", "187" }, //right-pointing double angle quotation mark = right pointing guillemet
0157: { "frac14", "188" }, //vulgar fraction one quarter = fraction one quarter
0158: { "frac12", "189" }, //vulgar fraction one half = fraction one half
0159: { "frac34", "190" }, //vulgar fraction three quarters = fraction three quarters
0160: { "iquest", "191" }, //inverted question mark = turned question mark
0161: { "Agrave", "192" }, // À - uppercase A, grave accent
0162: { "Aacute", "193" }, // Á - uppercase A, acute accent
0163: { "Acirc", "194" }, // Â - uppercase A, circumflex accent
0164: { "Atilde", "195" }, // Ã - uppercase A, tilde
0165: { "Auml", "196" }, // Ä - uppercase A, umlaut
0166: { "Aring", "197" }, // Å - uppercase A, ring
0167: { "AElig", "198" }, // Æ - uppercase AE
0168: { "Ccedil", "199" }, // Ç - uppercase C, cedilla
0169: { "Egrave", "200" }, // È - uppercase E, grave accent
0170: { "Eacute", "201" }, // É - uppercase E, acute accent
0171: { "Ecirc", "202" }, // Ê - uppercase E, circumflex accent
0172: { "Euml", "203" }, // Ë - uppercase E, umlaut
0173: { "Igrave", "204" }, // Ì - uppercase I, grave accent
0174: { "Iacute", "205" }, // Í - uppercase I, acute accent
0175: { "Icirc", "206" }, // Î - uppercase I, circumflex accent
0176: { "Iuml", "207" }, // Ï - uppercase I, umlaut
0177: { "ETH", "208" }, // Ð - uppercase Eth, Icelandic
0178: { "Ntilde", "209" }, // Ñ - uppercase N, tilde
0179: { "Ograve", "210" }, // Ò - uppercase O, grave accent
0180: { "Oacute", "211" }, // Ó - uppercase O, acute accent
0181: { "Ocirc", "212" }, // Ô - uppercase O, circumflex accent
0182: { "Otilde", "213" }, // Õ - uppercase O, tilde
0183: { "Ouml", "214" }, // Ö - uppercase O, umlaut
0184: { "times", "215" }, //multiplication sign
0185: { "Oslash", "216" }, // Ø - uppercase O, slash
0186: { "Ugrave", "217" }, // Ù - uppercase U, grave accent
0187: { "Uacute", "218" }, // Ú - uppercase U, acute accent
0188: { "Ucirc", "219" }, // Û - uppercase U, circumflex accent
0189: { "Uuml", "220" }, // Ü - uppercase U, umlaut
0190: { "Yacute", "221" }, // Ý - uppercase Y, acute accent
0191: { "THORN", "222" }, // Þ - uppercase THORN, Icelandic
0192: { "szlig", "223" }, // ß - lowercase sharps, German
0193: { "agrave", "224" }, // à - lowercase a, grave accent
0194: { "aacute", "225" }, // á - lowercase a, acute accent
0195: { "acirc", "226" }, // â - lowercase a, circumflex accent
0196: { "atilde", "227" }, // ã - lowercase a, tilde
0197: { "auml", "228" }, // ä - lowercase a, umlaut
0198: { "aring", "229" }, // å - lowercase a, ring
0199: { "aelig", "230" }, // æ - lowercase ae
0200: { "ccedil", "231" }, // ç - lowercase c, cedilla
0201: { "egrave", "232" }, // è - lowercase e, grave accent
0202: { "eacute", "233" }, // é - lowercase e, acute accent
0203: { "ecirc", "234" }, // ê - lowercase e, circumflex accent
0204: { "euml", "235" }, // ë - lowercase e, umlaut
0205: { "igrave", "236" }, // ì - lowercase i, grave accent
0206: { "iacute", "237" }, // í - lowercase i, acute accent
0207: { "icirc", "238" }, // î - lowercase i, circumflex accent
0208: { "iuml", "239" }, // ï - lowercase i, umlaut
0209: { "eth", "240" }, // ð - lowercase eth, Icelandic
0210: { "ntilde", "241" }, // ñ - lowercase n, tilde
0211: { "ograve", "242" }, // ò - lowercase o, grave accent
0212: { "oacute", "243" }, // ó - lowercase o, acute accent
0213: { "ocirc", "244" }, // ô - lowercase o, circumflex accent
0214: { "otilde", "245" }, // õ - lowercase o, tilde
0215: { "ouml", "246" }, // ö - lowercase o, umlaut
0216: { "divide", "247" }, // division sign
0217: { "oslash", "248" }, // ø - lowercase o, slash
0218: { "ugrave", "249" }, // ù - lowercase u, grave accent
0219: { "uacute", "250" }, // ú - lowercase u, acute accent
0220: { "ucirc", "251" }, // û - lowercase u, circumflex accent
0221: { "uuml", "252" }, // ü - lowercase u, umlaut
0222: { "yacute", "253" }, // ý - lowercase y, acute accent
0223: { "thorn", "254" }, // þ - lowercase thorn, Icelandic
0224: { "yuml", "255" }, // ÿ - lowercase y, umlaut
0225: };
0226:
0227: // http://www.w3.org/TR/REC-html40/sgml/entities.html
0228: // package scoped for testing
0229: static final String[][] HTML40_ARRAY = {
0230: // <!-- Latin Extended-B -->
0231: { "fnof", "402" }, //latin small f with hook = function= florin, U+0192 ISOtech -->
0232: // <!-- Greek -->
0233: { "Alpha", "913" }, //greek capital letter alpha, U+0391 -->
0234: { "Beta", "914" }, //greek capital letter beta, U+0392 -->
0235: { "Gamma", "915" }, //greek capital letter gamma,U+0393 ISOgrk3 -->
0236: { "Delta", "916" }, //greek capital letter delta,U+0394 ISOgrk3 -->
0237: { "Epsilon", "917" }, //greek capital letter epsilon, U+0395 -->
0238: { "Zeta", "918" }, //greek capital letter zeta, U+0396 -->
0239: { "Eta", "919" }, //greek capital letter eta, U+0397 -->
0240: { "Theta", "920" }, //greek capital letter theta,U+0398 ISOgrk3 -->
0241: { "Iota", "921" }, //greek capital letter iota, U+0399 -->
0242: { "Kappa", "922" }, //greek capital letter kappa, U+039A -->
0243: { "Lambda", "923" }, //greek capital letter lambda,U+039B ISOgrk3 -->
0244: { "Mu", "924" }, //greek capital letter mu, U+039C -->
0245: { "Nu", "925" }, //greek capital letter nu, U+039D -->
0246: { "Xi", "926" }, //greek capital letter xi, U+039E ISOgrk3 -->
0247: { "Omicron", "927" }, //greek capital letter omicron, U+039F -->
0248: { "Pi", "928" }, //greek capital letter pi, U+03A0 ISOgrk3 -->
0249: { "Rho", "929" }, //greek capital letter rho, U+03A1 -->
0250: // <!-- there is no Sigmaf, and no U+03A2 character either -->
0251: { "Sigma", "931" }, //greek capital letter sigma,U+03A3 ISOgrk3 -->
0252: { "Tau", "932" }, //greek capital letter tau, U+03A4 -->
0253: { "Upsilon", "933" }, //greek capital letter upsilon,U+03A5 ISOgrk3 -->
0254: { "Phi", "934" }, //greek capital letter phi,U+03A6 ISOgrk3 -->
0255: { "Chi", "935" }, //greek capital letter chi, U+03A7 -->
0256: { "Psi", "936" }, //greek capital letter psi,U+03A8 ISOgrk3 -->
0257: { "Omega", "937" }, //greek capital letter omega,U+03A9 ISOgrk3 -->
0258: { "alpha", "945" }, //greek small letter alpha,U+03B1 ISOgrk3 -->
0259: { "beta", "946" }, //greek small letter beta, U+03B2 ISOgrk3 -->
0260: { "gamma", "947" }, //greek small letter gamma,U+03B3 ISOgrk3 -->
0261: { "delta", "948" }, //greek small letter delta,U+03B4 ISOgrk3 -->
0262: { "epsilon", "949" }, //greek small letter epsilon,U+03B5 ISOgrk3 -->
0263: { "zeta", "950" }, //greek small letter zeta, U+03B6 ISOgrk3 -->
0264: { "eta", "951" }, //greek small letter eta, U+03B7 ISOgrk3 -->
0265: { "theta", "952" }, //greek small letter theta,U+03B8 ISOgrk3 -->
0266: { "iota", "953" }, //greek small letter iota, U+03B9 ISOgrk3 -->
0267: { "kappa", "954" }, //greek small letter kappa,U+03BA ISOgrk3 -->
0268: { "lambda", "955" }, //greek small letter lambda,U+03BB ISOgrk3 -->
0269: { "mu", "956" }, //greek small letter mu, U+03BC ISOgrk3 -->
0270: { "nu", "957" }, //greek small letter nu, U+03BD ISOgrk3 -->
0271: { "xi", "958" }, //greek small letter xi, U+03BE ISOgrk3 -->
0272: { "omicron", "959" }, //greek small letter omicron, U+03BF NEW -->
0273: { "pi", "960" }, //greek small letter pi, U+03C0 ISOgrk3 -->
0274: { "rho", "961" }, //greek small letter rho, U+03C1 ISOgrk3 -->
0275: { "sigmaf", "962" }, //greek small letter final sigma,U+03C2 ISOgrk3 -->
0276: { "sigma", "963" }, //greek small letter sigma,U+03C3 ISOgrk3 -->
0277: { "tau", "964" }, //greek small letter tau, U+03C4 ISOgrk3 -->
0278: { "upsilon", "965" }, //greek small letter upsilon,U+03C5 ISOgrk3 -->
0279: { "phi", "966" }, //greek small letter phi, U+03C6 ISOgrk3 -->
0280: { "chi", "967" }, //greek small letter chi, U+03C7 ISOgrk3 -->
0281: { "psi", "968" }, //greek small letter psi, U+03C8 ISOgrk3 -->
0282: { "omega", "969" }, //greek small letter omega,U+03C9 ISOgrk3 -->
0283: { "thetasym", "977" }, //greek small letter theta symbol,U+03D1 NEW -->
0284: { "upsih", "978" }, //greek upsilon with hook symbol,U+03D2 NEW -->
0285: { "piv", "982" }, //greek pi symbol, U+03D6 ISOgrk3 -->
0286: // <!-- General Punctuation -->
0287: { "bull", "8226" }, //bullet = black small circle,U+2022 ISOpub -->
0288: // <!-- bullet is NOT the same as bullet operator, U+2219 -->
0289: { "hellip", "8230" }, //horizontal ellipsis = three dot leader,U+2026 ISOpub -->
0290: { "prime", "8242" }, //prime = minutes = feet, U+2032 ISOtech -->
0291: { "Prime", "8243" }, //double prime = seconds = inches,U+2033 ISOtech -->
0292: { "oline", "8254" }, //overline = spacing overscore,U+203E NEW -->
0293: { "frasl", "8260" }, //fraction slash, U+2044 NEW -->
0294: // <!-- Letterlike Symbols -->
0295: { "weierp", "8472" }, //script capital P = power set= Weierstrass p, U+2118 ISOamso -->
0296: { "image", "8465" }, //blackletter capital I = imaginary part,U+2111 ISOamso -->
0297: { "real", "8476" }, //blackletter capital R = real part symbol,U+211C ISOamso -->
0298: { "trade", "8482" }, //trade mark sign, U+2122 ISOnum -->
0299: { "alefsym", "8501" }, //alef symbol = first transfinite cardinal,U+2135 NEW -->
0300: // <!-- alef symbol is NOT the same as hebrew letter alef,U+05D0 although the
0301: // same glyph could be used to depict both characters -->
0302: // <!-- Arrows -->
0303: { "larr", "8592" }, //leftwards arrow, U+2190 ISOnum -->
0304: { "uarr", "8593" }, //upwards arrow, U+2191 ISOnum-->
0305: { "rarr", "8594" }, //rightwards arrow, U+2192 ISOnum -->
0306: { "darr", "8595" }, //downwards arrow, U+2193 ISOnum -->
0307: { "harr", "8596" }, //left right arrow, U+2194 ISOamsa -->
0308: { "crarr", "8629" }, //downwards arrow with corner leftwards= carriage return, U+21B5 NEW -->
0309: { "lArr", "8656" }, //leftwards double arrow, U+21D0 ISOtech -->
0310: // <!-- ISO 10646 does not say that lArr is the same as the 'is implied by'
0311: // arrow but also does not have any other character for that function.
0312: // So ? lArr canbe used for 'is implied by' as ISOtech suggests -->
0313: { "uArr", "8657" }, //upwards double arrow, U+21D1 ISOamsa -->
0314: { "rArr", "8658" }, //rightwards double arrow,U+21D2 ISOtech -->
0315: // <!-- ISO 10646 does not say this is the 'implies' character but does not
0316: // have another character with this function so ?rArr can be used for
0317: // 'implies' as ISOtech suggests -->
0318: { "dArr", "8659" }, //downwards double arrow, U+21D3 ISOamsa -->
0319: { "hArr", "8660" }, //left right double arrow,U+21D4 ISOamsa -->
0320: // <!-- Mathematical Operators -->
0321: { "forall", "8704" }, //for all, U+2200 ISOtech -->
0322: { "part", "8706" }, //partial differential, U+2202 ISOtech -->
0323: { "exist", "8707" }, //there exists, U+2203 ISOtech -->
0324: { "empty", "8709" }, //empty set = null set = diameter,U+2205 ISOamso -->
0325: { "nabla", "8711" }, //nabla = backward difference,U+2207 ISOtech -->
0326: { "isin", "8712" }, //element of, U+2208 ISOtech -->
0327: { "notin", "8713" }, //not an element of, U+2209 ISOtech -->
0328: { "ni", "8715" }, //contains as member, U+220B ISOtech -->
0329: // <!-- should there be a more memorable name than 'ni'? -->
0330: { "prod", "8719" }, //n-ary product = product sign,U+220F ISOamsb -->
0331: // <!-- prod is NOT the same character as U+03A0 'greek capital letter pi'
0332: // though the same glyph might be used for both -->
0333: { "sum", "8721" }, //n-ary summation, U+2211 ISOamsb -->
0334: // <!-- sum is NOT the same character as U+03A3 'greek capital letter sigma'
0335: // though the same glyph might be used for both -->
0336: { "minus", "8722" }, //minus sign, U+2212 ISOtech -->
0337: { "lowast", "8727" }, //asterisk operator, U+2217 ISOtech -->
0338: { "radic", "8730" }, //square root = radical sign,U+221A ISOtech -->
0339: { "prop", "8733" }, //proportional to, U+221D ISOtech -->
0340: { "infin", "8734" }, //infinity, U+221E ISOtech -->
0341: { "ang", "8736" }, //angle, U+2220 ISOamso -->
0342: { "and", "8743" }, //logical and = wedge, U+2227 ISOtech -->
0343: { "or", "8744" }, //logical or = vee, U+2228 ISOtech -->
0344: { "cap", "8745" }, //intersection = cap, U+2229 ISOtech -->
0345: { "cup", "8746" }, //union = cup, U+222A ISOtech -->
0346: { "int", "8747" }, //integral, U+222B ISOtech -->
0347: { "there4", "8756" }, //therefore, U+2234 ISOtech -->
0348: { "sim", "8764" }, //tilde operator = varies with = similar to,U+223C ISOtech -->
0349: // <!-- tilde operator is NOT the same character as the tilde, U+007E,although
0350: // the same glyph might be used to represent both -->
0351: { "cong", "8773" }, //approximately equal to, U+2245 ISOtech -->
0352: { "asymp", "8776" }, //almost equal to = asymptotic to,U+2248 ISOamsr -->
0353: { "ne", "8800" }, //not equal to, U+2260 ISOtech -->
0354: { "equiv", "8801" }, //identical to, U+2261 ISOtech -->
0355: { "le", "8804" }, //less-than or equal to, U+2264 ISOtech -->
0356: { "ge", "8805" }, //greater-than or equal to,U+2265 ISOtech -->
0357: { "sub", "8834" }, //subset of, U+2282 ISOtech -->
0358: { "sup", "8835" }, //superset of, U+2283 ISOtech -->
0359: // <!-- note that nsup, 'not a superset of, U+2283' is not covered by the
0360: // Symbol font encoding and is not included. Should it be, for symmetry?
0361: // It is in ISOamsn --> <!ENTITY nsub", "8836"},
0362: // not a subset of, U+2284 ISOamsn -->
0363: { "sube", "8838" }, //subset of or equal to, U+2286 ISOtech -->
0364: { "supe", "8839" }, //superset of or equal to,U+2287 ISOtech -->
0365: { "oplus", "8853" }, //circled plus = direct sum,U+2295 ISOamsb -->
0366: { "otimes", "8855" }, //circled times = vector product,U+2297 ISOamsb -->
0367: { "perp", "8869" }, //up tack = orthogonal to = perpendicular,U+22A5 ISOtech -->
0368: { "sdot", "8901" }, //dot operator, U+22C5 ISOamsb -->
0369: // <!-- dot operator is NOT the same character as U+00B7 middle dot -->
0370: // <!-- Miscellaneous Technical -->
0371: { "lceil", "8968" }, //left ceiling = apl upstile,U+2308 ISOamsc -->
0372: { "rceil", "8969" }, //right ceiling, U+2309 ISOamsc -->
0373: { "lfloor", "8970" }, //left floor = apl downstile,U+230A ISOamsc -->
0374: { "rfloor", "8971" }, //right floor, U+230B ISOamsc -->
0375: { "lang", "9001" }, //left-pointing angle bracket = bra,U+2329 ISOtech -->
0376: // <!-- lang is NOT the same character as U+003C 'less than' or U+2039 'single left-pointing angle quotation mark' -->
0377: { "rang", "9002" }, //right-pointing angle bracket = ket,U+232A ISOtech -->
0378: // <!-- rang is NOT the same character as U+003E 'greater than' or U+203A
0379: // 'single right-pointing angle quotation mark' -->
0380: // <!-- Geometric Shapes -->
0381: { "loz", "9674" }, //lozenge, U+25CA ISOpub -->
0382: // <!-- Miscellaneous Symbols -->
0383: { "spades", "9824" }, //black spade suit, U+2660 ISOpub -->
0384: // <!-- black here seems to mean filled as opposed to hollow -->
0385: { "clubs", "9827" }, //black club suit = shamrock,U+2663 ISOpub -->
0386: { "hearts", "9829" }, //black heart suit = valentine,U+2665 ISOpub -->
0387: { "diams", "9830" }, //black diamond suit, U+2666 ISOpub -->
0388:
0389: // <!-- Latin Extended-A -->
0390: { "OElig", "338" }, // -- latin capital ligature OE,U+0152 ISOlat2 -->
0391: { "oelig", "339" }, // -- latin small ligature oe, U+0153 ISOlat2 -->
0392: // <!-- ligature is a misnomer, this is a separate character in some languages -->
0393: { "Scaron", "352" }, // -- latin capital letter S with caron,U+0160 ISOlat2 -->
0394: { "scaron", "353" }, // -- latin small letter s with caron,U+0161 ISOlat2 -->
0395: { "Yuml", "376" }, // -- latin capital letter Y with diaeresis,U+0178 ISOlat2 -->
0396: // <!-- Spacing Modifier Letters -->
0397: { "circ", "710" }, // -- modifier letter circumflex accent,U+02C6 ISOpub -->
0398: { "tilde", "732" }, //small tilde, U+02DC ISOdia -->
0399: // <!-- General Punctuation -->
0400: { "ensp", "8194" }, //en space, U+2002 ISOpub -->
0401: { "emsp", "8195" }, //em space, U+2003 ISOpub -->
0402: { "thinsp", "8201" }, //thin space, U+2009 ISOpub -->
0403: { "zwnj", "8204" }, //zero width non-joiner,U+200C NEW RFC 2070 -->
0404: { "zwj", "8205" }, //zero width joiner, U+200D NEW RFC 2070 -->
0405: { "lrm", "8206" }, //left-to-right mark, U+200E NEW RFC 2070 -->
0406: { "rlm", "8207" }, //right-to-left mark, U+200F NEW RFC 2070 -->
0407: { "ndash", "8211" }, //en dash, U+2013 ISOpub -->
0408: { "mdash", "8212" }, //em dash, U+2014 ISOpub -->
0409: { "lsquo", "8216" }, //left single quotation mark,U+2018 ISOnum -->
0410: { "rsquo", "8217" }, //right single quotation mark,U+2019 ISOnum -->
0411: { "sbquo", "8218" }, //single low-9 quotation mark, U+201A NEW -->
0412: { "ldquo", "8220" }, //left double quotation mark,U+201C ISOnum -->
0413: { "rdquo", "8221" }, //right double quotation mark,U+201D ISOnum -->
0414: { "bdquo", "8222" }, //double low-9 quotation mark, U+201E NEW -->
0415: { "dagger", "8224" }, //dagger, U+2020 ISOpub -->
0416: { "Dagger", "8225" }, //double dagger, U+2021 ISOpub -->
0417: { "permil", "8240" }, //per mille sign, U+2030 ISOtech -->
0418: { "lsaquo", "8249" }, //single left-pointing angle quotation mark,U+2039 ISO proposed -->
0419: // <!-- lsaquo is proposed but not yet ISO standardized -->
0420: { "rsaquo", "8250" }, //single right-pointing angle quotation mark,U+203A ISO proposed -->
0421: // <!-- rsaquo is proposed but not yet ISO standardized -->
0422: { "euro", "8364" }, // -- euro sign, U+20AC NEW -->
0423: };
0424:
0425: // package scoped for testing
0426: private EntityMap map = new Entities.LookupEntityMap();
0427:
0428: /**
0429: * <p>
0430: * Adds entities to this entity.
0431: * </p>
0432: *
0433: * @param entityArray array of entities to be added
0434: */
0435: public void addEntities(String[][] entityArray) {
0436: for (int i = 0; i < entityArray.length; ++i) {
0437: addEntity(entityArray[i][0], Integer
0438: .parseInt(entityArray[i][1]));
0439: }
0440: }
0441:
0442: /**
0443: * <p>
0444: * Add an entity to this entity.
0445: * </p>
0446: *
0447: * @param name name of the entity
0448: * @param value vale of the entity
0449: */
0450: public void addEntity(String name, int value) {
0451: map.add(name, value);
0452: }
0453:
0454: /**
0455: * <p>
0456: * Returns the name of the entity identified by the specified value.
0457: * </p>
0458: *
0459: * @param value the value to locate
0460: * @return entity name associated with the specified value
0461: */
0462: public String entityName(int value) {
0463: return map.name(value);
0464: }
0465:
0466: /**
0467: * <p>
0468: * Returns the value of the entity identified by the specified name.
0469: * </p>
0470: *
0471: * @param name the name to locate
0472: * @return entity value associated with the specified name
0473: */
0474: public int entityValue(String name) {
0475: return map.value(name);
0476: }
0477:
0478: /**
0479: * <p>
0480: * Escapes the characters in a <code>String</code>.
0481: * </p>
0482: * <p>
0483: * For example, if you have called addEntity("foo", 0xA1),
0484: * escape("\u00A1") will return "&foo;"
0485: * </p>
0486: *
0487: * @param str The <code>String</code> to escape.
0488: * @return A new escaped <code>String</code>.
0489: */
0490: public String escape(String str) {
0491: // TODO: rewrite to use a Writer
0492: StringBuffer buf = new StringBuffer(str.length() * 2);
0493: for (int i = 0; i < str.length(); ++i) {
0494: char ch = str.charAt(i);
0495: String entityName = this .entityName(ch);
0496: if (entityName == null) {
0497: if (ch > 0x7F) {
0498: buf.append('&');
0499: buf.append('#');
0500: buf.append((int) ch);
0501: buf.append(';');
0502: } else {
0503: buf.append(ch);
0504: }
0505: } else {
0506: buf.append('&');
0507: buf.append(entityName);
0508: buf.append(';');
0509: }
0510: }
0511: return buf.toString();
0512: }
0513:
0514: /**
0515: * <p>
0516: * Escapes the characters in the <code>String</code> passed and writes the
0517: * result to the <code>Writer</code> passed.
0518: * </p>
0519: *
0520: * @param writer The <code>Writer</code> to write the results of the
0521: * escaping to. Assumed to be a non-null value.
0522: * @param str The <code>String</code> to escape. Assumed to be a non-null
0523: * value.
0524: * @throws IOException when <code>Writer</code> passed throws the exception
0525: * from calls to the {@link Writer#write(int)} methods.
0526: * @see #escape(String)
0527: * @see Writer
0528: */
0529: public void escape(Writer writer, String str)
0530: throws IOException {
0531: int len = str.length();
0532: for (int i = 0; i < len; i++) {
0533: char c = str.charAt(i);
0534: String entityName = this .entityName(c);
0535: if (entityName == null) {
0536: if (c > 0x7F) {
0537: writer.write("&#");
0538: writer.write(Integer.toString(c, 10));
0539: writer.write(';');
0540: } else {
0541: writer.write(c);
0542: }
0543: } else {
0544: writer.write('&');
0545: writer.write(entityName);
0546: writer.write(';');
0547: }
0548: }
0549: }
0550:
0551: /**
0552: * <p>
0553: * Unescapes the entities in a <code>String</code>.
0554: * </p>
0555: * <p>
0556: * For example, if you have called addEntity("foo", 0xA1),
0557: * unescape("&foo;") will return "\u00A1"
0558: * </p>
0559: *
0560: * @param str The <code>String</code> to escape.
0561: * @return A new escaped <code>String</code> or str itself if no unescaping
0562: * was necessary.
0563: */
0564: public String unescape(String str) {
0565: int firstAmp = str.indexOf('&');
0566: if (firstAmp < 0) {
0567: return str;
0568: }
0569:
0570: StringBuffer buf = new StringBuffer(str.length());
0571: buf.append(str.substring(0, firstAmp));
0572: for (int i = firstAmp; i < str.length(); ++i) {
0573: char ch = str.charAt(i);
0574: if (ch == '&') {
0575: int semi = str.indexOf(';', i + 1);
0576: if (semi == -1) {
0577: buf.append(ch);
0578: continue;
0579: }
0580: int amph = str.indexOf('&', i + 1);
0581: if (amph != -1 && amph < semi) {
0582: // Then the text looks like &...&...;
0583: buf.append(ch);
0584: continue;
0585: }
0586: String entityName = str.substring(i + 1, semi);
0587: int entityValue;
0588: if (entityName.length() == 0) {
0589: entityValue = -1;
0590: } else if (entityName.charAt(0) == '#') {
0591: if (entityName.length() == 1) {
0592: entityValue = -1;
0593: } else {
0594: char charAt1 = entityName.charAt(1);
0595: try {
0596: if (charAt1 == 'x' || charAt1 == 'X') {
0597: entityValue = Integer
0598: .valueOf(
0599: entityName
0600: .substring(2),
0601: 16).intValue();
0602: } else {
0603: entityValue = Integer
0604: .parseInt(entityName
0605: .substring(1));
0606: }
0607: if (entityValue > 0xFFFF) {
0608: entityValue = -1;
0609: }
0610: } catch (NumberFormatException ex) {
0611: entityValue = -1;
0612: }
0613: }
0614: } else {
0615: entityValue = this .entityValue(entityName);
0616: }
0617: if (entityValue == -1) {
0618: buf.append('&');
0619: buf.append(entityName);
0620: buf.append(';');
0621: } else {
0622: buf.append((char) (entityValue));
0623: }
0624: i = semi;
0625: } else {
0626: buf.append(ch);
0627: }
0628: }
0629: return buf.toString();
0630: }
0631:
0632: /**
0633: * <p>
0634: * Unescapes the escaped entities in the <code>String</code> passed and
0635: * writes the result to the <code>Writer</code> passed.
0636: * </p>
0637: *
0638: * @param writer The <code>Writer</code> to write the results to; assumed
0639: * to be non-null.
0640: * @param string The <code>String</code> to write the results to; assumed
0641: * to be non-null.
0642: * @throws IOException when <code>Writer</code> passed throws the exception
0643: * from calls to the {@link Writer#write(int)} methods.
0644: * @see #escape(String)
0645: * @see Writer
0646: */
0647: public void unescape(Writer writer, String string)
0648: throws IOException {
0649: int firstAmp = string.indexOf('&');
0650: if (firstAmp < 0) {
0651: writer.write(string);
0652: return;
0653: }
0654:
0655: writer.write(string, 0, firstAmp);
0656: int len = string.length();
0657: for (int i = firstAmp; i < len; i++) {
0658: char c = string.charAt(i);
0659: if (c == '&') {
0660: int nextIdx = i + 1;
0661: int semiColonIdx = string.indexOf(';', nextIdx);
0662: if (semiColonIdx == -1) {
0663: writer.write(c);
0664: continue;
0665: }
0666: int amphersandIdx = string.indexOf('&', i + 1);
0667: if (amphersandIdx != -1
0668: && amphersandIdx < semiColonIdx) {
0669: // Then the text looks like &...&...;
0670: writer.write(c);
0671: continue;
0672: }
0673: String entityContent = string.substring(nextIdx,
0674: semiColonIdx);
0675: int entityValue = -1;
0676: int entityContentLen = entityContent.length();
0677: if (entityContentLen > 0) {
0678: if (entityContent.charAt(0) == '#') { // escaped value content is an integer (decimal or
0679: // hexidecimal)
0680: if (entityContentLen > 1) {
0681: char isHexChar = entityContent
0682: .charAt(1);
0683: try {
0684: switch (isHexChar) {
0685: case 'X':
0686: case 'x':
0687: entityValue = Integer.parseInt(
0688: entityContent
0689: .substring(2),
0690: 16);
0691: break;
0692: default:
0693: entityValue = Integer.parseInt(
0694: entityContent
0695: .substring(1),
0696: 10);
0697: }
0698: if (entityValue > 0xFFFF) {
0699: entityValue = -1;
0700: }
0701: } catch (NumberFormatException e) {
0702: entityValue = -1;
0703: }
0704: }
0705: } else { // escaped value content is an entity name
0706: entityValue = this
0707: .entityValue(entityContent);
0708: }
0709: }
0710:
0711: if (entityValue == -1) {
0712: writer.write('&');
0713: writer.write(entityContent);
0714: writer.write(';');
0715: } else {
0716: writer.write(entityValue);
0717: }
0718: i = semiColonIdx; // move index up to the semi-colon
0719: } else {
0720: writer.write(c);
0721: }
0722: }
0723: }
0724:
0725: private static interface EntityMap {
0726: /**
0727: * <p>
0728: * Add an entry to this entity map.
0729: * </p>
0730: *
0731: * @param name the entity name
0732: * @param value the entity value
0733: */
0734: void add(String name, int value);
0735:
0736: /**
0737: * <p>
0738: * Returns the name of the entity identified by the specified value.
0739: * </p>
0740: *
0741: * @param value the value to locate
0742: * @return entity name associated with the specified value
0743: */
0744: String name(int value);
0745:
0746: /**
0747: * <p>
0748: * Returns the value of the entity identified by the specified name.
0749: * </p>
0750: *
0751: * @param name the name to locate
0752: * @return entity value associated with the specified name
0753: */
0754: int value(String name);
0755: }
0756:
0757: private static class PrimitiveEntityMap implements EntityMap {
0758: private Map mapNameToValue = new HashMap();
0759: private IntHashMap mapValueToName = new IntHashMap();
0760:
0761: /**
0762: * {@inheritDoc}
0763: */
0764: public void add(String name, int value) {
0765: mapNameToValue.put(name, new Integer(value));
0766: mapValueToName.put(value, name);
0767: }
0768:
0769: /**
0770: * {@inheritDoc}
0771: */
0772: public String name(int value) {
0773: return (String) mapValueToName.get(value);
0774: }
0775:
0776: /**
0777: * {@inheritDoc}
0778: */
0779: public int value(String name) {
0780: Object value = mapNameToValue.get(name);
0781: if (value == null) {
0782: return -1;
0783: }
0784: return ((Integer) value).intValue();
0785: }
0786: }
0787:
0788: private static class LookupEntityMap extends PrimitiveEntityMap {
0789: private static final int LOOKUP_TABLE_SIZE = 256;
0790: private String[] lookupTable;
0791:
0792: /**
0793: * {@inheritDoc}
0794: */
0795: public String name(int value) {
0796: if (value < LOOKUP_TABLE_SIZE) {
0797: return lookupTable()[value];
0798: }
0799: return super .name(value);
0800: }
0801:
0802: /**
0803: * <p>
0804: * Returns the lookup table for this entity map. The lookup table is
0805: * created if it has not been previously.
0806: * </p>
0807: *
0808: * @return the lookup table
0809: */
0810: private String[] lookupTable() {
0811: if (lookupTable == null) {
0812: createLookupTable();
0813: }
0814: return lookupTable;
0815: }
0816:
0817: /**
0818: * <p>
0819: * Creates an entity lookup table of LOOKUP_TABLE_SIZE elements,
0820: * initialized with entity names.
0821: * </p>
0822: */
0823: private void createLookupTable() {
0824: lookupTable = new String[LOOKUP_TABLE_SIZE];
0825: for (int i = 0; i < LOOKUP_TABLE_SIZE; ++i) {
0826: lookupTable[i] = super .name(i);
0827: }
0828: }
0829: }
0830:
0831: /**
0832: * <p>
0833: * A hash map that uses primitive ints for the key rather than objects.
0834: * </p>
0835: * <p>
0836: * Note that this class is for internal optimization purposes only, and may
0837: * not be supported in future releases of Jakarta Commons Lang. Utilities of
0838: * this sort may be included in future releases of Jakarta Commons
0839: * Collections.
0840: * </p>
0841: *
0842: * @author Justin Couch
0843: * @author Alex Chaffee (alex@apache.org)
0844: * @author Stephen Colebourne
0845: * @since 2.0
0846: * @version $Revision: 8077 $
0847: * @see java.util.HashMap
0848: */
0849: private static class IntHashMap {
0850:
0851: /**
0852: * The hash table data.
0853: */
0854: private transient Entry table[];
0855:
0856: /**
0857: * The total number of entries in the hash table.
0858: */
0859: private transient int count;
0860:
0861: /**
0862: * The table is rehashed when its size exceeds this threshold. (The value
0863: * of this field is (int)(capacity * loadFactor).)
0864: *
0865: * @serial
0866: */
0867: private int threshold;
0868:
0869: /**
0870: * The load factor for the hashtable.
0871: *
0872: * @serial
0873: */
0874: private float loadFactor;
0875:
0876: /**
0877: * <p>
0878: * Innerclass that acts as a datastructure to create a new entry in the
0879: * table.
0880: * </p>
0881: */
0882: private static class Entry {
0883: int hash;
0884: int key;
0885: Object value;
0886: Entry next;
0887:
0888: /**
0889: * <p>
0890: * Create a new entry with the given values.
0891: * </p>
0892: *
0893: * @param hash The code used to hash the object with
0894: * @param key The key used to enter this in the table
0895: * @param value The value for this key
0896: * @param next A reference to the next entry in the table
0897: */
0898: protected Entry(int hash, int key, Object value,
0899: Entry next) {
0900: this .hash = hash;
0901: this .key = key;
0902: this .value = value;
0903: this .next = next;
0904: }
0905: }
0906:
0907: /**
0908: * <p>
0909: * Constructs a new, empty hashtable with a default capacity and load
0910: * factor, which is <code>20</code> and <code>0.75</code>
0911: * respectively.
0912: * </p>
0913: */
0914: public IntHashMap() {
0915: this (20, 0.75f);
0916: }
0917:
0918: /**
0919: * <p>
0920: * Constructs a new, empty hashtable with the specified initial capacity
0921: * and default load factor, which is <code>0.75</code>.
0922: * </p>
0923: *
0924: * @param initialCapacity the initial capacity of the hashtable.
0925: * @throws IllegalArgumentException if the initial capacity is less than
0926: * zero.
0927: */
0928: public IntHashMap(int initialCapacity) {
0929: this (initialCapacity, 0.75f);
0930: }
0931:
0932: /**
0933: * <p>
0934: * Constructs a new, empty hashtable with the specified initial capacity
0935: * and the specified load factor.
0936: * </p>
0937: *
0938: * @param initialCapacity the initial capacity of the hashtable.
0939: * @param loadFactor the load factor of the hashtable.
0940: * @throws IllegalArgumentException if the initial capacity is less than
0941: * zero, or if the load factor is nonpositive.
0942: */
0943: public IntHashMap(int initialCapacity, float loadFactor) {
0944: super ();
0945: if (initialCapacity < 0) {
0946: throw new IllegalArgumentException(
0947: "Illegal Capacity: " + initialCapacity);
0948: }
0949: if (loadFactor <= 0) {
0950: throw new IllegalArgumentException("Illegal Load: "
0951: + loadFactor);
0952: }
0953: if (initialCapacity == 0) {
0954: initialCapacity = 1;
0955: }
0956:
0957: this .loadFactor = loadFactor;
0958: table = new Entry[initialCapacity];
0959: threshold = (int) (initialCapacity * loadFactor);
0960: }
0961:
0962: /**
0963: * <p>
0964: * Returns the number of keys in this hashtable.
0965: * </p>
0966: *
0967: * @return the number of keys in this hashtable.
0968: */
0969: public int size() {
0970: return count;
0971: }
0972:
0973: /**
0974: * <p>
0975: * Tests if this hashtable maps no keys to values.
0976: * </p>
0977: *
0978: * @return <code>true</code> if this hashtable maps no keys to values;
0979: * <code>false</code> otherwise.
0980: */
0981: public boolean isEmpty() {
0982: return count == 0;
0983: }
0984:
0985: /**
0986: * <p>
0987: * Tests if some key maps into the specified value in this hashtable.
0988: * This operation is more expensive than the <code>containsKey</code>
0989: * method.
0990: * </p>
0991: * <p>
0992: * Note that this method is identical in functionality to containsValue,
0993: * (which is part of the Map interface in the collections framework).
0994: * </p>
0995: *
0996: * @param value a value to search for.
0997: * @return <code>true</code> if and only if some key maps to the
0998: * <code>value</code> argument in this hashtable as determined
0999: * by the <tt>equals</tt> method; <code>false</code>
1000: * otherwise.
1001: * @throws NullPointerException if the value is <code>null</code>.
1002: * @see #containsKey(int)
1003: * @see #containsValue(Object)
1004: * @see java.util.Map
1005: */
1006: public boolean contains(Object value) {
1007: if (value == null) {
1008: throw new IllegalArgumentException(
1009: "parameter value may not be null");
1010: }
1011:
1012: Entry tab[] = table;
1013: for (int i = tab.length; i-- > 0;) {
1014: for (Entry e = tab[i]; e != null; e = e.next) {
1015: if (e.value.equals(value)) {
1016: return true;
1017: }
1018: }
1019: }
1020: return false;
1021: }
1022:
1023: /**
1024: * <p>
1025: * Returns <code>true</code> if this HashMap maps one or more keys to
1026: * this value.
1027: * </p>
1028: * <p>
1029: * Note that this method is identical in functionality to contains (which
1030: * predates the Map interface).
1031: * </p>
1032: *
1033: * @param value value whose presence in this HashMap is to be tested.
1034: * @return boolean <code>true</code> if the value is contained
1035: * @see java.util.Map
1036: * @since JDK1.2
1037: */
1038: public boolean containsValue(Object value) {
1039: return contains(value);
1040: }
1041:
1042: /**
1043: * <p>
1044: * Tests if the specified object is a key in this hashtable.
1045: * </p>
1046: *
1047: * @param key possible key.
1048: * @return <code>true</code> if and only if the specified object is a
1049: * key in this hashtable, as determined by the <tt>equals</tt>
1050: * method; <code>false</code> otherwise.
1051: * @see #contains(Object)
1052: */
1053: public boolean containsKey(int key) {
1054: Entry tab[] = table;
1055: int hash = key;
1056: int index = (hash & 0x7FFFFFFF) % tab.length;
1057: for (Entry e = tab[index]; e != null; e = e.next) {
1058: if (e.hash == hash) {
1059: return true;
1060: }
1061: }
1062: return false;
1063: }
1064:
1065: /**
1066: * <p>
1067: * Returns the value to which the specified key is mapped in this map.
1068: * </p>
1069: *
1070: * @param key a key in the hashtable.
1071: * @return the value to which the key is mapped in this hashtable;
1072: * <code>null</code> if the key is not mapped to any value in
1073: * this hashtable.
1074: * @see #put(int, Object)
1075: */
1076: public Object get(int key) {
1077: Entry tab[] = table;
1078: int hash = key;
1079: int index = (hash & 0x7FFFFFFF) % tab.length;
1080: for (Entry e = tab[index]; e != null; e = e.next) {
1081: if (e.hash == hash) {
1082: return e.value;
1083: }
1084: }
1085: return null;
1086: }
1087:
1088: /**
1089: * <p>
1090: * Increases the capacity of and internally reorganizes this hashtable,
1091: * in order to accommodate and access its entries more efficiently.
1092: * </p>
1093: * <p>
1094: * This method is called automatically when the number of keys in the
1095: * hashtable exceeds this hashtable's capacity and load factor.
1096: * </p>
1097: */
1098: protected void rehash() {
1099: int oldCapacity = table.length;
1100: Entry oldMap[] = table;
1101:
1102: int newCapacity = oldCapacity * 2 + 1;
1103: Entry newMap[] = new Entry[newCapacity];
1104:
1105: threshold = (int) (newCapacity * loadFactor);
1106: table = newMap;
1107:
1108: for (int i = oldCapacity; i-- > 0;) {
1109: for (Entry old = oldMap[i]; old != null;) {
1110: Entry e = old;
1111: old = old.next;
1112:
1113: int index = (e.hash & 0x7FFFFFFF) % newCapacity;
1114: e.next = newMap[index];
1115: newMap[index] = e;
1116: }
1117: }
1118: }
1119:
1120: /**
1121: * <p>
1122: * Maps the specified <code>key</code> to the specified
1123: * <code>value</code> in this hashtable. The key cannot be
1124: * <code>null</code>.
1125: * </p>
1126: * <p>
1127: * The value can be retrieved by calling the <code>get</code> method
1128: * with a key that is equal to the original key.
1129: * </p>
1130: *
1131: * @param key the hashtable key.
1132: * @param value the value.
1133: * @return the previous value of the specified key in this hashtable, or
1134: * <code>null</code> if it did not have one.
1135: * @throws NullPointerException if the key is <code>null</code>.
1136: * @see #get(int)
1137: */
1138: public Object put(int key, Object value) {
1139: // Makes sure the key is not already in the hashtable.
1140: Entry tab[] = table;
1141: int hash = key;
1142: int index = (hash & 0x7FFFFFFF) % tab.length;
1143: for (Entry e = tab[index]; e != null; e = e.next) {
1144: if (e.hash == hash) {
1145: Object old = e.value;
1146: e.value = value;
1147: return old;
1148: }
1149: }
1150:
1151: if (count >= threshold) {
1152: // Rehash the table if the threshold is exceeded
1153: rehash();
1154:
1155: tab = table;
1156: index = (hash & 0x7FFFFFFF) % tab.length;
1157: }
1158:
1159: // Creates the new entry.
1160: Entry e = new Entry(hash, key, value, tab[index]);
1161: tab[index] = e;
1162: count++;
1163: return null;
1164: }
1165:
1166: /**
1167: * <p>
1168: * Removes the key (and its corresponding value) from this hashtable.
1169: * </p>
1170: * <p>
1171: * This method does nothing if the key is not present in the hashtable.
1172: * </p>
1173: *
1174: * @param key the key that needs to be removed.
1175: * @return the value to which the key had been mapped in this hashtable,
1176: * or <code>null</code> if the key did not have a mapping.
1177: */
1178: public Object remove(int key) {
1179: Entry tab[] = table;
1180: int hash = key;
1181: int index = (hash & 0x7FFFFFFF) % tab.length;
1182: for (Entry e = tab[index], prev = null; e != null; prev = e, e = e.next) {
1183: if (e.hash == hash) {
1184: if (prev != null) {
1185: prev.next = e.next;
1186: } else {
1187: tab[index] = e.next;
1188: }
1189: count--;
1190: Object oldValue = e.value;
1191: e.value = null;
1192: return oldValue;
1193: }
1194: }
1195: return null;
1196: }
1197:
1198: /**
1199: * <p>Clears this hashtable so that it contains no keys.</p>
1200: */
1201: public synchronized void clear() {
1202: Entry tab[] = table;
1203: for (int index = tab.length; --index >= 0;) {
1204: tab[index] = null;
1205: }
1206: count = 0;
1207: }
1208:
1209: }
1210:
1211: }
1212: }
|