0001: // Jericho HTML Parser - Java based library for analysing and manipulating HTML
0002: // Version 2.5
0003: // Copyright (C) 2007 Martin Jericho
0004: // http://jerichohtml.sourceforge.net/
0005: //
0006: // This library is free software; you can redistribute it and/or
0007: // modify it under the terms of either one of the following licences:
0008: //
0009: // 1. The Eclipse Public License (EPL) version 1.0,
0010: // included in this distribution in the file licence-epl-1.0.html
0011: // or available at http://www.eclipse.org/legal/epl-v10.html
0012: //
0013: // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
0014: // included in this distribution in the file licence-lgpl-2.1.txt
0015: // or available at http://www.gnu.org/licenses/lgpl.txt
0016: //
0017: // This library is distributed on an "AS IS" basis,
0018: // WITHOUT WARRANTY OF ANY KIND, either express or implied.
0019: // See the individual licence texts for more details.
0020:
0021: package au.id.jericho.lib.html;
0022:
0023: import java.util.*;
0024:
0025: /**
0026: * Represents an HTML <a target="_blank" href="http://www.w3.org/TR/REC-html40/charset.html#h-5.3.2">Character Entity Reference</a>.
0027: * <p>
0028: * <b>Click <a href="#method_summary">here</a> to scroll down to the method summary.</b>
0029: * <p>
0030: * The full list of HTML character entity references can be found at the following URL:<br />
0031: * <a target="_blank" href="http://www.w3.org/TR/REC-html40/sgml/entities.html">http://www.w3.org/TR/REC-html40/sgml/entities.html</a>.
0032: * <p>
0033: * There are a total of 253 HTML character entity references, ranging from codepoints U+0022 to U+2666.
0034: * <p>
0035: * Static methods to {@linkplain #encode(CharSequence) encode} and {@linkplain #decode(CharSequence) decode} strings
0036: * and single characters can be found in the {@link CharacterReference} superclass.
0037: * <p>
0038: * The {@link #_apos &amp;apos;} entity reference is not defined for use in HTML.
0039: * It is defined in the <a target="_blank" href="http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters">XHTML Special Characters Entity Set</a>,
0040: * and is the only one that is not included in both HTML and XHTML.
0041: * For this reason, the <code>&amp;apos;</code> entity reference is recognised by this library in decoding functions, but in encoding functions
0042: * the numeric character reference <code>&amp;#39;</code> is used instead.
0043: * Most modern browsers support it in both XHTML and HTML, with the notable exception
0044: * of Microsoft Internet Explorer 6.0, which doesn't support it in either.
0045: * <p>
0046: * <code>CharacterEntityReference</code> instances are obtained using one of the following methods:
0047: * <ul>
0048: * <li>{@link CharacterReference#parse(CharSequence characterReferenceText)}
0049: * <li>{@link Source#findNextCharacterReference(int pos)}
0050: * <li>{@link Source#findPreviousCharacterReference(int pos)}
0051: * <li>{@link Segment#findAllCharacterReferences()}
0052: * </ul>
0053: *
0054: * @see CharacterReference
0055: * @see NumericCharacterReference
0056: */
0057: public class CharacterEntityReference extends CharacterReference {
0058: private String name;
0059:
0060: /** <samp> </samp> <code>&nbsp; = &#160;</code> -- no-break space = non-breaking space, U+00A0 ISOnum. */
0061: public static final char _nbsp = '\u00A0';
0062: /** <samp>¡</samp> <code>&iexcl; = &#161;</code> -- inverted exclamation mark, U+00A1 ISOnum. */
0063: public static final char _iexcl = '\u00A1';
0064: /** <samp>¢</samp> <code>&cent; = &#162;</code> -- cent sign, U+00A2 ISOnum. */
0065: public static final char _cent = '\u00A2';
0066: /** <samp>£</samp> <code>&pound; = &#163;</code> -- pound sign, U+00A3 ISOnum. */
0067: public static final char _pound = '\u00A3';
0068: /** <samp>¤</samp> <code>&curren; = &#164;</code> -- currency sign, U+00A4 ISOnum. */
0069: public static final char _curren = '\u00A4';
0070: /** <samp>¥</samp> <code>&yen; = &#165;</code> -- yen sign = yuan sign, U+00A5 ISOnum. */
0071: public static final char _yen = '\u00A5';
0072: /** <samp>¦</samp> <code>&brvbar; = &#166;</code> -- broken bar = broken vertical bar, U+00A6 ISOnum. */
0073: public static final char _brvbar = '\u00A6';
0074: /** <samp>§</samp> <code>&sect; = &#167;</code> -- section sign, U+00A7 ISOnum. */
0075: public static final char _sect = '\u00A7';
0076: /** <samp>¨</samp> <code>&uml; = &#168;</code> -- diaeresis = spacing diaeresis, U+00A8 ISOdia. */
0077: public static final char _uml = '\u00A8';
0078: /** <samp>©</samp> <code>&copy; = &#169;</code> -- copyright sign, U+00A9 ISOnum. */
0079: public static final char _copy = '\u00A9';
0080: /** <samp>ª</samp> <code>&ordf; = &#170;</code> -- feminine ordinal indicator, U+00AA ISOnum. */
0081: public static final char _ordf = '\u00AA';
0082: /** <samp>«</samp> <code>&laquo; = &#171;</code> -- left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum. */
0083: public static final char _laquo = '\u00AB';
0084: /** <samp>¬</samp> <code>&not; = &#172;</code> -- not sign = angled dash, U+00AC ISOnum. */
0085: public static final char _not = '\u00AC';
0086: /** <samp>­</samp> <code>&shy; = &#173;</code> -- soft hyphen = discretionary hyphen, U+00AD ISOnum. */
0087: public static final char _shy = '\u00AD';
0088: /** <samp>®</samp> <code>&reg; = &#174;</code> -- registered sign = registered trade mark sign, U+00AE ISOnum. */
0089: public static final char _reg = '\u00AE';
0090: /** <samp>¯</samp> <code>&macr; = &#175;</code> -- macron = spacing macron = overline = APL overbar, U+00AF ISOdia. */
0091: public static final char _macr = '\u00AF';
0092: /** <samp>°</samp> <code>&deg; = &#176;</code> -- degree sign, U+00B0 ISOnum. */
0093: public static final char _deg = '\u00B0';
0094: /** <samp>±</samp> <code>&plusmn; = &#177;</code> -- plus-minus sign = plus-or-minus sign, U+00B1 ISOnum. */
0095: public static final char _plusmn = '\u00B1';
0096: /** <samp>²</samp> <code>&sup2; = &#178;</code> -- superscript two = superscript digit two = squared, U+00B2 ISOnum. */
0097: public static final char _sup2 = '\u00B2';
0098: /** <samp>³</samp> <code>&sup3; = &#179;</code> -- superscript three = superscript digit three = cubed, U+00B3 ISOnum. */
0099: public static final char _sup3 = '\u00B3';
0100: /** <samp>´</samp> <code>&acute; = &#180;</code> -- acute accent = spacing acute, U+00B4 ISOdia. */
0101: public static final char _acute = '\u00B4';
0102: /** <samp>µ</samp> <code>&micro; = &#181;</code> -- micro sign, U+00B5 ISOnum. */
0103: public static final char _micro = '\u00B5';
0104: /** <samp>¶</samp> <code>&para; = &#182;</code> -- pilcrow sign = paragraph sign, U+00B6 ISOnum. */
0105: public static final char _para = '\u00B6';
0106: /** <samp>·</samp> <code>&middot; = &#183;</code> -- middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum. */
0107: public static final char _middot = '\u00B7';
0108: /** <samp>¸</samp> <code>&cedil; = &#184;</code> -- cedilla = spacing cedilla, U+00B8 ISOdia. */
0109: public static final char _cedil = '\u00B8';
0110: /** <samp>¹</samp> <code>&sup1; = &#185;</code> -- superscript one = superscript digit one, U+00B9 ISOnum. */
0111: public static final char _sup1 = '\u00B9';
0112: /** <samp>º</samp> <code>&ordm; = &#186;</code> -- masculine ordinal indicator, U+00BA ISOnum. */
0113: public static final char _ordm = '\u00BA';
0114: /** <samp>»</samp> <code>&raquo; = &#187;</code> -- right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum. */
0115: public static final char _raquo = '\u00BB';
0116: /** <samp>¼</samp> <code>&frac14; = &#188;</code> -- vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum. */
0117: public static final char _frac14 = '\u00BC';
0118: /** <samp>½</samp> <code>&frac12; = &#189;</code> -- vulgar fraction one half = fraction one half, U+00BD ISOnum. */
0119: public static final char _frac12 = '\u00BD';
0120: /** <samp>¾</samp> <code>&frac34; = &#190;</code> -- vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum. */
0121: public static final char _frac34 = '\u00BE';
0122: /** <samp>¿</samp> <code>&iquest; = &#191;</code> -- inverted question mark = turned question mark, U+00BF ISOnum. */
0123: public static final char _iquest = '\u00BF';
0124: /** <samp>À</samp> <code>&Agrave; = &#192;</code> -- latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1. */
0125: public static final char _Agrave = '\u00C0';
0126: /** <samp>Á</samp> <code>&Aacute; = &#193;</code> -- latin capital letter A with acute, U+00C1 ISOlat1. */
0127: public static final char _Aacute = '\u00C1';
0128: /** <samp>Â</samp> <code>&Acirc; = &#194;</code> -- latin capital letter A with circumflex, U+00C2 ISOlat1. */
0129: public static final char _Acirc = '\u00C2';
0130: /** <samp>Ã</samp> <code>&Atilde; = &#195;</code> -- latin capital letter A with tilde, U+00C3 ISOlat1. */
0131: public static final char _Atilde = '\u00C3';
0132: /** <samp>Ä</samp> <code>&Auml; = &#196;</code> -- latin capital letter A with diaeresis, U+00C4 ISOlat1. */
0133: public static final char _Auml = '\u00C4';
0134: /** <samp>Å</samp> <code>&Aring; = &#197;</code> -- latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1. */
0135: public static final char _Aring = '\u00C5';
0136: /** <samp>Æ</samp> <code>&AElig; = &#198;</code> -- latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1. */
0137: public static final char _AElig = '\u00C6';
0138: /** <samp>Ç</samp> <code>&Ccedil; = &#199;</code> -- latin capital letter C with cedilla, U+00C7 ISOlat1. */
0139: public static final char _Ccedil = '\u00C7';
0140: /** <samp>È</samp> <code>&Egrave; = &#200;</code> -- latin capital letter E with grave, U+00C8 ISOlat1. */
0141: public static final char _Egrave = '\u00C8';
0142: /** <samp>É</samp> <code>&Eacute; = &#201;</code> -- latin capital letter E with acute, U+00C9 ISOlat1. */
0143: public static final char _Eacute = '\u00C9';
0144: /** <samp>Ê</samp> <code>&Ecirc; = &#202;</code> -- latin capital letter E with circumflex, U+00CA ISOlat1. */
0145: public static final char _Ecirc = '\u00CA';
0146: /** <samp>Ë</samp> <code>&Euml; = &#203;</code> -- latin capital letter E with diaeresis, U+00CB ISOlat1. */
0147: public static final char _Euml = '\u00CB';
0148: /** <samp>Ì</samp> <code>&Igrave; = &#204;</code> -- latin capital letter I with grave, U+00CC ISOlat1. */
0149: public static final char _Igrave = '\u00CC';
0150: /** <samp>Í</samp> <code>&Iacute; = &#205;</code> -- latin capital letter I with acute, U+00CD ISOlat1. */
0151: public static final char _Iacute = '\u00CD';
0152: /** <samp>Î</samp> <code>&Icirc; = &#206;</code> -- latin capital letter I with circumflex, U+00CE ISOlat1. */
0153: public static final char _Icirc = '\u00CE';
0154: /** <samp>Ï</samp> <code>&Iuml; = &#207;</code> -- latin capital letter I with diaeresis, U+00CF ISOlat1. */
0155: public static final char _Iuml = '\u00CF';
0156: /** <samp>Ð</samp> <code>&ETH; = &#208;</code> -- latin capital letter ETH, U+00D0 ISOlat1. */
0157: public static final char _ETH = '\u00D0';
0158: /** <samp>Ñ</samp> <code>&Ntilde; = &#209;</code> -- latin capital letter N with tilde, U+00D1 ISOlat1. */
0159: public static final char _Ntilde = '\u00D1';
0160: /** <samp>Ò</samp> <code>&Ograve; = &#210;</code> -- latin capital letter O with grave, U+00D2 ISOlat1. */
0161: public static final char _Ograve = '\u00D2';
0162: /** <samp>Ó</samp> <code>&Oacute; = &#211;</code> -- latin capital letter O with acute, U+00D3 ISOlat1. */
0163: public static final char _Oacute = '\u00D3';
0164: /** <samp>Ô</samp> <code>&Ocirc; = &#212;</code> -- latin capital letter O with circumflex, U+00D4 ISOlat1. */
0165: public static final char _Ocirc = '\u00D4';
0166: /** <samp>Õ</samp> <code>&Otilde; = &#213;</code> -- latin capital letter O with tilde, U+00D5 ISOlat1. */
0167: public static final char _Otilde = '\u00D5';
0168: /** <samp>Ö</samp> <code>&Ouml; = &#214;</code> -- latin capital letter O with diaeresis, U+00D6 ISOlat1. */
0169: public static final char _Ouml = '\u00D6';
0170: /** <samp>×</samp> <code>&times; = &#215;</code> -- multiplication sign, U+00D7 ISOnum. */
0171: public static final char _times = '\u00D7';
0172: /** <samp>Ø</samp> <code>&Oslash; = &#216;</code> -- latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1. */
0173: public static final char _Oslash = '\u00D8';
0174: /** <samp>Ù</samp> <code>&Ugrave; = &#217;</code> -- latin capital letter U with grave, U+00D9 ISOlat1. */
0175: public static final char _Ugrave = '\u00D9';
0176: /** <samp>Ú</samp> <code>&Uacute; = &#218;</code> -- latin capital letter U with acute, U+00DA ISOlat1. */
0177: public static final char _Uacute = '\u00DA';
0178: /** <samp>Û</samp> <code>&Ucirc; = &#219;</code> -- latin capital letter U with circumflex, U+00DB ISOlat1. */
0179: public static final char _Ucirc = '\u00DB';
0180: /** <samp>Ü</samp> <code>&Uuml; = &#220;</code> -- latin capital letter U with diaeresis, U+00DC ISOlat1. */
0181: public static final char _Uuml = '\u00DC';
0182: /** <samp>Ý</samp> <code>&Yacute; = &#221;</code> -- latin capital letter Y with acute, U+00DD ISOlat1. */
0183: public static final char _Yacute = '\u00DD';
0184: /** <samp>Þ</samp> <code>&THORN; = &#222;</code> -- latin capital letter THORN, U+00DE ISOlat1. */
0185: public static final char _THORN = '\u00DE';
0186: /** <samp>ß</samp> <code>&szlig; = &#223;</code> -- latin small letter sharp s = ess-zed, U+00DF ISOlat1. */
0187: public static final char _szlig = '\u00DF';
0188: /** <samp>à</samp> <code>&agrave; = &#224;</code> -- latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1. */
0189: public static final char _agrave = '\u00E0';
0190: /** <samp>á</samp> <code>&aacute; = &#225;</code> -- latin small letter a with acute, U+00E1 ISOlat1. */
0191: public static final char _aacute = '\u00E1';
0192: /** <samp>â</samp> <code>&acirc; = &#226;</code> -- latin small letter a with circumflex, U+00E2 ISOlat1. */
0193: public static final char _acirc = '\u00E2';
0194: /** <samp>ã</samp> <code>&atilde; = &#227;</code> -- latin small letter a with tilde, U+00E3 ISOlat1. */
0195: public static final char _atilde = '\u00E3';
0196: /** <samp>ä</samp> <code>&auml; = &#228;</code> -- latin small letter a with diaeresis, U+00E4 ISOlat1. */
0197: public static final char _auml = '\u00E4';
0198: /** <samp>å</samp> <code>&aring; = &#229;</code> -- latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1. */
0199: public static final char _aring = '\u00E5';
0200: /** <samp>æ</samp> <code>&aelig; = &#230;</code> -- latin small letter ae = latin small ligature ae, U+00E6 ISOlat1. */
0201: public static final char _aelig = '\u00E6';
0202: /** <samp>ç</samp> <code>&ccedil; = &#231;</code> -- latin small letter c with cedilla, U+00E7 ISOlat1. */
0203: public static final char _ccedil = '\u00E7';
0204: /** <samp>è</samp> <code>&egrave; = &#232;</code> -- latin small letter e with grave, U+00E8 ISOlat1. */
0205: public static final char _egrave = '\u00E8';
0206: /** <samp>é</samp> <code>&eacute; = &#233;</code> -- latin small letter e with acute, U+00E9 ISOlat1. */
0207: public static final char _eacute = '\u00E9';
0208: /** <samp>ê</samp> <code>&ecirc; = &#234;</code> -- latin small letter e with circumflex, U+00EA ISOlat1. */
0209: public static final char _ecirc = '\u00EA';
0210: /** <samp>ë</samp> <code>&euml; = &#235;</code> -- latin small letter e with diaeresis, U+00EB ISOlat1. */
0211: public static final char _euml = '\u00EB';
0212: /** <samp>ì</samp> <code>&igrave; = &#236;</code> -- latin small letter i with grave, U+00EC ISOlat1. */
0213: public static final char _igrave = '\u00EC';
0214: /** <samp>í</samp> <code>&iacute; = &#237;</code> -- latin small letter i with acute, U+00ED ISOlat1. */
0215: public static final char _iacute = '\u00ED';
0216: /** <samp>î</samp> <code>&icirc; = &#238;</code> -- latin small letter i with circumflex, U+00EE ISOlat1. */
0217: public static final char _icirc = '\u00EE';
0218: /** <samp>ï</samp> <code>&iuml; = &#239;</code> -- latin small letter i with diaeresis, U+00EF ISOlat1. */
0219: public static final char _iuml = '\u00EF';
0220: /** <samp>ð</samp> <code>&eth; = &#240;</code> -- latin small letter eth, U+00F0 ISOlat1. */
0221: public static final char _eth = '\u00F0';
0222: /** <samp>ñ</samp> <code>&ntilde; = &#241;</code> -- latin small letter n with tilde, U+00F1 ISOlat1. */
0223: public static final char _ntilde = '\u00F1';
0224: /** <samp>ò</samp> <code>&ograve; = &#242;</code> -- latin small letter o with grave, U+00F2 ISOlat1. */
0225: public static final char _ograve = '\u00F2';
0226: /** <samp>ó</samp> <code>&oacute; = &#243;</code> -- latin small letter o with acute, U+00F3 ISOlat1. */
0227: public static final char _oacute = '\u00F3';
0228: /** <samp>ô</samp> <code>&ocirc; = &#244;</code> -- latin small letter o with circumflex, U+00F4 ISOlat1. */
0229: public static final char _ocirc = '\u00F4';
0230: /** <samp>õ</samp> <code>&otilde; = &#245;</code> -- latin small letter o with tilde, U+00F5 ISOlat1. */
0231: public static final char _otilde = '\u00F5';
0232: /** <samp>ö</samp> <code>&ouml; = &#246;</code> -- latin small letter o with diaeresis, U+00F6 ISOlat1. */
0233: public static final char _ouml = '\u00F6';
0234: /** <samp>÷</samp> <code>&divide; = &#247;</code> -- division sign, U+00F7 ISOnum. */
0235: public static final char _divide = '\u00F7';
0236: /** <samp>ø</samp> <code>&oslash; = &#248;</code> -- latin small letter o with stroke = latin small letter o slash, U+00F8 ISOlat1. */
0237: public static final char _oslash = '\u00F8';
0238: /** <samp>ù</samp> <code>&ugrave; = &#249;</code> -- latin small letter u with grave, U+00F9 ISOlat1. */
0239: public static final char _ugrave = '\u00F9';
0240: /** <samp>ú</samp> <code>&uacute; = &#250;</code> -- latin small letter u with acute, U+00FA ISOlat1. */
0241: public static final char _uacute = '\u00FA';
0242: /** <samp>û</samp> <code>&ucirc; = &#251;</code> -- latin small letter u with circumflex, U+00FB ISOlat1. */
0243: public static final char _ucirc = '\u00FB';
0244: /** <samp>ü</samp> <code>&uuml; = &#252;</code> -- latin small letter u with diaeresis, U+00FC ISOlat1. */
0245: public static final char _uuml = '\u00FC';
0246: /** <samp>ý</samp> <code>&yacute; = &#253;</code> -- latin small letter y with acute, U+00FD ISOlat1. */
0247: public static final char _yacute = '\u00FD';
0248: /** <samp>þ</samp> <code>&thorn; = &#254;</code> -- latin small letter thorn, U+00FE ISOlat1. */
0249: public static final char _thorn = '\u00FE';
0250: /** <samp>ÿ</samp> <code>&yuml; = &#255;</code> -- latin small letter y with diaeresis, U+00FF ISOlat1. */
0251: public static final char _yuml = '\u00FF';
0252: /** <samp>ƒ</samp> <code>&fnof; = &#402;</code> -- latin small letter f with hook = function = florin, U+0192 ISOtech. */
0253: public static final char _fnof = '\u0192';
0254: /** <samp>Α</samp> <code>&Alpha; = &#913;</code> -- greek capital letter alpha, U+0391. */
0255: public static final char _Alpha = '\u0391';
0256: /** <samp>Β</samp> <code>&Beta; = &#914;</code> -- greek capital letter beta, U+0392. */
0257: public static final char _Beta = '\u0392';
0258: /** <samp>Γ</samp> <code>&Gamma; = &#915;</code> -- greek capital letter gamma, U+0393 ISOgrk3. */
0259: public static final char _Gamma = '\u0393';
0260: /** <samp>Δ</samp> <code>&Delta; = &#916;</code> -- greek capital letter delta, U+0394 ISOgrk3. */
0261: public static final char _Delta = '\u0394';
0262: /** <samp>Ε</samp> <code>&Epsilon; = &#917;</code> -- greek capital letter epsilon, U+0395. */
0263: public static final char _Epsilon = '\u0395';
0264: /** <samp>Ζ</samp> <code>&Zeta; = &#918;</code> -- greek capital letter zeta, U+0396. */
0265: public static final char _Zeta = '\u0396';
0266: /** <samp>Η</samp> <code>&Eta; = &#919;</code> -- greek capital letter eta, U+0397. */
0267: public static final char _Eta = '\u0397';
0268: /** <samp>Θ</samp> <code>&Theta; = &#920;</code> -- greek capital letter theta, U+0398 ISOgrk3. */
0269: public static final char _Theta = '\u0398';
0270: /** <samp>Ι</samp> <code>&Iota; = &#921;</code> -- greek capital letter iota, U+0399. */
0271: public static final char _Iota = '\u0399';
0272: /** <samp>Κ</samp> <code>&Kappa; = &#922;</code> -- greek capital letter kappa, U+039A. */
0273: public static final char _Kappa = '\u039A';
0274: /** <samp>Λ</samp> <code>&Lambda; = &#923;</code> -- greek capital letter lambda, U+039B ISOgrk3. */
0275: public static final char _Lambda = '\u039B';
0276: /** <samp>Μ</samp> <code>&Mu; = &#924;</code> -- greek capital letter mu, U+039C. */
0277: public static final char _Mu = '\u039C';
0278: /** <samp>Ν</samp> <code>&Nu; = &#925;</code> -- greek capital letter nu, U+039D. */
0279: public static final char _Nu = '\u039D';
0280: /** <samp>Ξ</samp> <code>&Xi; = &#926;</code> -- greek capital letter xi, U+039E ISOgrk3. */
0281: public static final char _Xi = '\u039E';
0282: /** <samp>Ο</samp> <code>&Omicron; = &#927;</code> -- greek capital letter omicron, U+039F. */
0283: public static final char _Omicron = '\u039F';
0284: /** <samp>Π</samp> <code>&Pi; = &#928;</code> -- greek capital letter pi, U+03A0 ISOgrk3. */
0285: public static final char _Pi = '\u03A0';
0286: /** <samp>Ρ</samp> <code>&Rho; = &#929;</code> -- greek capital letter rho, U+03A1. */
0287: public static final char _Rho = '\u03A1';
0288: /** <samp>Σ</samp> <code>&Sigma; = &#931;</code> -- greek capital letter sigma, U+03A3 ISOgrk3. */
0289: public static final char _Sigma = '\u03A3';
0290: /** <samp>Τ</samp> <code>&Tau; = &#932;</code> -- greek capital letter tau, U+03A4. */
0291: public static final char _Tau = '\u03A4';
0292: /** <samp>Υ</samp> <code>&Upsilon; = &#933;</code> -- greek capital letter upsilon, U+03A5 ISOgrk3. */
0293: public static final char _Upsilon = '\u03A5';
0294: /** <samp>Φ</samp> <code>&Phi; = &#934;</code> -- greek capital letter phi, U+03A6 ISOgrk3. */
0295: public static final char _Phi = '\u03A6';
0296: /** <samp>Χ</samp> <code>&Chi; = &#935;</code> -- greek capital letter chi, U+03A7. */
0297: public static final char _Chi = '\u03A7';
0298: /** <samp>Ψ</samp> <code>&Psi; = &#936;</code> -- greek capital letter psi, U+03A8 ISOgrk3. */
0299: public static final char _Psi = '\u03A8';
0300: /** <samp>Ω</samp> <code>&Omega; = &#937;</code> -- greek capital letter omega, U+03A9 ISOgrk3. */
0301: public static final char _Omega = '\u03A9';
0302: /** <samp>α</samp> <code>&alpha; = &#945;</code> -- greek small letter alpha, U+03B1 ISOgrk3. */
0303: public static final char _alpha = '\u03B1';
0304: /** <samp>β</samp> <code>&beta; = &#946;</code> -- greek small letter beta, U+03B2 ISOgrk3. */
0305: public static final char _beta = '\u03B2';
0306: /** <samp>γ</samp> <code>&gamma; = &#947;</code> -- greek small letter gamma, U+03B3 ISOgrk3. */
0307: public static final char _gamma = '\u03B3';
0308: /** <samp>δ</samp> <code>&delta; = &#948;</code> -- greek small letter delta, U+03B4 ISOgrk3. */
0309: public static final char _delta = '\u03B4';
0310: /** <samp>ε</samp> <code>&epsilon; = &#949;</code> -- greek small letter epsilon, U+03B5 ISOgrk3. */
0311: public static final char _epsilon = '\u03B5';
0312: /** <samp>ζ</samp> <code>&zeta; = &#950;</code> -- greek small letter zeta, U+03B6 ISOgrk3. */
0313: public static final char _zeta = '\u03B6';
0314: /** <samp>η</samp> <code>&eta; = &#951;</code> -- greek small letter eta, U+03B7 ISOgrk3. */
0315: public static final char _eta = '\u03B7';
0316: /** <samp>θ</samp> <code>&theta; = &#952;</code> -- greek small letter theta, U+03B8 ISOgrk3. */
0317: public static final char _theta = '\u03B8';
0318: /** <samp>ι</samp> <code>&iota; = &#953;</code> -- greek small letter iota, U+03B9 ISOgrk3. */
0319: public static final char _iota = '\u03B9';
0320: /** <samp>κ</samp> <code>&kappa; = &#954;</code> -- greek small letter kappa, U+03BA ISOgrk3. */
0321: public static final char _kappa = '\u03BA';
0322: /** <samp>λ</samp> <code>&lambda; = &#955;</code> -- greek small letter lambda, U+03BB ISOgrk3. */
0323: public static final char _lambda = '\u03BB';
0324: /** <samp>μ</samp> <code>&mu; = &#956;</code> -- greek small letter mu, U+03BC ISOgrk3. */
0325: public static final char _mu = '\u03BC';
0326: /** <samp>ν</samp> <code>&nu; = &#957;</code> -- greek small letter nu, U+03BD ISOgrk3. */
0327: public static final char _nu = '\u03BD';
0328: /** <samp>ξ</samp> <code>&xi; = &#958;</code> -- greek small letter xi, U+03BE ISOgrk3. */
0329: public static final char _xi = '\u03BE';
0330: /** <samp>ο</samp> <code>&omicron; = &#959;</code> -- greek small letter omicron, U+03BF NEW. */
0331: public static final char _omicron = '\u03BF';
0332: /** <samp>π</samp> <code>&pi; = &#960;</code> -- greek small letter pi, U+03C0 ISOgrk3. */
0333: public static final char _pi = '\u03C0';
0334: /** <samp>ρ</samp> <code>&rho; = &#961;</code> -- greek small letter rho, U+03C1 ISOgrk3. */
0335: public static final char _rho = '\u03C1';
0336: /** <samp>ς</samp> <code>&sigmaf; = &#962;</code> -- greek small letter final sigma, U+03C2 ISOgrk3. */
0337: public static final char _sigmaf = '\u03C2';
0338: /** <samp>σ</samp> <code>&sigma; = &#963;</code> -- greek small letter sigma, U+03C3 ISOgrk3. */
0339: public static final char _sigma = '\u03C3';
0340: /** <samp>τ</samp> <code>&tau; = &#964;</code> -- greek small letter tau, U+03C4 ISOgrk3. */
0341: public static final char _tau = '\u03C4';
0342: /** <samp>υ</samp> <code>&upsilon; = &#965;</code> -- greek small letter upsilon, U+03C5 ISOgrk3. */
0343: public static final char _upsilon = '\u03C5';
0344: /** <samp>φ</samp> <code>&phi; = &#966;</code> -- greek small letter phi, U+03C6 ISOgrk3. */
0345: public static final char _phi = '\u03C6';
0346: /** <samp>χ</samp> <code>&chi; = &#967;</code> -- greek small letter chi, U+03C7 ISOgrk3. */
0347: public static final char _chi = '\u03C7';
0348: /** <samp>ψ</samp> <code>&psi; = &#968;</code> -- greek small letter psi, U+03C8 ISOgrk3. */
0349: public static final char _psi = '\u03C8';
0350: /** <samp>ω</samp> <code>&omega; = &#969;</code> -- greek small letter omega, U+03C9 ISOgrk3. */
0351: public static final char _omega = '\u03C9';
0352: /** <samp>ϑ</samp> <code>&thetasym; = &#977;</code> -- greek small letter theta symbol, U+03D1 NEW. */
0353: public static final char _thetasym = '\u03D1';
0354: /** <samp>ϒ</samp> <code>&upsih; = &#978;</code> -- greek upsilon with hook symbol, U+03D2 NEW. */
0355: public static final char _upsih = '\u03D2';
0356: /** <samp>ϖ</samp> <code>&piv; = &#982;</code> -- greek pi symbol, U+03D6 ISOgrk3. */
0357: public static final char _piv = '\u03D6';
0358: /** <samp>•</samp> <code>&bull; = &#8226;</code> -- bullet = black small circle, U+2022 ISOpub<br />(see <a href="#_bull">comments</a>).<p>bullet is NOT the same as bullet operator, U+2219</p> */
0359: public static final char _bull = '\u2022';
0360: /** <samp>…</samp> <code>&hellip; = &#8230;</code> -- horizontal ellipsis = three dot leader, U+2026 ISOpub. */
0361: public static final char _hellip = '\u2026';
0362: /** <samp>′</samp> <code>&prime; = &#8242;</code> -- prime = minutes = feet, U+2032 ISOtech. */
0363: public static final char _prime = '\u2032';
0364: /** <samp>″</samp> <code>&Prime; = &#8243;</code> -- double prime = seconds = inches, U+2033 ISOtech. */
0365: public static final char _Prime = '\u2033';
0366: /** <samp>‾</samp> <code>&oline; = &#8254;</code> -- overline = spacing overscore, U+203E NEW. */
0367: public static final char _oline = '\u203E';
0368: /** <samp>⁄</samp> <code>&frasl; = &#8260;</code> -- fraction slash, U+2044 NEW. */
0369: public static final char _frasl = '\u2044';
0370: /** <samp>℘</samp> <code>&weierp; = &#8472;</code> -- script capital P = power set = Weierstrass p, U+2118 ISOamso. */
0371: public static final char _weierp = '\u2118';
0372: /** <samp>ℑ</samp> <code>&image; = &#8465;</code> -- black-letter capital I = imaginary part, U+2111 ISOamso. */
0373: public static final char _image = '\u2111';
0374: /** <samp>ℜ</samp> <code>&real; = &#8476;</code> -- black-letter capital R = real part symbol, U+211C ISOamso. */
0375: public static final char _real = '\u211C';
0376: /** <samp>™</samp> <code>&trade; = &#8482;</code> -- trade mark sign, U+2122 ISOnum. */
0377: public static final char _trade = '\u2122';
0378: /** <samp>ℵ</samp> <code>&alefsym; = &#8501;</code> -- alef symbol = first transfinite cardinal, U+2135 NEW<br />(see <a href="#_alefsym">comments</a>).<p>alef symbol is NOT the same as hebrew letter alef, U+05D0 although the same glyph could be used to depict both characters</p> */
0379: public static final char _alefsym = '\u2135';
0380: /** <samp>←</samp> <code>&larr; = &#8592;</code> -- leftwards arrow, U+2190 ISOnum. */
0381: public static final char _larr = '\u2190';
0382: /** <samp>↑</samp> <code>&uarr; = &#8593;</code> -- upwards arrow, U+2191 ISOnum. */
0383: public static final char _uarr = '\u2191';
0384: /** <samp>→</samp> <code>&rarr; = &#8594;</code> -- rightwards arrow, U+2192 ISOnum. */
0385: public static final char _rarr = '\u2192';
0386: /** <samp>↓</samp> <code>&darr; = &#8595;</code> -- downwards arrow, U+2193 ISOnum. */
0387: public static final char _darr = '\u2193';
0388: /** <samp>↔</samp> <code>&harr; = &#8596;</code> -- left right arrow, U+2194 ISOamsa. */
0389: public static final char _harr = '\u2194';
0390: /** <samp>↵</samp> <code>&crarr; = &#8629;</code> -- downwards arrow with corner leftwards = carriage return, U+21B5 NEW. */
0391: public static final char _crarr = '\u21B5';
0392: /** <samp>⇐</samp> <code>&lArr; = &#8656;</code> -- leftwards double arrow, U+21D0 ISOtech<br />(see <a href="#_lArr">comments</a>).<p>ISO 10646 does not say that lArr is the same as the 'is implied by' arrow but also does not have any other character for that function, so lArr can be used for 'is implied by' as ISOtech suggests</p> */
0393: public static final char _lArr = '\u21D0';
0394: /** <samp>⇑</samp> <code>&uArr; = &#8657;</code> -- upwards double arrow, U+21D1 ISOamsa. */
0395: public static final char _uArr = '\u21D1';
0396: /** <samp>⇒</samp> <code>&rArr; = &#8658;</code> -- rightwards double arrow, U+21D2 ISOtech<br />(see <a href="#_rArr">comments</a>).<p>ISO 10646 does not say this is the 'implies' character but does not have another character with this function, so rArr can be used for 'implies' as ISOtech suggests</p> */
0397: public static final char _rArr = '\u21D2';
0398: /** <samp>⇓</samp> <code>&dArr; = &#8659;</code> -- downwards double arrow, U+21D3 ISOamsa. */
0399: public static final char _dArr = '\u21D3';
0400: /** <samp>⇔</samp> <code>&hArr; = &#8660;</code> -- left right double arrow, U+21D4 ISOamsa. */
0401: public static final char _hArr = '\u21D4';
0402: /** <samp>∀</samp> <code>&forall; = &#8704;</code> -- for all, U+2200 ISOtech. */
0403: public static final char _forall = '\u2200';
0404: /** <samp>∂</samp> <code>&part; = &#8706;</code> -- partial differential, U+2202 ISOtech. */
0405: public static final char _part = '\u2202';
0406: /** <samp>∃</samp> <code>&exist; = &#8707;</code> -- there exists, U+2203 ISOtech. */
0407: public static final char _exist = '\u2203';
0408: /** <samp>∅</samp> <code>&empty; = &#8709;</code> -- empty set = null set = diameter, U+2205 ISOamso. */
0409: public static final char _empty = '\u2205';
0410: /** <samp>∇</samp> <code>&nabla; = &#8711;</code> -- nabla = backward difference, U+2207 ISOtech. */
0411: public static final char _nabla = '\u2207';
0412: /** <samp>∈</samp> <code>&isin; = &#8712;</code> -- element of, U+2208 ISOtech. */
0413: public static final char _isin = '\u2208';
0414: /** <samp>∉</samp> <code>&notin; = &#8713;</code> -- not an element of, U+2209 ISOtech. */
0415: public static final char _notin = '\u2209';
0416: /** <samp>∋</samp> <code>&ni; = &#8715;</code> -- contains as member, U+220B ISOtech<br />(see <a href="#_ni">comments</a>).<p>should there be a more memorable name than 'ni'?</p> */
0417: public static final char _ni = '\u220B';
0418: /** <samp>∏</samp> <code>&prod; = &#8719;</code> -- n-ary product = product sign, U+220F ISOamsb<br />(see <a href="#_prod">comments</a>).<p>prod is NOT the same character as U+03A0 'greek capital letter pi' though the same glyph might be used for both</p> */
0419: public static final char _prod = '\u220F';
0420: /** <samp>∑</samp> <code>&sum; = &#8721;</code> -- n-ary summation, U+2211 ISOamsb<br />(see <a href="#_sum">comments</a>).<p>sum is NOT the same character as U+03A3 'greek capital letter sigma' though the same glyph might be used for both</p> */
0421: public static final char _sum = '\u2211';
0422: /** <samp>−</samp> <code>&minus; = &#8722;</code> -- minus sign, U+2212 ISOtech. */
0423: public static final char _minus = '\u2212';
0424: /** <samp>∗</samp> <code>&lowast; = &#8727;</code> -- asterisk operator, U+2217 ISOtech. */
0425: public static final char _lowast = '\u2217';
0426: /** <samp>√</samp> <code>&radic; = &#8730;</code> -- square root = radical sign, U+221A ISOtech. */
0427: public static final char _radic = '\u221A';
0428: /** <samp>∝</samp> <code>&prop; = &#8733;</code> -- proportional to, U+221D ISOtech. */
0429: public static final char _prop = '\u221D';
0430: /** <samp>∞</samp> <code>&infin; = &#8734;</code> -- infinity, U+221E ISOtech. */
0431: public static final char _infin = '\u221E';
0432: /** <samp>∠</samp> <code>&ang; = &#8736;</code> -- angle, U+2220 ISOamso. */
0433: public static final char _ang = '\u2220';
0434: /** <samp>∧</samp> <code>&and; = &#8743;</code> -- logical and = wedge, U+2227 ISOtech. */
0435: public static final char _and = '\u2227';
0436: /** <samp>∨</samp> <code>&or; = &#8744;</code> -- logical or = vee, U+2228 ISOtech. */
0437: public static final char _or = '\u2228';
0438: /** <samp>∩</samp> <code>&cap; = &#8745;</code> -- intersection = cap, U+2229 ISOtech. */
0439: public static final char _cap = '\u2229';
0440: /** <samp>∪</samp> <code>&cup; = &#8746;</code> -- union = cup, U+222A ISOtech. */
0441: public static final char _cup = '\u222A';
0442: /** <samp>∫</samp> <code>&int; = &#8747;</code> -- integral, U+222B ISOtech. */
0443: public static final char _int = '\u222B';
0444: /** <samp>∴</samp> <code>&there4; = &#8756;</code> -- therefore, U+2234 ISOtech. */
0445: public static final char _there4 = '\u2234';
0446: /** <samp>∼</samp> <code>&sim; = &#8764;</code> -- tilde operator = varies with = similar to, U+223C ISOtech<br />(see <a href="#_sim">comments</a>).<p>tilde operator is NOT the same character as the tilde, U+007E, although the same glyph might be used to represent both</p> */
0447: public static final char _sim = '\u223C';
0448: /** <samp>≅</samp> <code>&cong; = &#8773;</code> -- approximately equal to, U+2245 ISOtech. */
0449: public static final char _cong = '\u2245';
0450: /** <samp>≈</samp> <code>&asymp; = &#8776;</code> -- almost equal to = asymptotic to, U+2248 ISOamsr. */
0451: public static final char _asymp = '\u2248';
0452: /** <samp>≠</samp> <code>&ne; = &#8800;</code> -- not equal to, U+2260 ISOtech. */
0453: public static final char _ne = '\u2260';
0454: /** <samp>≡</samp> <code>&equiv; = &#8801;</code> -- identical to, U+2261 ISOtech. */
0455: public static final char _equiv = '\u2261';
0456: /** <samp>≤</samp> <code>&le; = &#8804;</code> -- less-than or equal to, U+2264 ISOtech. */
0457: public static final char _le = '\u2264';
0458: /** <samp>≥</samp> <code>&ge; = &#8805;</code> -- greater-than or equal to, U+2265 ISOtech. */
0459: public static final char _ge = '\u2265';
0460: /** <samp>⊂</samp> <code>&sub; = &#8834;</code> -- subset of, U+2282 ISOtech. */
0461: public static final char _sub = '\u2282';
/** <samp>⊃</samp> <code>&sup; = &#8835;</code> -- superset of, U+2283 ISOtech<br />(see <a href="#_sup">comments</a>).<p>note that nsup, 'not a superset of, U+2285' is not covered by the Symbol font encoding and is not included. Should it be, for symmetry? It is in ISOamsn</p> */
0463: public static final char _sup = '\u2283';
0464: /** <samp>⊄</samp> <code>&nsub; = &#8836;</code> -- not a subset of, U+2284 ISOamsn. */
0465: public static final char _nsub = '\u2284';
0466: /** <samp>⊆</samp> <code>&sube; = &#8838;</code> -- subset of or equal to, U+2286 ISOtech. */
0467: public static final char _sube = '\u2286';
0468: /** <samp>⊇</samp> <code>&supe; = &#8839;</code> -- superset of or equal to, U+2287 ISOtech. */
0469: public static final char _supe = '\u2287';
0470: /** <samp>⊕</samp> <code>&oplus; = &#8853;</code> -- circled plus = direct sum, U+2295 ISOamsb. */
0471: public static final char _oplus = '\u2295';
0472: /** <samp>⊗</samp> <code>&otimes; = &#8855;</code> -- circled times = vector product, U+2297 ISOamsb. */
0473: public static final char _otimes = '\u2297';
0474: /** <samp>⊥</samp> <code>&perp; = &#8869;</code> -- up tack = orthogonal to = perpendicular, U+22A5 ISOtech. */
0475: public static final char _perp = '\u22A5';
0476: /** <samp>⋅</samp> <code>&sdot; = &#8901;</code> -- dot operator, U+22C5 ISOamsb<br />(see <a href="#_sdot">comments</a>).<p>dot operator is NOT the same character as U+00B7 middle dot</p> */
0477: public static final char _sdot = '\u22C5';
0478: /** <samp>⌈</samp> <code>&lceil; = &#8968;</code> -- left ceiling = APL upstile, U+2308 ISOamsc. */
0479: public static final char _lceil = '\u2308';
0480: /** <samp>⌉</samp> <code>&rceil; = &#8969;</code> -- right ceiling, U+2309 ISOamsc. */
0481: public static final char _rceil = '\u2309';
0482: /** <samp>⌊</samp> <code>&lfloor; = &#8970;</code> -- left floor = APL downstile, U+230A ISOamsc. */
0483: public static final char _lfloor = '\u230A';
0484: /** <samp>⌋</samp> <code>&rfloor; = &#8971;</code> -- right floor, U+230B ISOamsc. */
0485: public static final char _rfloor = '\u230B';
0486: /** <samp>⟨</samp> <code>&lang; = &#9001;</code> -- left-pointing angle bracket = bra, U+2329 ISOtech<br />(see <a href="#_lang">comments</a>).<p>lang is NOT the same character as U+003C 'less than' or U+2039 'single left-pointing angle quotation mark'</p> */
0487: public static final char _lang = '\u2329';
0488: /** <samp>⟩</samp> <code>&rang; = &#9002;</code> -- right-pointing angle bracket = ket, U+232A ISOtech<br />(see <a href="#_rang">comments</a>).<p>rang is NOT the same character as U+003E 'greater than' or U+203A 'single right-pointing angle quotation mark'</p> */
0489: public static final char _rang = '\u232A';
0490: /** <samp>◊</samp> <code>&loz; = &#9674;</code> -- lozenge, U+25CA ISOpub. */
0491: public static final char _loz = '\u25CA';
0492: /** <samp>♠</samp> <code>&spades; = &#9824;</code> -- black spade suit, U+2660 ISOpub<br />(see <a href="#_spades">comments</a>).<p>black here seems to mean filled as opposed to hollow</p> */
0493: public static final char _spades = '\u2660';
0494: /** <samp>♣</samp> <code>&clubs; = &#9827;</code> -- black club suit = shamrock, U+2663 ISOpub. */
0495: public static final char _clubs = '\u2663';
0496: /** <samp>♥</samp> <code>&hearts; = &#9829;</code> -- black heart suit = valentine, U+2665 ISOpub. */
0497: public static final char _hearts = '\u2665';
0498: /** <samp>♦</samp> <code>&diams; = &#9830;</code> -- black diamond suit, U+2666 ISOpub. */
0499: public static final char _diams = '\u2666';
0500: /** <samp>"</samp> <code>&quot; = &#34;</code> -- quotation mark = APL quote, U+0022 ISOnum. */
0501: public static final char _quot = '\u0022';
/** <samp>&amp;</samp> <code>&amp; = &#38;</code> -- ampersand, U+0026 ISOnum. */
0503: public static final char _amp = '\u0026';
/** <samp>&lt;</samp> <code>&lt; = &#60;</code> -- less-than sign, U+003C ISOnum. */
0505: public static final char _lt = '\u003C';
/** <samp>&gt;</samp> <code>&gt; = &#62;</code> -- greater-than sign, U+003E ISOnum. */
0507: public static final char _gt = '\u003E';
0508: /** <samp>Œ</samp> <code>&OElig; = &#338;</code> -- latin capital ligature OE, U+0152 ISOlat2. */
0509: public static final char _OElig = '\u0152';
0510: /** <samp>œ</samp> <code>&oelig; = &#339;</code> -- latin small ligature oe, U+0153 ISOlat2<br />(see <a href="#_oelig">comments</a>).<p>ligature is a misnomer, this is a separate character in some languages</p> */
0511: public static final char _oelig = '\u0153';
0512: /** <samp>Š</samp> <code>&Scaron; = &#352;</code> -- latin capital letter S with caron, U+0160 ISOlat2. */
0513: public static final char _Scaron = '\u0160';
0514: /** <samp>š</samp> <code>&scaron; = &#353;</code> -- latin small letter s with caron, U+0161 ISOlat2. */
0515: public static final char _scaron = '\u0161';
0516: /** <samp>Ÿ</samp> <code>&Yuml; = &#376;</code> -- latin capital letter Y with diaeresis, U+0178 ISOlat2. */
0517: public static final char _Yuml = '\u0178';
0518: /** <samp>ˆ</samp> <code>&circ; = &#710;</code> -- modifier letter circumflex accent, U+02C6 ISOpub. */
0519: public static final char _circ = '\u02C6';
0520: /** <samp>˜</samp> <code>&tilde; = &#732;</code> -- small tilde, U+02DC ISOdia. */
0521: public static final char _tilde = '\u02DC';
0522: /** <samp> </samp> <code>&ensp; = &#8194;</code> -- en space, U+2002 ISOpub. */
0523: public static final char _ensp = '\u2002';
0524: /** <samp> </samp> <code>&emsp; = &#8195;</code> -- em space, U+2003 ISOpub. */
0525: public static final char _emsp = '\u2003';
0526: /** <samp> </samp> <code>&thinsp; = &#8201;</code> -- thin space, U+2009 ISOpub. */
0527: public static final char _thinsp = '\u2009';
0528: /** <samp>‌</samp> <code>&zwnj; = &#8204;</code> -- zero width non-joiner, U+200C NEW RFC 2070. */
0529: public static final char _zwnj = '\u200C';
0530: /** <samp>‍</samp> <code>&zwj; = &#8205;</code> -- zero width joiner, U+200D NEW RFC 2070. */
0531: public static final char _zwj = '\u200D';
0532: /** <samp>‎</samp> <code>&lrm; = &#8206;</code> -- left-to-right mark, U+200E NEW RFC 2070. */
0533: public static final char _lrm = '\u200E';
0534: /** <samp>‏</samp> <code>&rlm; = &#8207;</code> -- right-to-left mark, U+200F NEW RFC 2070. */
0535: public static final char _rlm = '\u200F';
0536: /** <samp>–</samp> <code>&ndash; = &#8211;</code> -- en dash, U+2013 ISOpub. */
0537: public static final char _ndash = '\u2013';
0538: /** <samp>—</samp> <code>&mdash; = &#8212;</code> -- em dash, U+2014 ISOpub. */
0539: public static final char _mdash = '\u2014';
0540: /** <samp>‘</samp> <code>&lsquo; = &#8216;</code> -- left single quotation mark, U+2018 ISOnum. */
0541: public static final char _lsquo = '\u2018';
0542: /** <samp>’</samp> <code>&rsquo; = &#8217;</code> -- right single quotation mark, U+2019 ISOnum. */
0543: public static final char _rsquo = '\u2019';
0544: /** <samp>‚</samp> <code>&sbquo; = &#8218;</code> -- single low-9 quotation mark, U+201A NEW. */
0545: public static final char _sbquo = '\u201A';
0546: /** <samp>“</samp> <code>&ldquo; = &#8220;</code> -- left double quotation mark, U+201C ISOnum. */
0547: public static final char _ldquo = '\u201C';
0548: /** <samp>”</samp> <code>&rdquo; = &#8221;</code> -- right double quotation mark, U+201D ISOnum. */
0549: public static final char _rdquo = '\u201D';
0550: /** <samp>„</samp> <code>&bdquo; = &#8222;</code> -- double low-9 quotation mark, U+201E NEW. */
0551: public static final char _bdquo = '\u201E';
0552: /** <samp>†</samp> <code>&dagger; = &#8224;</code> -- dagger, U+2020 ISOpub. */
0553: public static final char _dagger = '\u2020';
0554: /** <samp>‡</samp> <code>&Dagger; = &#8225;</code> -- double dagger, U+2021 ISOpub. */
0555: public static final char _Dagger = '\u2021';
0556: /** <samp>‰</samp> <code>&permil; = &#8240;</code> -- per mille sign, U+2030 ISOtech. */
0557: public static final char _permil = '\u2030';
0558: /** <samp>‹</samp> <code>&lsaquo; = &#8249;</code> -- single left-pointing angle quotation mark, U+2039 ISO proposed<br />(see <a href="#_lsaquo">comments</a>).<p>lsaquo is proposed but not yet ISO standardized</p> */
0559: public static final char _lsaquo = '\u2039';
0560: /** <samp>›</samp> <code>&rsaquo; = &#8250;</code> -- single right-pointing angle quotation mark, U+203A ISO proposed<br />(see <a href="#_rsaquo">comments</a>).<p>rsaquo is proposed but not yet ISO standardized</p> */
0561: public static final char _rsaquo = '\u203A';
0562: /** <samp>€</samp> <code>&euro; = &#8364;</code> -- euro sign, U+20AC NEW. */
0563: public static final char _euro = '\u20AC';
/**
 * <samp>'</samp> <code>&apos; = &#39;</code> -- apostrophe = APL quote, U+0027 ISOnum<br />(see <a href="#_apos">comments</a>).<p>
 * apos is only defined for use in XHTML
 * (see the <a target="_blank" href="http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters">XHTML Special Characters Entity Set</a>),
 * but not in HTML.</p>
 * @see Config#IsApostropheEncoded
 */
0571: public static final char _apos = '\'';
0572:
0573: private static Map NAME_TO_CODE_POINT_MAP = new HashMap(512, 1.0F); // 253 entities in total
0574: private static IntStringHashMap CODE_POINT_TO_NAME_MAP;
0575:
0576: private static int MAX_NAME_LENGTH = 0;
0577:
0578: static {
0579: NAME_TO_CODE_POINT_MAP.put("nbsp", new Integer(_nbsp));
0580: NAME_TO_CODE_POINT_MAP.put("iexcl", new Integer(_iexcl));
0581: NAME_TO_CODE_POINT_MAP.put("cent", new Integer(_cent));
0582: NAME_TO_CODE_POINT_MAP.put("pound", new Integer(_pound));
0583: NAME_TO_CODE_POINT_MAP.put("curren", new Integer(_curren));
0584: NAME_TO_CODE_POINT_MAP.put("yen", new Integer(_yen));
0585: NAME_TO_CODE_POINT_MAP.put("brvbar", new Integer(_brvbar));
0586: NAME_TO_CODE_POINT_MAP.put("sect", new Integer(_sect));
0587: NAME_TO_CODE_POINT_MAP.put("uml", new Integer(_uml));
0588: NAME_TO_CODE_POINT_MAP.put("copy", new Integer(_copy));
0589: NAME_TO_CODE_POINT_MAP.put("ordf", new Integer(_ordf));
0590: NAME_TO_CODE_POINT_MAP.put("laquo", new Integer(_laquo));
0591: NAME_TO_CODE_POINT_MAP.put("not", new Integer(_not));
0592: NAME_TO_CODE_POINT_MAP.put("shy", new Integer(_shy));
0593: NAME_TO_CODE_POINT_MAP.put("reg", new Integer(_reg));
0594: NAME_TO_CODE_POINT_MAP.put("macr", new Integer(_macr));
0595: NAME_TO_CODE_POINT_MAP.put("deg", new Integer(_deg));
0596: NAME_TO_CODE_POINT_MAP.put("plusmn", new Integer(_plusmn));
0597: NAME_TO_CODE_POINT_MAP.put("sup2", new Integer(_sup2));
0598: NAME_TO_CODE_POINT_MAP.put("sup3", new Integer(_sup3));
0599: NAME_TO_CODE_POINT_MAP.put("acute", new Integer(_acute));
0600: NAME_TO_CODE_POINT_MAP.put("micro", new Integer(_micro));
0601: NAME_TO_CODE_POINT_MAP.put("para", new Integer(_para));
0602: NAME_TO_CODE_POINT_MAP.put("middot", new Integer(_middot));
0603: NAME_TO_CODE_POINT_MAP.put("cedil", new Integer(_cedil));
0604: NAME_TO_CODE_POINT_MAP.put("sup1", new Integer(_sup1));
0605: NAME_TO_CODE_POINT_MAP.put("ordm", new Integer(_ordm));
0606: NAME_TO_CODE_POINT_MAP.put("raquo", new Integer(_raquo));
0607: NAME_TO_CODE_POINT_MAP.put("frac14", new Integer(_frac14));
0608: NAME_TO_CODE_POINT_MAP.put("frac12", new Integer(_frac12));
0609: NAME_TO_CODE_POINT_MAP.put("frac34", new Integer(_frac34));
0610: NAME_TO_CODE_POINT_MAP.put("iquest", new Integer(_iquest));
0611: NAME_TO_CODE_POINT_MAP.put("Agrave", new Integer(_Agrave));
0612: NAME_TO_CODE_POINT_MAP.put("Aacute", new Integer(_Aacute));
0613: NAME_TO_CODE_POINT_MAP.put("Acirc", new Integer(_Acirc));
0614: NAME_TO_CODE_POINT_MAP.put("Atilde", new Integer(_Atilde));
0615: NAME_TO_CODE_POINT_MAP.put("Auml", new Integer(_Auml));
0616: NAME_TO_CODE_POINT_MAP.put("Aring", new Integer(_Aring));
0617: NAME_TO_CODE_POINT_MAP.put("AElig", new Integer(_AElig));
0618: NAME_TO_CODE_POINT_MAP.put("Ccedil", new Integer(_Ccedil));
0619: NAME_TO_CODE_POINT_MAP.put("Egrave", new Integer(_Egrave));
0620: NAME_TO_CODE_POINT_MAP.put("Eacute", new Integer(_Eacute));
0621: NAME_TO_CODE_POINT_MAP.put("Ecirc", new Integer(_Ecirc));
0622: NAME_TO_CODE_POINT_MAP.put("Euml", new Integer(_Euml));
0623: NAME_TO_CODE_POINT_MAP.put("Igrave", new Integer(_Igrave));
0624: NAME_TO_CODE_POINT_MAP.put("Iacute", new Integer(_Iacute));
0625: NAME_TO_CODE_POINT_MAP.put("Icirc", new Integer(_Icirc));
0626: NAME_TO_CODE_POINT_MAP.put("Iuml", new Integer(_Iuml));
0627: NAME_TO_CODE_POINT_MAP.put("ETH", new Integer(_ETH));
0628: NAME_TO_CODE_POINT_MAP.put("Ntilde", new Integer(_Ntilde));
0629: NAME_TO_CODE_POINT_MAP.put("Ograve", new Integer(_Ograve));
0630: NAME_TO_CODE_POINT_MAP.put("Oacute", new Integer(_Oacute));
0631: NAME_TO_CODE_POINT_MAP.put("Ocirc", new Integer(_Ocirc));
0632: NAME_TO_CODE_POINT_MAP.put("Otilde", new Integer(_Otilde));
0633: NAME_TO_CODE_POINT_MAP.put("Ouml", new Integer(_Ouml));
0634: NAME_TO_CODE_POINT_MAP.put("times", new Integer(_times));
0635: NAME_TO_CODE_POINT_MAP.put("Oslash", new Integer(_Oslash));
0636: NAME_TO_CODE_POINT_MAP.put("Ugrave", new Integer(_Ugrave));
0637: NAME_TO_CODE_POINT_MAP.put("Uacute", new Integer(_Uacute));
0638: NAME_TO_CODE_POINT_MAP.put("Ucirc", new Integer(_Ucirc));
0639: NAME_TO_CODE_POINT_MAP.put("Uuml", new Integer(_Uuml));
0640: NAME_TO_CODE_POINT_MAP.put("Yacute", new Integer(_Yacute));
0641: NAME_TO_CODE_POINT_MAP.put("THORN", new Integer(_THORN));
0642: NAME_TO_CODE_POINT_MAP.put("szlig", new Integer(_szlig));
0643: NAME_TO_CODE_POINT_MAP.put("agrave", new Integer(_agrave));
0644: NAME_TO_CODE_POINT_MAP.put("aacute", new Integer(_aacute));
0645: NAME_TO_CODE_POINT_MAP.put("acirc", new Integer(_acirc));
0646: NAME_TO_CODE_POINT_MAP.put("atilde", new Integer(_atilde));
0647: NAME_TO_CODE_POINT_MAP.put("auml", new Integer(_auml));
0648: NAME_TO_CODE_POINT_MAP.put("aring", new Integer(_aring));
0649: NAME_TO_CODE_POINT_MAP.put("aelig", new Integer(_aelig));
0650: NAME_TO_CODE_POINT_MAP.put("ccedil", new Integer(_ccedil));
0651: NAME_TO_CODE_POINT_MAP.put("egrave", new Integer(_egrave));
0652: NAME_TO_CODE_POINT_MAP.put("eacute", new Integer(_eacute));
0653: NAME_TO_CODE_POINT_MAP.put("ecirc", new Integer(_ecirc));
0654: NAME_TO_CODE_POINT_MAP.put("euml", new Integer(_euml));
0655: NAME_TO_CODE_POINT_MAP.put("igrave", new Integer(_igrave));
0656: NAME_TO_CODE_POINT_MAP.put("iacute", new Integer(_iacute));
0657: NAME_TO_CODE_POINT_MAP.put("icirc", new Integer(_icirc));
0658: NAME_TO_CODE_POINT_MAP.put("iuml", new Integer(_iuml));
0659: NAME_TO_CODE_POINT_MAP.put("eth", new Integer(_eth));
0660: NAME_TO_CODE_POINT_MAP.put("ntilde", new Integer(_ntilde));
0661: NAME_TO_CODE_POINT_MAP.put("ograve", new Integer(_ograve));
0662: NAME_TO_CODE_POINT_MAP.put("oacute", new Integer(_oacute));
0663: NAME_TO_CODE_POINT_MAP.put("ocirc", new Integer(_ocirc));
0664: NAME_TO_CODE_POINT_MAP.put("otilde", new Integer(_otilde));
0665: NAME_TO_CODE_POINT_MAP.put("ouml", new Integer(_ouml));
0666: NAME_TO_CODE_POINT_MAP.put("divide", new Integer(_divide));
0667: NAME_TO_CODE_POINT_MAP.put("oslash", new Integer(_oslash));
0668: NAME_TO_CODE_POINT_MAP.put("ugrave", new Integer(_ugrave));
0669: NAME_TO_CODE_POINT_MAP.put("uacute", new Integer(_uacute));
0670: NAME_TO_CODE_POINT_MAP.put("ucirc", new Integer(_ucirc));
0671: NAME_TO_CODE_POINT_MAP.put("uuml", new Integer(_uuml));
0672: NAME_TO_CODE_POINT_MAP.put("yacute", new Integer(_yacute));
0673: NAME_TO_CODE_POINT_MAP.put("thorn", new Integer(_thorn));
0674: NAME_TO_CODE_POINT_MAP.put("yuml", new Integer(_yuml));
0675: NAME_TO_CODE_POINT_MAP.put("fnof", new Integer(_fnof));
0676: NAME_TO_CODE_POINT_MAP.put("Alpha", new Integer(_Alpha));
0677: NAME_TO_CODE_POINT_MAP.put("Beta", new Integer(_Beta));
0678: NAME_TO_CODE_POINT_MAP.put("Gamma", new Integer(_Gamma));
0679: NAME_TO_CODE_POINT_MAP.put("Delta", new Integer(_Delta));
0680: NAME_TO_CODE_POINT_MAP.put("Epsilon", new Integer(_Epsilon));
0681: NAME_TO_CODE_POINT_MAP.put("Zeta", new Integer(_Zeta));
0682: NAME_TO_CODE_POINT_MAP.put("Eta", new Integer(_Eta));
0683: NAME_TO_CODE_POINT_MAP.put("Theta", new Integer(_Theta));
0684: NAME_TO_CODE_POINT_MAP.put("Iota", new Integer(_Iota));
0685: NAME_TO_CODE_POINT_MAP.put("Kappa", new Integer(_Kappa));
0686: NAME_TO_CODE_POINT_MAP.put("Lambda", new Integer(_Lambda));
0687: NAME_TO_CODE_POINT_MAP.put("Mu", new Integer(_Mu));
0688: NAME_TO_CODE_POINT_MAP.put("Nu", new Integer(_Nu));
0689: NAME_TO_CODE_POINT_MAP.put("Xi", new Integer(_Xi));
0690: NAME_TO_CODE_POINT_MAP.put("Omicron", new Integer(_Omicron));
0691: NAME_TO_CODE_POINT_MAP.put("Pi", new Integer(_Pi));
0692: NAME_TO_CODE_POINT_MAP.put("Rho", new Integer(_Rho));
0693: NAME_TO_CODE_POINT_MAP.put("Sigma", new Integer(_Sigma));
0694: NAME_TO_CODE_POINT_MAP.put("Tau", new Integer(_Tau));
0695: NAME_TO_CODE_POINT_MAP.put("Upsilon", new Integer(_Upsilon));
0696: NAME_TO_CODE_POINT_MAP.put("Phi", new Integer(_Phi));
0697: NAME_TO_CODE_POINT_MAP.put("Chi", new Integer(_Chi));
0698: NAME_TO_CODE_POINT_MAP.put("Psi", new Integer(_Psi));
0699: NAME_TO_CODE_POINT_MAP.put("Omega", new Integer(_Omega));
0700: NAME_TO_CODE_POINT_MAP.put("alpha", new Integer(_alpha));
0701: NAME_TO_CODE_POINT_MAP.put("beta", new Integer(_beta));
0702: NAME_TO_CODE_POINT_MAP.put("gamma", new Integer(_gamma));
0703: NAME_TO_CODE_POINT_MAP.put("delta", new Integer(_delta));
0704: NAME_TO_CODE_POINT_MAP.put("epsilon", new Integer(_epsilon));
0705: NAME_TO_CODE_POINT_MAP.put("zeta", new Integer(_zeta));
0706: NAME_TO_CODE_POINT_MAP.put("eta", new Integer(_eta));
0707: NAME_TO_CODE_POINT_MAP.put("theta", new Integer(_theta));
0708: NAME_TO_CODE_POINT_MAP.put("iota", new Integer(_iota));
0709: NAME_TO_CODE_POINT_MAP.put("kappa", new Integer(_kappa));
0710: NAME_TO_CODE_POINT_MAP.put("lambda", new Integer(_lambda));
0711: NAME_TO_CODE_POINT_MAP.put("mu", new Integer(_mu));
0712: NAME_TO_CODE_POINT_MAP.put("nu", new Integer(_nu));
0713: NAME_TO_CODE_POINT_MAP.put("xi", new Integer(_xi));
0714: NAME_TO_CODE_POINT_MAP.put("omicron", new Integer(_omicron));
0715: NAME_TO_CODE_POINT_MAP.put("pi", new Integer(_pi));
0716: NAME_TO_CODE_POINT_MAP.put("rho", new Integer(_rho));
0717: NAME_TO_CODE_POINT_MAP.put("sigmaf", new Integer(_sigmaf));
0718: NAME_TO_CODE_POINT_MAP.put("sigma", new Integer(_sigma));
0719: NAME_TO_CODE_POINT_MAP.put("tau", new Integer(_tau));
0720: NAME_TO_CODE_POINT_MAP.put("upsilon", new Integer(_upsilon));
0721: NAME_TO_CODE_POINT_MAP.put("phi", new Integer(_phi));
0722: NAME_TO_CODE_POINT_MAP.put("chi", new Integer(_chi));
0723: NAME_TO_CODE_POINT_MAP.put("psi", new Integer(_psi));
0724: NAME_TO_CODE_POINT_MAP.put("omega", new Integer(_omega));
0725: NAME_TO_CODE_POINT_MAP.put("thetasym", new Integer(_thetasym));
0726: NAME_TO_CODE_POINT_MAP.put("upsih", new Integer(_upsih));
0727: NAME_TO_CODE_POINT_MAP.put("piv", new Integer(_piv));
0728: NAME_TO_CODE_POINT_MAP.put("bull", new Integer(_bull));
0729: NAME_TO_CODE_POINT_MAP.put("hellip", new Integer(_hellip));
0730: NAME_TO_CODE_POINT_MAP.put("prime", new Integer(_prime));
0731: NAME_TO_CODE_POINT_MAP.put("Prime", new Integer(_Prime));
0732: NAME_TO_CODE_POINT_MAP.put("oline", new Integer(_oline));
0733: NAME_TO_CODE_POINT_MAP.put("frasl", new Integer(_frasl));
0734: NAME_TO_CODE_POINT_MAP.put("weierp", new Integer(_weierp));
0735: NAME_TO_CODE_POINT_MAP.put("image", new Integer(_image));
0736: NAME_TO_CODE_POINT_MAP.put("real", new Integer(_real));
0737: NAME_TO_CODE_POINT_MAP.put("trade", new Integer(_trade));
0738: NAME_TO_CODE_POINT_MAP.put("alefsym", new Integer(_alefsym));
0739: NAME_TO_CODE_POINT_MAP.put("larr", new Integer(_larr));
0740: NAME_TO_CODE_POINT_MAP.put("uarr", new Integer(_uarr));
0741: NAME_TO_CODE_POINT_MAP.put("rarr", new Integer(_rarr));
0742: NAME_TO_CODE_POINT_MAP.put("darr", new Integer(_darr));
0743: NAME_TO_CODE_POINT_MAP.put("harr", new Integer(_harr));
0744: NAME_TO_CODE_POINT_MAP.put("crarr", new Integer(_crarr));
0745: NAME_TO_CODE_POINT_MAP.put("lArr", new Integer(_lArr));
0746: NAME_TO_CODE_POINT_MAP.put("uArr", new Integer(_uArr));
0747: NAME_TO_CODE_POINT_MAP.put("rArr", new Integer(_rArr));
0748: NAME_TO_CODE_POINT_MAP.put("dArr", new Integer(_dArr));
0749: NAME_TO_CODE_POINT_MAP.put("hArr", new Integer(_hArr));
0750: NAME_TO_CODE_POINT_MAP.put("forall", new Integer(_forall));
0751: NAME_TO_CODE_POINT_MAP.put("part", new Integer(_part));
0752: NAME_TO_CODE_POINT_MAP.put("exist", new Integer(_exist));
0753: NAME_TO_CODE_POINT_MAP.put("empty", new Integer(_empty));
0754: NAME_TO_CODE_POINT_MAP.put("nabla", new Integer(_nabla));
0755: NAME_TO_CODE_POINT_MAP.put("isin", new Integer(_isin));
0756: NAME_TO_CODE_POINT_MAP.put("notin", new Integer(_notin));
0757: NAME_TO_CODE_POINT_MAP.put("ni", new Integer(_ni));
0758: NAME_TO_CODE_POINT_MAP.put("prod", new Integer(_prod));
0759: NAME_TO_CODE_POINT_MAP.put("sum", new Integer(_sum));
0760: NAME_TO_CODE_POINT_MAP.put("minus", new Integer(_minus));
0761: NAME_TO_CODE_POINT_MAP.put("lowast", new Integer(_lowast));
0762: NAME_TO_CODE_POINT_MAP.put("radic", new Integer(_radic));
0763: NAME_TO_CODE_POINT_MAP.put("prop", new Integer(_prop));
0764: NAME_TO_CODE_POINT_MAP.put("infin", new Integer(_infin));
0765: NAME_TO_CODE_POINT_MAP.put("ang", new Integer(_ang));
0766: NAME_TO_CODE_POINT_MAP.put("and", new Integer(_and));
0767: NAME_TO_CODE_POINT_MAP.put("or", new Integer(_or));
0768: NAME_TO_CODE_POINT_MAP.put("cap", new Integer(_cap));
0769: NAME_TO_CODE_POINT_MAP.put("cup", new Integer(_cup));
0770: NAME_TO_CODE_POINT_MAP.put("int", new Integer(_int));
0771: NAME_TO_CODE_POINT_MAP.put("there4", new Integer(_there4));
0772: NAME_TO_CODE_POINT_MAP.put("sim", new Integer(_sim));
0773: NAME_TO_CODE_POINT_MAP.put("cong", new Integer(_cong));
0774: NAME_TO_CODE_POINT_MAP.put("asymp", new Integer(_asymp));
0775: NAME_TO_CODE_POINT_MAP.put("ne", new Integer(_ne));
0776: NAME_TO_CODE_POINT_MAP.put("equiv", new Integer(_equiv));
0777: NAME_TO_CODE_POINT_MAP.put("le", new Integer(_le));
0778: NAME_TO_CODE_POINT_MAP.put("ge", new Integer(_ge));
0779: NAME_TO_CODE_POINT_MAP.put("sub", new Integer(_sub));
0780: NAME_TO_CODE_POINT_MAP.put("sup", new Integer(_sup));
0781: NAME_TO_CODE_POINT_MAP.put("nsub", new Integer(_nsub));
0782: NAME_TO_CODE_POINT_MAP.put("sube", new Integer(_sube));
0783: NAME_TO_CODE_POINT_MAP.put("supe", new Integer(_supe));
0784: NAME_TO_CODE_POINT_MAP.put("oplus", new Integer(_oplus));
0785: NAME_TO_CODE_POINT_MAP.put("otimes", new Integer(_otimes));
0786: NAME_TO_CODE_POINT_MAP.put("perp", new Integer(_perp));
0787: NAME_TO_CODE_POINT_MAP.put("sdot", new Integer(_sdot));
0788: NAME_TO_CODE_POINT_MAP.put("lceil", new Integer(_lceil));
0789: NAME_TO_CODE_POINT_MAP.put("rceil", new Integer(_rceil));
0790: NAME_TO_CODE_POINT_MAP.put("lfloor", new Integer(_lfloor));
0791: NAME_TO_CODE_POINT_MAP.put("rfloor", new Integer(_rfloor));
0792: NAME_TO_CODE_POINT_MAP.put("lang", new Integer(_lang));
0793: NAME_TO_CODE_POINT_MAP.put("rang", new Integer(_rang));
0794: NAME_TO_CODE_POINT_MAP.put("loz", new Integer(_loz));
0795: NAME_TO_CODE_POINT_MAP.put("spades", new Integer(_spades));
0796: NAME_TO_CODE_POINT_MAP.put("clubs", new Integer(_clubs));
0797: NAME_TO_CODE_POINT_MAP.put("hearts", new Integer(_hearts));
0798: NAME_TO_CODE_POINT_MAP.put("diams", new Integer(_diams));
0799: NAME_TO_CODE_POINT_MAP.put("quot", new Integer(_quot));
0800: NAME_TO_CODE_POINT_MAP.put("amp", new Integer(_amp));
0801: NAME_TO_CODE_POINT_MAP.put("lt", new Integer(_lt));
0802: NAME_TO_CODE_POINT_MAP.put("gt", new Integer(_gt));
0803: NAME_TO_CODE_POINT_MAP.put("OElig", new Integer(_OElig));
0804: NAME_TO_CODE_POINT_MAP.put("oelig", new Integer(_oelig));
0805: NAME_TO_CODE_POINT_MAP.put("Scaron", new Integer(_Scaron));
0806: NAME_TO_CODE_POINT_MAP.put("scaron", new Integer(_scaron));
0807: NAME_TO_CODE_POINT_MAP.put("Yuml", new Integer(_Yuml));
0808: NAME_TO_CODE_POINT_MAP.put("circ", new Integer(_circ));
0809: NAME_TO_CODE_POINT_MAP.put("tilde", new Integer(_tilde));
0810: NAME_TO_CODE_POINT_MAP.put("ensp", new Integer(_ensp));
0811: NAME_TO_CODE_POINT_MAP.put("emsp", new Integer(_emsp));
0812: NAME_TO_CODE_POINT_MAP.put("thinsp", new Integer(_thinsp));
0813: NAME_TO_CODE_POINT_MAP.put("zwnj", new Integer(_zwnj));
0814: NAME_TO_CODE_POINT_MAP.put("zwj", new Integer(_zwj));
0815: NAME_TO_CODE_POINT_MAP.put("lrm", new Integer(_lrm));
0816: NAME_TO_CODE_POINT_MAP.put("rlm", new Integer(_rlm));
0817: NAME_TO_CODE_POINT_MAP.put("ndash", new Integer(_ndash));
0818: NAME_TO_CODE_POINT_MAP.put("mdash", new Integer(_mdash));
0819: NAME_TO_CODE_POINT_MAP.put("lsquo", new Integer(_lsquo));
0820: NAME_TO_CODE_POINT_MAP.put("rsquo", new Integer(_rsquo));
0821: NAME_TO_CODE_POINT_MAP.put("sbquo", new Integer(_sbquo));
0822: NAME_TO_CODE_POINT_MAP.put("ldquo", new Integer(_ldquo));
0823: NAME_TO_CODE_POINT_MAP.put("rdquo", new Integer(_rdquo));
0824: NAME_TO_CODE_POINT_MAP.put("bdquo", new Integer(_bdquo));
0825: NAME_TO_CODE_POINT_MAP.put("dagger", new Integer(_dagger));
0826: NAME_TO_CODE_POINT_MAP.put("Dagger", new Integer(_Dagger));
0827: NAME_TO_CODE_POINT_MAP.put("permil", new Integer(_permil));
0828: NAME_TO_CODE_POINT_MAP.put("lsaquo", new Integer(_lsaquo));
0829: NAME_TO_CODE_POINT_MAP.put("rsaquo", new Integer(_rsaquo));
0830: NAME_TO_CODE_POINT_MAP.put("euro", new Integer(_euro));
0831: NAME_TO_CODE_POINT_MAP.put("apos", new Integer(_apos));
0832:
0833: CODE_POINT_TO_NAME_MAP = new IntStringHashMap(
0834: (int) (NAME_TO_CODE_POINT_MAP.size() / 0.75F), 1.0F);
0835: for (final Iterator i = NAME_TO_CODE_POINT_MAP.entrySet()
0836: .iterator(); i.hasNext();) {
0837: Map.Entry entry = (Map.Entry) i.next();
0838: String name = (String) entry.getKey();
0839: if (MAX_NAME_LENGTH < name.length())
0840: MAX_NAME_LENGTH = name.length();
0841: CODE_POINT_TO_NAME_MAP.put(((Integer) entry.getValue())
0842: .intValue(), name);
0843: }
0844: MAX_ENTITY_REFERENCE_LENGTH = MAX_NAME_LENGTH + 2; // '&'+name+';'
0845: }
0846:
/**
 * Creates a character entity reference, forwarding all arguments to the
 * superclass constructor and recording the entity name that
 * {@link #getName(int codePoint)} reports for the given code point.
 */
private CharacterEntityReference(final Source source, final int begin, final int end, final int codePoint) {
    super(source, begin, end, codePoint);
    this.name = CharacterEntityReference.getName(codePoint);
}
0852:
0853: /**
0854: * Returns the name of this character entity reference.
0855: * <p>
0856: * <dl>
0857: * <dt>Example:</dt>
0858: * <dd><code>((CharacterEntityReference)CharacterReference.parse("&gt;")).getName()</code> returns "<code>gt</code>"</dd>
0859: * </dl>
0860: * @return the name of this character entity reference.
0861: * @see #getName(int codePoint)
0862: */
0863: public String getName() {
0864: return name;
0865: }
0866:
0867: /**
0868: * Returns the character entity reference name of the specified character.
0869: * <p>
0870: * Since all character entity references represent unicode <a target="_blank" href="http://www.unicode.org/glossary/#bmp_code_point">BMP</a> code points,
0871: * the functionality of this method is identical to that of {@link #getName(int codePoint)}.
0872: * <p>
0873: * <dl>
0874: * <dt>Example:</dt>
0875: * <dd><code>CharacterEntityReference.getName('>')</code> returns "<code>gt</code>"</dd>
0876: * </dl>
0877: * @return the character entity reference name of the specified character, or <code>null</code> if none exists.
0878: */
0879: public static String getName(final char ch) {
0880: return getName((int) ch);
0881: }
0882:
0883: /**
0884: * Returns the character entity reference name of the specified unicode code point.
0885: * <p>
0886: * Since all character entity references represent unicode <a target="_blank" href="http://www.unicode.org/glossary/#bmp_code_point">BMP</a> code points,
0887: * the functionality of this method is identical to that of {@link #getName(char ch)}.
0888: * <p>
0889: * <dl>
0890: * <dt>Example:</dt>
0891: * <dd><code>CharacterEntityReference.getName(62)</code> returns "<code>gt</code>"</dd>
0892: * </dl>
0893: * @return the character entity reference name of the specified unicode code point, or <code>null</code> if none exists.
0894: */
0895: public static String getName(final int codePoint) {
0896: return CODE_POINT_TO_NAME_MAP.get(codePoint);
0897: }
0898:
0899: /**
0900: * Returns the unicode code point of the specified character entity reference name.
0901: * <p>
0902: * If the string does not represent a valid character entity reference name, this method returns {@link #INVALID_CODE_POINT INVALID_CODE_POINT}.
0903: * <p>
0904: * Although character entity reference names are case sensitive, and in some cases differ from other entity references only by their case,
0905: * some browsers also recognise them in a case-insensitive way.
0906: * For this reason, all decoding methods in this library recognise character entity reference names even if they are in the wrong case.
0907: * <p>
0908: * <dl>
0909: * <dt>Example:</dt>
0910: * <dd><code>CharacterEntityReference.getCodePointFromName("gt")</code> returns <code>62</code></dd>
0911: * </dl>
0912: * @return the unicode code point of the specified character entity reference name, or {@link #INVALID_CODE_POINT INVALID_CODE_POINT} if the string does not represent a valid character entity reference name.
0913: */
0914: public static int getCodePointFromName(final String name) {
0915: Integer codePoint = (Integer) NAME_TO_CODE_POINT_MAP.get(name);
0916: if (codePoint == null) {
0917: // Most browsers recognise character entity references even if they have the wrong case, so check for this as well:
0918: final String lowerCaseName = name.toLowerCase();
0919: if (lowerCaseName != name)
0920: codePoint = (Integer) NAME_TO_CODE_POINT_MAP
0921: .get(lowerCaseName);
0922: }
0923: return (codePoint != null) ? codePoint.intValue()
0924: : INVALID_CODE_POINT;
0925: }
0926:
0927: /**
0928: * Returns the correct encoded form of this character entity reference.
0929: * <p>
0930: * Note that the returned string is not necessarily the same as the original source text used to create this object.
0931: * This library recognises certain invalid forms of character references, as detailed in the {@link #decode(CharSequence) decode(String encodedString)} method.
0932: * <p>
0933: * To retrieve the original source text, use the {@link #toString() toString()} method instead.
0934: * <p>
0935: * <dl>
0936: * <dt>Example:</dt>
0937: * <dd><code>CharacterReference.parse("&GT").getCharacterReferenceString()</code> returns "<code>&gt;</code>"</dd>
0938: * </dl>
0939: *
0940: * @return the correct encoded form of this character entity reference.
0941: * @see CharacterReference#getCharacterReferenceString(int codePoint)
0942: */
0943: public String getCharacterReferenceString() {
0944: return getCharacterReferenceString(name);
0945: }
0946:
0947: /**
0948: * Returns the character entity reference encoded form of the specified unicode code point.
0949: * <p>
0950: * If the specified unicode code point does not have an equivalent character entity reference, this method returns <code>null</code>.
0951: * To get either the entity or numeric reference encoded form, use the {@link CharacterReference#getCharacterReferenceString(int codePoint)} method instead.
0952: * <p>
0953: * <dl>
0954: * <dt>Examples:</dt>
0955: * <dd><code>CharacterEntityReference.getCharacterReferenceString(62)</code> returns "<code>&gt;</code>"</dd>
0956: * <dd><code>CharacterEntityReference.getCharacterReferenceString(9786)</code> returns <code>null</code></dd>
0957: * </dl>
0958: *
0959: * @return the character entity reference encoded form of the specified unicode code point, or <code>null</code> if none exists.
0960: * @see CharacterReference#getCharacterReferenceString(int codePoint)
0961: */
0962: public static String getCharacterReferenceString(final int codePoint) {
0963: if (codePoint > Character.MAX_VALUE)
0964: return null;
0965: final String name = getName(codePoint);
0966: return name != null ? getCharacterReferenceString(name) : null;
0967: }
0968:
0969: /**
0970: * Returns a map of character entity reference names (<code>String</code>) to unicode code points (<code>Integer</code>).
0971: * @return a map of character entity reference names to unicode code points.
0972: */
0973: public static Map getNameToCodePointMap() {
0974: return NAME_TO_CODE_POINT_MAP;
0975: }
0976:
0977: /**
0978: * Returns a string representation of this object useful for debugging purposes.
0979: * @return a string representation of this object useful for debugging purposes.
0980: */
0981: public String getDebugInfo() {
0982: final StringBuffer sb = new StringBuffer();
0983: sb.append('"');
0984: appendCharacterReferenceString(sb, name);
0985: sb.append("\" ");
0986: appendUnicodeText(sb, codePoint);
0987: sb.append(' ').append(super .getDebugInfo());
0988: return sb.toString();
0989: }
0990:
0991: private static String getCharacterReferenceString(final String name) {
0992: return appendCharacterReferenceString(new StringBuffer(), name)
0993: .toString();
0994: }
0995:
0996: static final StringBuffer appendCharacterReferenceString(
0997: final StringBuffer sb, final String name) {
0998: return sb.append('&').append(name).append(';');
0999: }
1000:
1001: static CharacterReference construct(final Source source,
1002: final int begin, final int unterminatedMaxCodePoint) {
1003: // only called from CharacterReference.construct(), so we can assume that first character is '&'
1004: String name;
1005: final int nameBegin = begin + 1;
1006: final int maxNameEnd = nameBegin + MAX_NAME_LENGTH;
1007: final int maxSourcePos = source.end - 1;
1008: int end;
1009: int x = nameBegin;
1010: boolean unterminated = false;
1011: while (true) {
1012: final char ch = source.charAt(x);
1013: if (ch == ';') {
1014: end = x + 1;
1015: name = source.subSequence(nameBegin, x).toString();
1016: break;
1017: }
1018: if (!isValidReferenceNameChar(ch)) {
1019: // At this point, ch is determined to be an invalid character, meaning the character reference is unterminated.
1020: unterminated = true;
1021: } else if (x == maxSourcePos) {
1022: // At this point, we have a valid name character but are at the last position in the source text without the terminating semicolon.
1023: unterminated = true;
1024: x++; // include this character in the name
1025: }
1026: if (unterminated) {
1027: // Different browsers react differently to unterminated character entity references.
1028: // The behaviour of this method is determined by the unterminatedMaxCodePoint parameter.
1029: if (unterminatedMaxCodePoint == INVALID_CODE_POINT) {
1030: // reject:
1031: return null;
1032: } else {
1033: // accept:
1034: end = x;
1035: name = source.subSequence(nameBegin, x).toString();
1036: break;
1037: }
1038: }
1039: if (++x > maxNameEnd)
1040: return null;
1041: }
1042: final int codePoint = getCodePointFromName(name);
1043: if (codePoint == INVALID_CODE_POINT
1044: || (unterminated && codePoint > unterminatedMaxCodePoint))
1045: return null;
1046: return new CharacterEntityReference(source, begin, end,
1047: codePoint);
1048: }
1049:
1050: private static final boolean isValidReferenceNameChar(final char ch) {
1051: return ch >= 'A' && ch <= 'z' && (ch <= 'Z' || ch >= 'a');
1052: }
1053: }
|