0001: /*--
0002:
0003: $Id: Verifier.java,v 1.1 2005/04/27 09:32:39 wittek Exp $
0004:
0005: Copyright (C) 2000-2004 Jason Hunter & Brett McLaughlin.
0006: All rights reserved.
0007:
0008: Redistribution and use in source and binary forms, with or without
0009: modification, are permitted provided that the following conditions
0010: are met:
0011:
0012: 1. Redistributions of source code must retain the above copyright
0013: notice, this list of conditions, and the following disclaimer.
0014:
0015: 2. Redistributions in binary form must reproduce the above copyright
0016: notice, this list of conditions, and the disclaimer that follows
0017: these conditions in the documentation and/or other materials
0018: provided with the distribution.
0019:
0020: 3. The name "JDOM" must not be used to endorse or promote products
0021: derived from this software without prior written permission. For
0022: written permission, please contact <request_AT_jdom_DOT_org>.
0023:
0024: 4. Products derived from this software may not be called "JDOM", nor
0025: may "JDOM" appear in their name, without prior written permission
0026: from the JDOM Project Management <request_AT_jdom_DOT_org>.
0027:
0028: In addition, we request (but do not require) that you include in the
0029: end-user documentation provided with the redistribution and/or in the
0030: software itself an acknowledgement equivalent to the following:
0031: "This product includes software developed by the
0032: JDOM Project (http://www.jdom.org/)."
0033: Alternatively, the acknowledgment may be graphical using the logos
0034: available at http://www.jdom.org/images/logos.
0035:
0036: THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
0037: WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
0038: OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0039: DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
0040: CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
0041: SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
0042: LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
0043: USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0044: ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
0045: OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
0046: OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
0047: SUCH DAMAGE.
0048:
0049: This software consists of voluntary contributions made by many
0050: individuals on behalf of the JDOM Project and was originally
0051: created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
0052: Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
0053: on the JDOM Project, please see <http://www.jdom.org/>.
0054:
0055: */
0056:
0057: package org.jdom;
0058:
0059: import java.util.*;
0060:
0061: /**
0062: * A utility class to handle well-formedness checks on names, data, and other
0063: * verification tasks for JDOM. The class is final and may not be subclassed.
0064: *
0065: * @version $Revision: 1.1 $, $Date: 2005/04/27 09:32:39 $
0066: * @author Brett McLaughlin
0067: * @author Elliotte Rusty Harold
0068: * @author Jason Hunter
0069: * @author Bradley S. Huffman
0070: */
0071: final public class Verifier {
0072:
// Revision-control identification string; the $...$ keywords carry the
// file name, revision and date recorded for this source file.
private static final String CVS_ID = "@(#) $RCSfile: Verifier.java,v $ $Revision: 1.1 $ $Date: 2005/04/27 09:32:39 $ $Name: $";
0074:
/**
 * Ensure instantiation cannot occur: the constructor is private, so no
 * other class can create a <code>Verifier</code> instance.
 */
private Verifier() {
}
0080:
0081: /**
0082: * This will check the supplied name to see if it is legal for use as
0083: * a JDOM <code>{@link Element}</code> name.
0084: *
0085: * @param name <code>String</code> name to check.
0086: * @return <code>String</code> reason name is illegal, or
0087: * <code>null</code> if name is OK.
0088: */
0089: public static String checkElementName(String name) {
0090: // Check basic XML name rules first
0091: String reason;
0092: if ((reason = checkXMLName(name)) != null) {
0093: return reason;
0094: }
0095:
0096: // No colons allowed, since elements handle this internally
0097: if (name.indexOf(":") != -1) {
0098: return "Element names cannot contain colons";
0099: }
0100:
0101: // If we got here, everything is OK
0102: return null;
0103: }
0104:
0105: /**
0106: * This will check the supplied name to see if it is legal for use as
0107: * a JDOM <code>{@link Attribute}</code> name.
0108: *
0109: * @param name <code>String</code> name to check.
0110: * @return <code>String</code> reason name is illegal, or
0111: * <code>null</code> if name is OK.
0112: */
0113: public static String checkAttributeName(String name) {
0114: // Check basic XML name rules first
0115: String reason;
0116: if ((reason = checkXMLName(name)) != null) {
0117: return reason;
0118: }
0119:
0120: // No colons are allowed, since attributes handle this internally
0121: if (name.indexOf(":") != -1) {
0122: return "Attribute names cannot contain colons";
0123: }
0124:
0125: // Attribute names may not be xmlns since we do this internally too
0126: if (name.equals("xmlns")) {
0127: return "An Attribute name may not be \"xmlns\"; "
0128: + "use the Namespace class to manage namespaces";
0129: }
0130:
0131: // If we got here, everything is OK
0132: return null;
0133: }
0134:
0135: /**
0136: * This will check the supplied string to see if it only contains
0137: * characters allowed by the XML 1.0 specification. The C0 controls
0138: * (e.g. null, vertical tab, formfeed, etc.) are specifically excluded
0139: * except for carriage return, linefeed, and the horizontal tab.
0140: * Surrogates are also excluded.
0141: * <p>
0142: * This method is useful for checking element content and attribute
0143: * values. Note that characters
0144: * like " and < are allowed in attribute values and element content.
0145: * They will simply be escaped as " or <
0146: * when the value is serialized.
0147: * </p>
0148: *
0149: * @param text <code>String</code> value to check.
0150: * @return <code>String</code> reason name is illegal, or
0151: * <code>null</code> if name is OK.
0152: */
0153: public static String checkCharacterData(String text) {
0154: if (text == null) {
0155: return "A null is not a legal XML value";
0156: }
0157:
0158: // Do check
0159: for (int i = 0, len = text.length(); i < len; i++) {
0160:
0161: int ch = text.charAt(i);
0162:
0163: // Check if high part of a surrogate pair
0164: if (ch >= 0xD800 && ch <= 0xDBFF) {
0165: // Check if next char is the low-surrogate
0166: i++;
0167: if (i < len) {
0168: char low = text.charAt(i);
0169: if (low < 0xDC00 || low > 0xDFFF) {
0170: return "Illegal Surrogate Pair";
0171: }
0172: // It's a good pair, calculate the true value of
0173: // the character to then fall thru to isXMLCharacter
0174: ch = 0x10000 + (ch - 0xD800) * 0x400
0175: + (low - 0xDC00);
0176: } else {
0177: return "Surrogate Pair Truncated";
0178: }
0179: }
0180:
0181: if (!isXMLCharacter(ch)) {
0182: // Likely this character can't be easily displayed
0183: // because it's a control so we use it'd hexadecimal
0184: // representation in the reason.
0185: return ("0x" + Integer.toHexString(ch) + " is not a legal XML character");
0186: }
0187: }
0188:
0189: // If we got here, everything is OK
0190: return null;
0191: }
0192:
0193: /**
0194: * This will check the supplied data to see if it is legal for use as
0195: * JDOM <code>{@link CDATA}</code>.
0196: *
0197: * @param data <code>String</code> data to check.
0198: * @return <code>String</code> reason data is illegal, or
0199: * <code>null</code> is name is OK.
0200: */
0201: public static String checkCDATASection(String data) {
0202: String reason = null;
0203: if ((reason = checkCharacterData(data)) != null) {
0204: return reason;
0205: }
0206:
0207: if (data.indexOf("]]>") != -1) {
0208: return "CDATA cannot internally contain a CDATA ending "
0209: + "delimiter (]]>)";
0210: }
0211:
0212: // If we got here, everything is OK
0213: return null;
0214: }
0215:
0216: /**
0217: * This will check the supplied name to see if it is legal for use as
0218: * a JDOM <code>{@link Namespace}</code> prefix.
0219: *
0220: * @param prefix <code>String</code> prefix to check.
0221: * @return <code>String</code> reason name is illegal, or
0222: * <code>null</code> if name is OK.
0223: */
0224: public static String checkNamespacePrefix(String prefix) {
0225: // Manually do rules, since URIs can be null or empty
0226: if ((prefix == null) || (prefix.equals(""))) {
0227: return null;
0228: }
0229:
0230: // Cannot start with a number
0231: char first = prefix.charAt(0);
0232: if (isXMLDigit(first)) {
0233: return "Namespace prefixes cannot begin with a number";
0234: }
0235: // Cannot start with a $
0236: if (first == '$') {
0237: return "Namespace prefixes cannot begin with a dollar sign ($)";
0238: }
0239: // Cannot start with a -
0240: if (first == '-') {
0241: return "Namespace prefixes cannot begin with a hyphen (-)";
0242: }
0243: // Cannot start with a .
0244: if (first == '.') {
0245: return "Namespace prefixes cannot begin with a period (.)";
0246: }
0247: // Cannot start with "xml" in any character case
0248: if (prefix.toLowerCase().startsWith("xml")) {
0249: return "Namespace prefixes cannot begin with "
0250: + "\"xml\" in any combination of case";
0251: }
0252:
0253: // Ensure legal content
0254: for (int i = 0, len = prefix.length(); i < len; i++) {
0255: char c = prefix.charAt(i);
0256: if (!isXMLNameCharacter(c)) {
0257: return "Namespace prefixes cannot contain the character \""
0258: + c + "\"";
0259: }
0260: }
0261:
0262: // No colons allowed
0263: if (prefix.indexOf(":") != -1) {
0264: return "Namespace prefixes cannot contain colons";
0265: }
0266:
0267: // If we got here, everything is OK
0268: return null;
0269: }
0270:
0271: /**
0272: * This will check the supplied name to see if it is legal for use as
0273: * a JDOM <code>{@link Namespace}</code> URI.
0274: *
0275: * @param uri <code>String</code> URI to check.
0276: * @return <code>String</code> reason name is illegal, or
0277: * <code>null</code> if name is OK.
0278: */
0279: public static String checkNamespaceURI(String uri) {
0280: // Manually do rules, since URIs can be null or empty
0281: if ((uri == null) || (uri.equals(""))) {
0282: return null;
0283: }
0284:
0285: // Cannot start with a number
0286: char first = uri.charAt(0);
0287: if (Character.isDigit(first)) {
0288: return "Namespace URIs cannot begin with a number";
0289: }
0290: // Cannot start with a $
0291: if (first == '$') {
0292: return "Namespace URIs cannot begin with a dollar sign ($)";
0293: }
0294: // Cannot start with a -
0295: if (first == '-') {
0296: return "Namespace URIs cannot begin with a hyphen (-)";
0297: }
0298:
0299: // If we got here, everything is OK
0300: return null;
0301: }
0302:
0303: /**
0304: * Check if two namespaces collide.
0305: *
0306: * @param namespace <code>Namespace</code> to check.
0307: * @param other <code>Namespace</code> to check against.
0308: * @return <code>String</code> reason for collision, or
0309: * <code>null</code> if no collision.
0310: */
0311: public static String checkNamespaceCollision(Namespace namespace,
0312: Namespace other) {
0313: String p1, p2, u1, u2, reason;
0314:
0315: reason = null;
0316: p1 = namespace.getPrefix();
0317: u1 = namespace.getURI();
0318: p2 = other.getPrefix();
0319: u2 = other.getURI();
0320: if (p1.equals(p2) && !u1.equals(u2)) {
0321: reason = "The namespace prefix \"" + p1 + "\" collides";
0322: }
0323: return reason;
0324: }
0325:
0326: /**
0327: * Check if <code>{@link Attribute}</code>'s namespace collides with a
0328: * <code>{@link Element}</code>'s namespace.
0329: *
0330: * @param attribute <code>Attribute</code> to check.
0331: * @param element <code>Element</code> to check against.
0332: * @return <code>String</code> reason for collision, or
0333: * <code>null</code> if no collision.
0334: */
0335: public static String checkNamespaceCollision(Attribute attribute,
0336: Element element) {
0337: Namespace namespace = attribute.getNamespace();
0338: String prefix = namespace.getPrefix();
0339: if ("".equals(prefix)) {
0340: return null;
0341: }
0342:
0343: return checkNamespaceCollision(namespace, element);
0344: }
0345:
0346: /**
0347: * Check if a <code>{@link Namespace}</code> collides with a
0348: * <code>{@link Element}</code>'s namespace.
0349: *
0350: * @param namespace <code>Namespace</code> to check.
0351: * @param element <code>Element</code> to check against.
0352: * @return <code>String</code> reason for collision, or
0353: * <code>null</code> if no collision.
0354: */
0355: public static String checkNamespaceCollision(Namespace namespace,
0356: Element element) {
0357: String reason = checkNamespaceCollision(namespace, element
0358: .getNamespace());
0359: if (reason != null) {
0360: return reason + " with the element namespace prefix";
0361: }
0362:
0363: reason = checkNamespaceCollision(namespace, element
0364: .getAdditionalNamespaces());
0365: if (reason != null) {
0366: return reason;
0367: }
0368:
0369: reason = checkNamespaceCollision(namespace, element
0370: .getAttributes());
0371: if (reason != null) {
0372: return reason;
0373: }
0374:
0375: return null;
0376: }
0377:
0378: /**
0379: * Check if a <code>{@link Namespace}</code> collides with a
0380: * <code>{@link Attribute}</code>'s namespace.
0381: *
0382: * @param namespace <code>Namespace</code> to check.
0383: * @param attribute <code>Attribute</code> to check against.
0384: * @return <code>String</code> reason for collision, or
0385: * <code>null</code> if no collision.
0386: */
0387: public static String checkNamespaceCollision(Namespace namespace,
0388: Attribute attribute) {
0389: String reason = checkNamespaceCollision(namespace, attribute
0390: .getNamespace());
0391: if (reason != null) {
0392: reason += " with an attribute namespace prefix on the element";
0393: }
0394: return reason;
0395: }
0396:
0397: /**
0398: * Check if a <code>{@link Namespace}</code> collides with any namespace
0399: * from a list of objects.
0400: *
0401: * @param namespace <code>Namespace</code> to check.
0402: * @param list <code>List</code> to check against.
0403: * @return <code>String</code> reason for collision, or
0404: * <code>null</code> if no collision.
0405: */
0406: public static String checkNamespaceCollision(Namespace namespace,
0407: List list) {
0408: if (list == null) {
0409: return null;
0410: }
0411:
0412: String reason = null;
0413: Iterator i = list.iterator();
0414: while ((reason == null) && i.hasNext()) {
0415: Object obj = i.next();
0416: if (obj instanceof Attribute) {
0417: reason = checkNamespaceCollision(namespace,
0418: (Attribute) obj);
0419: } else if (obj instanceof Element) {
0420: reason = checkNamespaceCollision(namespace,
0421: (Element) obj);
0422: } else if (obj instanceof Namespace) {
0423: reason = checkNamespaceCollision(namespace,
0424: (Namespace) obj);
0425: if (reason != null) {
0426: reason += " with an additional namespace declared"
0427: + " by the element";
0428: }
0429: }
0430: }
0431: return reason;
0432: }
0433:
0434: /**
0435: * This will check the supplied data to see if it is legal for use as
0436: * a JDOM <code>{@link ProcessingInstruction}</code> target.
0437: *
0438: * @param target <code>String</code> target to check.
0439: * @return <code>String</code> reason target is illegal, or
0440: * <code>null</code> if target is OK.
0441: */
0442: public static String checkProcessingInstructionTarget(String target) {
0443: // Check basic XML name rules first
0444: String reason;
0445: if ((reason = checkXMLName(target)) != null) {
0446: return reason;
0447: }
0448:
0449: // No colons allowed, per Namespace Specification Section 6
0450: if (target.indexOf(":") != -1) {
0451: return "Processing instruction targets cannot contain colons";
0452: }
0453:
0454: // Cannot begin with 'xml' in any case
0455: if (target.equalsIgnoreCase("xml")) {
0456: return "Processing instructions cannot have a target of "
0457: + "\"xml\" in any combination of case. (Note that the "
0458: + "\"<?xml ... ?>\" declaration at the beginning of a "
0459: + "document is not a processing instruction and should not "
0460: + "be added as one; it is written automatically during "
0461: + "output, e.g. by XMLOutputter.)";
0462: }
0463:
0464: // If we got here, everything is OK
0465: return null;
0466: }
0467:
0468: /**
0469: * This will check the supplied data to see if it is legal for use as
0470: * <code>{@link ProcessingInstruction}</code> data. Besides checking that
0471: * all the characters are allowed in XML, this also checks
0472: * that the data does not contain the PI end-string "?>".
0473: *
0474: * @param data <code>String</code> data to check.
0475: * @return <code>String</code> reason data is illegal, or
0476: * <code>null</code> if data is OK.
0477: */
0478: public static String checkProcessingInstructionData(String data) {
0479: // Check basic XML name rules first
0480: String reason = checkCharacterData(data);
0481:
0482: if (reason == null) {
0483: if (data.indexOf("?>") >= 0) {
0484: return "Processing instructions cannot contain "
0485: + "the string \"?>\"";
0486: }
0487: }
0488:
0489: return reason;
0490: }
0491:
0492: /**
0493: * This will check the supplied data to see if it is legal for use as
0494: * JDOM <code>{@link Comment}</code> data.
0495: *
0496: * @param data <code>String</code> data to check.
0497: * @return <code>String</code> reason data is illegal, or
0498: * <code>null</code> if data is OK.
0499: */
0500: public static String checkCommentData(String data) {
0501: String reason = null;
0502: if ((reason = checkCharacterData(data)) != null) {
0503: return reason;
0504: }
0505:
0506: if (data.indexOf("--") != -1) {
0507: return "Comments cannot contain double hyphens (--)";
0508: }
0509: if (data.startsWith("-")) {
0510: return "Comment data cannot start with a hyphen.";
0511: }
0512: if (data.endsWith("-")) {
0513: return "Comment data cannot end with a hyphen.";
0514: }
0515:
0516: // If we got here, everything is OK
0517: return null;
0518: }
0519:
0520: // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] |
0521: // [-'()+,./:=?;*#@$_%]
0522: public static boolean isXMLPublicIDCharacter(char c) {
0523:
0524: if (c >= 'a' && c <= 'z')
0525: return true;
0526: if (c >= '?' && c <= 'Z')
0527: return true;
0528: if (c >= '\'' && c <= ';')
0529: return true;
0530:
0531: if (c == ' ')
0532: return true;
0533: if (c == '!')
0534: return true;
0535: if (c == '=')
0536: return true;
0537: if (c == '#')
0538: return true;
0539: if (c == '$')
0540: return true;
0541: if (c == '_')
0542: return true;
0543: if (c == '%')
0544: return true;
0545: if (c == '\n')
0546: return true;
0547: if (c == '\r')
0548: return true;
0549: if (c == '\t')
0550: return true;
0551:
0552: return false;
0553: }
0554:
0555: /**
0556: * This will ensure that the data for a public identifier
0557: * is legal.
0558: *
0559: * @param publicID <code>String</code> public ID to check.
0560: * @return <code>String</code> reason public ID is illegal, or
0561: * <code>null</code> if public ID is OK.
0562: */
0563: public static String checkPublicID(String publicID) {
0564: String reason = null;
0565:
0566: if (publicID == null)
0567: return null;
0568: // This indicates there is no public ID
0569:
0570: for (int i = 0; i < publicID.length(); i++) {
0571: char c = publicID.charAt(i);
0572: if (!isXMLPublicIDCharacter(c)) {
0573: reason = c + " is not a legal character in public IDs";
0574: break;
0575: }
0576: }
0577:
0578: return reason;
0579: }
0580:
0581: /**
0582: * This will ensure that the data for a system literal
0583: * is legal.
0584: *
0585: * @param systemLiteral <code>String</code> system literal to check.
0586: * @return <code>String</code> reason system literal is illegal, or
0587: * <code>null</code> if system literal is OK.
0588: */
0589: public static String checkSystemLiteral(String systemLiteral) {
0590: String reason = null;
0591:
0592: if (systemLiteral == null)
0593: return null;
0594: // This indicates there is no system ID
0595:
0596: if (systemLiteral.indexOf('\'') != -1
0597: && systemLiteral.indexOf('"') != -1) {
0598: reason = "System literals cannot simultaneously contain both single and double quotes.";
0599: } else {
0600: reason = checkCharacterData(systemLiteral);
0601: }
0602:
0603: return reason;
0604: }
0605:
0606: /**
0607: * This is a utility function for sharing the base process of checking
0608: * any XML name.
0609: *
0610: * @param name <code>String</code> to check for XML name compliance.
0611: * @return <code>String</code> reason the name is illegal, or
0612: * <code>null</code> if OK.
0613: */
0614: public static String checkXMLName(String name) {
0615: // Cannot be empty or null
0616: if ((name == null) || (name.length() == 0)
0617: || (name.trim().equals(""))) {
0618: return "XML names cannot be null or empty";
0619: }
0620:
0621: // Cannot start with a number
0622: char first = name.charAt(0);
0623: if (!isXMLNameStartCharacter(first)) {
0624: return "XML names cannot begin with the character \""
0625: + first + "\"";
0626: }
0627: // Ensure legal content for non-first chars
0628: for (int i = 1, len = name.length(); i < len; i++) {
0629: char c = name.charAt(i);
0630: if (!isXMLNameCharacter(c)) {
0631: return "XML names cannot contain the character \"" + c
0632: + "\"";
0633: }
0634: }
0635:
0636: // We got here, so everything is OK
0637: return null;
0638: }
0639:
0640: /**
0641: * <p>
0642: * Checks a string to see if it is a legal RFC 2396 URI.
0643: * Both absolute and relative URIs are supported.
0644: * </p>
0645: *
0646: * @param uri <code>String</code> to check.
0647: * @return <code>String</code> reason the URI is illegal, or
0648: * <code>null</code> if OK.
0649: */
0650: public static String checkURI(String uri) {
0651: // URIs can be null or empty
0652: if ((uri == null) || (uri.equals(""))) {
0653: return null;
0654: }
0655:
0656: for (int i = 0; i < uri.length(); i++) {
0657: char test = uri.charAt(i);
0658: if (!isURICharacter(test)) {
0659: String msgNumber = "0x" + Integer.toHexString(test);
0660: if (test <= 0x09)
0661: msgNumber = "0x0" + Integer.toHexString(test);
0662: return "URIs cannot contain " + msgNumber;
0663: } // end if
0664: if (test == '%') { // must be followed by two hexadecimal digits
0665: try {
0666: char firstDigit = uri.charAt(i + 1);
0667: char secondDigit = uri.charAt(i + 2);
0668: if (!isHexDigit(firstDigit)
0669: || !isHexDigit(secondDigit)) {
0670: return "Percent signs in URIs must be followed by "
0671: + "exactly two hexadecimal digits.";
0672: }
0673:
0674: } catch (StringIndexOutOfBoundsException e) {
0675: return "Percent signs in URIs must be followed by "
0676: + "exactly two hexadecimal digits.";
0677: }
0678: }
0679: } // end for
0680:
0681: // If we got here, everything is OK
0682: return null;
0683: }
0684:
0685: /**
0686: * <p>
0687: * <p>
0688: * This is a utility function for determining whether a specified
0689: * Unicode character is a hexadecimal digit as defined in RFC 2396;
0690: * that is, one of the ASCII characters 0-9, a-f, or A-F
0691: * </p>
0692: *
0693: * @param c to check for hex digit.
0694: * @return true if it's allowed, false otherwise.
0695: */
0696: public static boolean isHexDigit(char c) {
0697:
0698: // I suspect most characters passed to this method will be
0699: // correct hexadecimal digits, so I test for the true cases
0700: // first. If this proves to be a performance bottleneck
0701: // a switch statement or lookup table
0702: // might optimize this.
0703: if (c >= '0' && c <= '9')
0704: return true;
0705: if (c >= 'A' && c <= 'F')
0706: return true;
0707: if (c >= 'a' && c <= 'f')
0708: return true;
0709:
0710: return false;
0711: }
0712:
0713: /**
0714: * <p>
0715: * This is a utility function for determining whether
0716: * a specified Unicode character is legal in URI references
0717: * as determined by RFC 2396.
0718: * </p>
0719: *
0720: * @param c <code>char</code> to check for URI reference compliance.
0721: * @return true if it's allowed, false otherwise.
0722: */
0723: public static boolean isURICharacter(char c) {
0724: if (c >= 'a' && c <= 'z')
0725: return true;
0726: if (c >= 'A' && c <= 'Z')
0727: return true;
0728: if (c >= '0' && c <= '9')
0729: return true;
0730: if (c == '/')
0731: return true;
0732: if (c == '-')
0733: return true;
0734: if (c == '.')
0735: return true;
0736: if (c == '?')
0737: return true;
0738: if (c == ':')
0739: return true;
0740: if (c == '@')
0741: return true;
0742: if (c == '&')
0743: return true;
0744: if (c == '=')
0745: return true;
0746: if (c == '+')
0747: return true;
0748: if (c == '$')
0749: return true;
0750: if (c == ',')
0751: return true;
0752: if (c == '%')
0753: return true;
0754:
0755: if (c == '_')
0756: return true;
0757: if (c == '!')
0758: return true;
0759: if (c == '~')
0760: return true;
0761: if (c == '*')
0762: return true;
0763: if (c == '\'')
0764: return true;
0765: if (c == '(')
0766: return true;
0767: if (c == ')')
0768: return true;
0769: return false;
0770: }
0771:
0772: /**
0773: * This is a utility function for determining whether a specified
0774: * character is a character according to production 2 of the
0775: * XML 1.0 specification.
0776: *
0777: * @param c <code>char</code> to check for XML compliance
0778: * @return <code>boolean</code> true if it's a character,
0779: * false otherwise
0780: */
0781: public static boolean isXMLCharacter(int c) {
0782:
0783: if (c == '\n')
0784: return true;
0785: if (c == '\r')
0786: return true;
0787: if (c == '\t')
0788: return true;
0789:
0790: if (c < 0x20)
0791: return false;
0792: if (c <= 0xD7FF)
0793: return true;
0794: if (c < 0xE000)
0795: return false;
0796: if (c <= 0xFFFD)
0797: return true;
0798: if (c < 0x10000)
0799: return false;
0800: if (c <= 0x10FFFF)
0801: return true;
0802:
0803: return false;
0804: }
0805:
0806: /**
0807: * This is a utility function for determining whether a specified
0808: * character is a name character according to production 4 of the
0809: * XML 1.0 specification.
0810: *
0811: * @param c <code>char</code> to check for XML name compliance.
0812: * @return <code>boolean</code> true if it's a name character,
0813: * false otherwise.
0814: */
0815: public static boolean isXMLNameCharacter(char c) {
0816:
0817: return (isXMLLetter(c) || isXMLDigit(c) || c == '.' || c == '-'
0818: || c == '_' || c == ':' || isXMLCombiningChar(c) || isXMLExtender(c));
0819: }
0820:
0821: /**
0822: * This is a utility function for determining whether a specified
0823: * character is a legal name start character according to production 5
0824: * of the XML 1.0 specification. This production does allow names
0825: * to begin with colons which the Namespaces in XML Recommendation
0826: * disallows.
0827: *
0828: * @param c <code>char</code> to check for XML name start compliance.
0829: * @return <code>boolean</code> true if it's a name start character,
0830: * false otherwise.
0831: */
0832: public static boolean isXMLNameStartCharacter(char c) {
0833:
0834: return (isXMLLetter(c) || c == '_' || c == ':');
0835:
0836: }
0837:
0838: /**
0839: * This is a utility function for determining whether a specified
0840: * character is a letter or digit according to productions 84 and 88
0841: * of the XML 1.0 specification.
0842: *
0843: * @param c <code>char</code> to check.
0844: * @return <code>boolean</code> true if it's letter or digit,
0845: * false otherwise.
0846: */
0847: public static boolean isXMLLetterOrDigit(char c) {
0848:
0849: return (isXMLLetter(c) || isXMLDigit(c));
0850:
0851: }
0852:
0853: /**
0854: * This is a utility function for determining whether a specified character
0855: * is a letter according to production 84 of the XML 1.0 specification.
0856: *
0857: * @param c <code>char</code> to check for XML name compliance.
0858: * @return <code>String</code> true if it's a letter, false otherwise.
0859: */
0860: public static boolean isXMLLetter(char c) {
0861: // Note that order is very important here. The search proceeds
0862: // from lowest to highest values, so that no searching occurs
0863: // above the character's value. BTW, the first line is equivalent to:
0864: // if (c >= 0x0041 && c <= 0x005A) return true;
0865:
0866: if (c < 0x0041)
0867: return false;
0868: if (c <= 0x005a)
0869: return true;
0870: if (c < 0x0061)
0871: return false;
0872: if (c <= 0x007A)
0873: return true;
0874: if (c < 0x00C0)
0875: return false;
0876: if (c <= 0x00D6)
0877: return true;
0878: if (c < 0x00D8)
0879: return false;
0880: if (c <= 0x00F6)
0881: return true;
0882: if (c < 0x00F8)
0883: return false;
0884: if (c <= 0x00FF)
0885: return true;
0886: if (c < 0x0100)
0887: return false;
0888: if (c <= 0x0131)
0889: return true;
0890: if (c < 0x0134)
0891: return false;
0892: if (c <= 0x013E)
0893: return true;
0894: if (c < 0x0141)
0895: return false;
0896: if (c <= 0x0148)
0897: return true;
0898: if (c < 0x014A)
0899: return false;
0900: if (c <= 0x017E)
0901: return true;
0902: if (c < 0x0180)
0903: return false;
0904: if (c <= 0x01C3)
0905: return true;
0906: if (c < 0x01CD)
0907: return false;
0908: if (c <= 0x01F0)
0909: return true;
0910: if (c < 0x01F4)
0911: return false;
0912: if (c <= 0x01F5)
0913: return true;
0914: if (c < 0x01FA)
0915: return false;
0916: if (c <= 0x0217)
0917: return true;
0918: if (c < 0x0250)
0919: return false;
0920: if (c <= 0x02A8)
0921: return true;
0922: if (c < 0x02BB)
0923: return false;
0924: if (c <= 0x02C1)
0925: return true;
0926: if (c == 0x0386)
0927: return true;
0928: if (c < 0x0388)
0929: return false;
0930: if (c <= 0x038A)
0931: return true;
0932: if (c == 0x038C)
0933: return true;
0934: if (c < 0x038E)
0935: return false;
0936: if (c <= 0x03A1)
0937: return true;
0938: if (c < 0x03A3)
0939: return false;
0940: if (c <= 0x03CE)
0941: return true;
0942: if (c < 0x03D0)
0943: return false;
0944: if (c <= 0x03D6)
0945: return true;
0946: if (c == 0x03DA)
0947: return true;
0948: if (c == 0x03DC)
0949: return true;
0950: if (c == 0x03DE)
0951: return true;
0952: if (c == 0x03E0)
0953: return true;
0954: if (c < 0x03E2)
0955: return false;
0956: if (c <= 0x03F3)
0957: return true;
0958: if (c < 0x0401)
0959: return false;
0960: if (c <= 0x040C)
0961: return true;
0962: if (c < 0x040E)
0963: return false;
0964: if (c <= 0x044F)
0965: return true;
0966: if (c < 0x0451)
0967: return false;
0968: if (c <= 0x045C)
0969: return true;
0970: if (c < 0x045E)
0971: return false;
0972: if (c <= 0x0481)
0973: return true;
0974: if (c < 0x0490)
0975: return false;
0976: if (c <= 0x04C4)
0977: return true;
0978: if (c < 0x04C7)
0979: return false;
0980: if (c <= 0x04C8)
0981: return true;
0982: if (c < 0x04CB)
0983: return false;
0984: if (c <= 0x04CC)
0985: return true;
0986: if (c < 0x04D0)
0987: return false;
0988: if (c <= 0x04EB)
0989: return true;
0990: if (c < 0x04EE)
0991: return false;
0992: if (c <= 0x04F5)
0993: return true;
0994: if (c < 0x04F8)
0995: return false;
0996: if (c <= 0x04F9)
0997: return true;
0998: if (c < 0x0531)
0999: return false;
1000: if (c <= 0x0556)
1001: return true;
1002: if (c == 0x0559)
1003: return true;
1004: if (c < 0x0561)
1005: return false;
1006: if (c <= 0x0586)
1007: return true;
1008: if (c < 0x05D0)
1009: return false;
1010: if (c <= 0x05EA)
1011: return true;
1012: if (c < 0x05F0)
1013: return false;
1014: if (c <= 0x05F2)
1015: return true;
1016: if (c < 0x0621)
1017: return false;
1018: if (c <= 0x063A)
1019: return true;
1020: if (c < 0x0641)
1021: return false;
1022: if (c <= 0x064A)
1023: return true;
1024: if (c < 0x0671)
1025: return false;
1026: if (c <= 0x06B7)
1027: return true;
1028: if (c < 0x06BA)
1029: return false;
1030: if (c <= 0x06BE)
1031: return true;
1032: if (c < 0x06C0)
1033: return false;
1034: if (c <= 0x06CE)
1035: return true;
1036: if (c < 0x06D0)
1037: return false;
1038: if (c <= 0x06D3)
1039: return true;
1040: if (c == 0x06D5)
1041: return true;
1042: if (c < 0x06E5)
1043: return false;
1044: if (c <= 0x06E6)
1045: return true;
1046: if (c < 0x0905)
1047: return false;
1048: if (c <= 0x0939)
1049: return true;
1050: if (c == 0x093D)
1051: return true;
1052: if (c < 0x0958)
1053: return false;
1054: if (c <= 0x0961)
1055: return true;
1056: if (c < 0x0985)
1057: return false;
1058: if (c <= 0x098C)
1059: return true;
1060: if (c < 0x098F)
1061: return false;
1062: if (c <= 0x0990)
1063: return true;
1064: if (c < 0x0993)
1065: return false;
1066: if (c <= 0x09A8)
1067: return true;
1068: if (c < 0x09AA)
1069: return false;
1070: if (c <= 0x09B0)
1071: return true;
1072: if (c == 0x09B2)
1073: return true;
1074: if (c < 0x09B6)
1075: return false;
1076: if (c <= 0x09B9)
1077: return true;
1078: if (c < 0x09DC)
1079: return false;
1080: if (c <= 0x09DD)
1081: return true;
1082: if (c < 0x09DF)
1083: return false;
1084: if (c <= 0x09E1)
1085: return true;
1086: if (c < 0x09F0)
1087: return false;
1088: if (c <= 0x09F1)
1089: return true;
1090: if (c < 0x0A05)
1091: return false;
1092: if (c <= 0x0A0A)
1093: return true;
1094: if (c < 0x0A0F)
1095: return false;
1096: if (c <= 0x0A10)
1097: return true;
1098: if (c < 0x0A13)
1099: return false;
1100: if (c <= 0x0A28)
1101: return true;
1102: if (c < 0x0A2A)
1103: return false;
1104: if (c <= 0x0A30)
1105: return true;
1106: if (c < 0x0A32)
1107: return false;
1108: if (c <= 0x0A33)
1109: return true;
1110: if (c < 0x0A35)
1111: return false;
1112: if (c <= 0x0A36)
1113: return true;
1114: if (c < 0x0A38)
1115: return false;
1116: if (c <= 0x0A39)
1117: return true;
1118: if (c < 0x0A59)
1119: return false;
1120: if (c <= 0x0A5C)
1121: return true;
1122: if (c == 0x0A5E)
1123: return true;
1124: if (c < 0x0A72)
1125: return false;
1126: if (c <= 0x0A74)
1127: return true;
1128: if (c < 0x0A85)
1129: return false;
1130: if (c <= 0x0A8B)
1131: return true;
1132: if (c == 0x0A8D)
1133: return true;
1134: if (c < 0x0A8F)
1135: return false;
1136: if (c <= 0x0A91)
1137: return true;
1138: if (c < 0x0A93)
1139: return false;
1140: if (c <= 0x0AA8)
1141: return true;
1142: if (c < 0x0AAA)
1143: return false;
1144: if (c <= 0x0AB0)
1145: return true;
1146: if (c < 0x0AB2)
1147: return false;
1148: if (c <= 0x0AB3)
1149: return true;
1150: if (c < 0x0AB5)
1151: return false;
1152: if (c <= 0x0AB9)
1153: return true;
1154: if (c == 0x0ABD)
1155: return true;
1156: if (c == 0x0AE0)
1157: return true;
1158: if (c < 0x0B05)
1159: return false;
1160: if (c <= 0x0B0C)
1161: return true;
1162: if (c < 0x0B0F)
1163: return false;
1164: if (c <= 0x0B10)
1165: return true;
1166: if (c < 0x0B13)
1167: return false;
1168: if (c <= 0x0B28)
1169: return true;
1170: if (c < 0x0B2A)
1171: return false;
1172: if (c <= 0x0B30)
1173: return true;
1174: if (c < 0x0B32)
1175: return false;
1176: if (c <= 0x0B33)
1177: return true;
1178: if (c < 0x0B36)
1179: return false;
1180: if (c <= 0x0B39)
1181: return true;
1182: if (c == 0x0B3D)
1183: return true;
1184: if (c < 0x0B5C)
1185: return false;
1186: if (c <= 0x0B5D)
1187: return true;
1188: if (c < 0x0B5F)
1189: return false;
1190: if (c <= 0x0B61)
1191: return true;
1192: if (c < 0x0B85)
1193: return false;
1194: if (c <= 0x0B8A)
1195: return true;
1196: if (c < 0x0B8E)
1197: return false;
1198: if (c <= 0x0B90)
1199: return true;
1200: if (c < 0x0B92)
1201: return false;
1202: if (c <= 0x0B95)
1203: return true;
1204: if (c < 0x0B99)
1205: return false;
1206: if (c <= 0x0B9A)
1207: return true;
1208: if (c == 0x0B9C)
1209: return true;
1210: if (c < 0x0B9E)
1211: return false;
1212: if (c <= 0x0B9F)
1213: return true;
1214: if (c < 0x0BA3)
1215: return false;
1216: if (c <= 0x0BA4)
1217: return true;
1218: if (c < 0x0BA8)
1219: return false;
1220: if (c <= 0x0BAA)
1221: return true;
1222: if (c < 0x0BAE)
1223: return false;
1224: if (c <= 0x0BB5)
1225: return true;
1226: if (c < 0x0BB7)
1227: return false;
1228: if (c <= 0x0BB9)
1229: return true;
1230: if (c < 0x0C05)
1231: return false;
1232: if (c <= 0x0C0C)
1233: return true;
1234: if (c < 0x0C0E)
1235: return false;
1236: if (c <= 0x0C10)
1237: return true;
1238: if (c < 0x0C12)
1239: return false;
1240: if (c <= 0x0C28)
1241: return true;
1242: if (c < 0x0C2A)
1243: return false;
1244: if (c <= 0x0C33)
1245: return true;
1246: if (c < 0x0C35)
1247: return false;
1248: if (c <= 0x0C39)
1249: return true;
1250: if (c < 0x0C60)
1251: return false;
1252: if (c <= 0x0C61)
1253: return true;
1254: if (c < 0x0C85)
1255: return false;
1256: if (c <= 0x0C8C)
1257: return true;
1258: if (c < 0x0C8E)
1259: return false;
1260: if (c <= 0x0C90)
1261: return true;
1262: if (c < 0x0C92)
1263: return false;
1264: if (c <= 0x0CA8)
1265: return true;
1266: if (c < 0x0CAA)
1267: return false;
1268: if (c <= 0x0CB3)
1269: return true;
1270: if (c < 0x0CB5)
1271: return false;
1272: if (c <= 0x0CB9)
1273: return true;
1274: if (c == 0x0CDE)
1275: return true;
1276: if (c < 0x0CE0)
1277: return false;
1278: if (c <= 0x0CE1)
1279: return true;
1280: if (c < 0x0D05)
1281: return false;
1282: if (c <= 0x0D0C)
1283: return true;
1284: if (c < 0x0D0E)
1285: return false;
1286: if (c <= 0x0D10)
1287: return true;
1288: if (c < 0x0D12)
1289: return false;
1290: if (c <= 0x0D28)
1291: return true;
1292: if (c < 0x0D2A)
1293: return false;
1294: if (c <= 0x0D39)
1295: return true;
1296: if (c < 0x0D60)
1297: return false;
1298: if (c <= 0x0D61)
1299: return true;
1300: if (c < 0x0E01)
1301: return false;
1302: if (c <= 0x0E2E)
1303: return true;
1304: if (c == 0x0E30)
1305: return true;
1306: if (c < 0x0E32)
1307: return false;
1308: if (c <= 0x0E33)
1309: return true;
1310: if (c < 0x0E40)
1311: return false;
1312: if (c <= 0x0E45)
1313: return true;
1314: if (c < 0x0E81)
1315: return false;
1316: if (c <= 0x0E82)
1317: return true;
1318: if (c == 0x0E84)
1319: return true;
1320: if (c < 0x0E87)
1321: return false;
1322: if (c <= 0x0E88)
1323: return true;
1324: if (c == 0x0E8A)
1325: return true;
1326: if (c == 0x0E8D)
1327: return true;
1328: if (c < 0x0E94)
1329: return false;
1330: if (c <= 0x0E97)
1331: return true;
1332: if (c < 0x0E99)
1333: return false;
1334: if (c <= 0x0E9F)
1335: return true;
1336: if (c < 0x0EA1)
1337: return false;
1338: if (c <= 0x0EA3)
1339: return true;
1340: if (c == 0x0EA5)
1341: return true;
1342: if (c == 0x0EA7)
1343: return true;
1344: if (c < 0x0EAA)
1345: return false;
1346: if (c <= 0x0EAB)
1347: return true;
1348: if (c < 0x0EAD)
1349: return false;
1350: if (c <= 0x0EAE)
1351: return true;
1352: if (c == 0x0EB0)
1353: return true;
1354: if (c < 0x0EB2)
1355: return false;
1356: if (c <= 0x0EB3)
1357: return true;
1358: if (c == 0x0EBD)
1359: return true;
1360: if (c < 0x0EC0)
1361: return false;
1362: if (c <= 0x0EC4)
1363: return true;
1364: if (c < 0x0F40)
1365: return false;
1366: if (c <= 0x0F47)
1367: return true;
1368: if (c < 0x0F49)
1369: return false;
1370: if (c <= 0x0F69)
1371: return true;
1372: if (c < 0x10A0)
1373: return false;
1374: if (c <= 0x10C5)
1375: return true;
1376: if (c < 0x10D0)
1377: return false;
1378: if (c <= 0x10F6)
1379: return true;
1380: if (c == 0x1100)
1381: return true;
1382: if (c < 0x1102)
1383: return false;
1384: if (c <= 0x1103)
1385: return true;
1386: if (c < 0x1105)
1387: return false;
1388: if (c <= 0x1107)
1389: return true;
1390: if (c == 0x1109)
1391: return true;
1392: if (c < 0x110B)
1393: return false;
1394: if (c <= 0x110C)
1395: return true;
1396: if (c < 0x110E)
1397: return false;
1398: if (c <= 0x1112)
1399: return true;
1400: if (c == 0x113C)
1401: return true;
1402: if (c == 0x113E)
1403: return true;
1404: if (c == 0x1140)
1405: return true;
1406: if (c == 0x114C)
1407: return true;
1408: if (c == 0x114E)
1409: return true;
1410: if (c == 0x1150)
1411: return true;
1412: if (c < 0x1154)
1413: return false;
1414: if (c <= 0x1155)
1415: return true;
1416: if (c == 0x1159)
1417: return true;
1418: if (c < 0x115F)
1419: return false;
1420: if (c <= 0x1161)
1421: return true;
1422: if (c == 0x1163)
1423: return true;
1424: if (c == 0x1165)
1425: return true;
1426: if (c == 0x1167)
1427: return true;
1428: if (c == 0x1169)
1429: return true;
1430: if (c < 0x116D)
1431: return false;
1432: if (c <= 0x116E)
1433: return true;
1434: if (c < 0x1172)
1435: return false;
1436: if (c <= 0x1173)
1437: return true;
1438: if (c == 0x1175)
1439: return true;
1440: if (c == 0x119E)
1441: return true;
1442: if (c == 0x11A8)
1443: return true;
1444: if (c == 0x11AB)
1445: return true;
1446: if (c < 0x11AE)
1447: return false;
1448: if (c <= 0x11AF)
1449: return true;
1450: if (c < 0x11B7)
1451: return false;
1452: if (c <= 0x11B8)
1453: return true;
1454: if (c == 0x11BA)
1455: return true;
1456: if (c < 0x11BC)
1457: return false;
1458: if (c <= 0x11C2)
1459: return true;
1460: if (c == 0x11EB)
1461: return true;
1462: if (c == 0x11F0)
1463: return true;
1464: if (c == 0x11F9)
1465: return true;
1466: if (c < 0x1E00)
1467: return false;
1468: if (c <= 0x1E9B)
1469: return true;
1470: if (c < 0x1EA0)
1471: return false;
1472: if (c <= 0x1EF9)
1473: return true;
1474: if (c < 0x1F00)
1475: return false;
1476: if (c <= 0x1F15)
1477: return true;
1478: if (c < 0x1F18)
1479: return false;
1480: if (c <= 0x1F1D)
1481: return true;
1482: if (c < 0x1F20)
1483: return false;
1484: if (c <= 0x1F45)
1485: return true;
1486: if (c < 0x1F48)
1487: return false;
1488: if (c <= 0x1F4D)
1489: return true;
1490: if (c < 0x1F50)
1491: return false;
1492: if (c <= 0x1F57)
1493: return true;
1494: if (c == 0x1F59)
1495: return true;
1496: if (c == 0x1F5B)
1497: return true;
1498: if (c == 0x1F5D)
1499: return true;
1500: if (c < 0x1F5F)
1501: return false;
1502: if (c <= 0x1F7D)
1503: return true;
1504: if (c < 0x1F80)
1505: return false;
1506: if (c <= 0x1FB4)
1507: return true;
1508: if (c < 0x1FB6)
1509: return false;
1510: if (c <= 0x1FBC)
1511: return true;
1512: if (c == 0x1FBE)
1513: return true;
1514: if (c < 0x1FC2)
1515: return false;
1516: if (c <= 0x1FC4)
1517: return true;
1518: if (c < 0x1FC6)
1519: return false;
1520: if (c <= 0x1FCC)
1521: return true;
1522: if (c < 0x1FD0)
1523: return false;
1524: if (c <= 0x1FD3)
1525: return true;
1526: if (c < 0x1FD6)
1527: return false;
1528: if (c <= 0x1FDB)
1529: return true;
1530: if (c < 0x1FE0)
1531: return false;
1532: if (c <= 0x1FEC)
1533: return true;
1534: if (c < 0x1FF2)
1535: return false;
1536: if (c <= 0x1FF4)
1537: return true;
1538: if (c < 0x1FF6)
1539: return false;
1540: if (c <= 0x1FFC)
1541: return true;
1542: if (c == 0x2126)
1543: return true;
1544: if (c < 0x212A)
1545: return false;
1546: if (c <= 0x212B)
1547: return true;
1548: if (c == 0x212E)
1549: return true;
1550: if (c < 0x2180)
1551: return false;
1552: if (c <= 0x2182)
1553: return true;
1554: if (c == 0x3007)
1555: return true; // ideographic
1556: if (c < 0x3021)
1557: return false;
1558: if (c <= 0x3029)
1559: return true; // ideo
1560: if (c < 0x3041)
1561: return false;
1562: if (c <= 0x3094)
1563: return true;
1564: if (c < 0x30A1)
1565: return false;
1566: if (c <= 0x30FA)
1567: return true;
1568: if (c < 0x3105)
1569: return false;
1570: if (c <= 0x312C)
1571: return true;
1572: if (c < 0x4E00)
1573: return false;
1574: if (c <= 0x9FA5)
1575: return true; // ideo
1576: if (c < 0xAC00)
1577: return false;
1578: if (c <= 0xD7A3)
1579: return true;
1580:
1581: return false;
1582:
1583: }
1584:
1585: /**
1586: * This is a utility function for determining whether a specified character
1587: * is a combining character according to production 87
1588: * of the XML 1.0 specification.
1589: *
1590: * @param c <code>char</code> to check.
1591: * @return <code>boolean</code> true if it's a combining character,
1592: * false otherwise.
1593: */
1594: public static boolean isXMLCombiningChar(char c) {
1595: // CombiningChar
1596: if (c < 0x0300)
1597: return false;
1598: if (c <= 0x0345)
1599: return true;
1600: if (c < 0x0360)
1601: return false;
1602: if (c <= 0x0361)
1603: return true;
1604: if (c < 0x0483)
1605: return false;
1606: if (c <= 0x0486)
1607: return true;
1608: if (c < 0x0591)
1609: return false;
1610: if (c <= 0x05A1)
1611: return true;
1612:
1613: if (c < 0x05A3)
1614: return false;
1615: if (c <= 0x05B9)
1616: return true;
1617: if (c < 0x05BB)
1618: return false;
1619: if (c <= 0x05BD)
1620: return true;
1621: if (c == 0x05BF)
1622: return true;
1623: if (c < 0x05C1)
1624: return false;
1625: if (c <= 0x05C2)
1626: return true;
1627:
1628: if (c == 0x05C4)
1629: return true;
1630: if (c < 0x064B)
1631: return false;
1632: if (c <= 0x0652)
1633: return true;
1634: if (c == 0x0670)
1635: return true;
1636: if (c < 0x06D6)
1637: return false;
1638: if (c <= 0x06DC)
1639: return true;
1640:
1641: if (c < 0x06DD)
1642: return false;
1643: if (c <= 0x06DF)
1644: return true;
1645: if (c < 0x06E0)
1646: return false;
1647: if (c <= 0x06E4)
1648: return true;
1649: if (c < 0x06E7)
1650: return false;
1651: if (c <= 0x06E8)
1652: return true;
1653:
1654: if (c < 0x06EA)
1655: return false;
1656: if (c <= 0x06ED)
1657: return true;
1658: if (c < 0x0901)
1659: return false;
1660: if (c <= 0x0903)
1661: return true;
1662: if (c == 0x093C)
1663: return true;
1664: if (c < 0x093E)
1665: return false;
1666: if (c <= 0x094C)
1667: return true;
1668:
1669: if (c == 0x094D)
1670: return true;
1671: if (c < 0x0951)
1672: return false;
1673: if (c <= 0x0954)
1674: return true;
1675: if (c < 0x0962)
1676: return false;
1677: if (c <= 0x0963)
1678: return true;
1679: if (c < 0x0981)
1680: return false;
1681: if (c <= 0x0983)
1682: return true;
1683:
1684: if (c == 0x09BC)
1685: return true;
1686: if (c == 0x09BE)
1687: return true;
1688: if (c == 0x09BF)
1689: return true;
1690: if (c < 0x09C0)
1691: return false;
1692: if (c <= 0x09C4)
1693: return true;
1694: if (c < 0x09C7)
1695: return false;
1696: if (c <= 0x09C8)
1697: return true;
1698:
1699: if (c < 0x09CB)
1700: return false;
1701: if (c <= 0x09CD)
1702: return true;
1703: if (c == 0x09D7)
1704: return true;
1705: if (c < 0x09E2)
1706: return false;
1707: if (c <= 0x09E3)
1708: return true;
1709: if (c == 0x0A02)
1710: return true;
1711: if (c == 0x0A3C)
1712: return true;
1713:
1714: if (c == 0x0A3E)
1715: return true;
1716: if (c == 0x0A3F)
1717: return true;
1718: if (c < 0x0A40)
1719: return false;
1720: if (c <= 0x0A42)
1721: return true;
1722: if (c < 0x0A47)
1723: return false;
1724: if (c <= 0x0A48)
1725: return true;
1726:
1727: if (c < 0x0A4B)
1728: return false;
1729: if (c <= 0x0A4D)
1730: return true;
1731: if (c < 0x0A70)
1732: return false;
1733: if (c <= 0x0A71)
1734: return true;
1735: if (c < 0x0A81)
1736: return false;
1737: if (c <= 0x0A83)
1738: return true;
1739: if (c == 0x0ABC)
1740: return true;
1741:
1742: if (c < 0x0ABE)
1743: return false;
1744: if (c <= 0x0AC5)
1745: return true;
1746: if (c < 0x0AC7)
1747: return false;
1748: if (c <= 0x0AC9)
1749: return true;
1750: if (c < 0x0ACB)
1751: return false;
1752: if (c <= 0x0ACD)
1753: return true;
1754:
1755: if (c < 0x0B01)
1756: return false;
1757: if (c <= 0x0B03)
1758: return true;
1759: if (c == 0x0B3C)
1760: return true;
1761: if (c < 0x0B3E)
1762: return false;
1763: if (c <= 0x0B43)
1764: return true;
1765: if (c < 0x0B47)
1766: return false;
1767: if (c <= 0x0B48)
1768: return true;
1769:
1770: if (c < 0x0B4B)
1771: return false;
1772: if (c <= 0x0B4D)
1773: return true;
1774: if (c < 0x0B56)
1775: return false;
1776: if (c <= 0x0B57)
1777: return true;
1778: if (c < 0x0B82)
1779: return false;
1780: if (c <= 0x0B83)
1781: return true;
1782:
1783: if (c < 0x0BBE)
1784: return false;
1785: if (c <= 0x0BC2)
1786: return true;
1787: if (c < 0x0BC6)
1788: return false;
1789: if (c <= 0x0BC8)
1790: return true;
1791: if (c < 0x0BCA)
1792: return false;
1793: if (c <= 0x0BCD)
1794: return true;
1795: if (c == 0x0BD7)
1796: return true;
1797:
1798: if (c < 0x0C01)
1799: return false;
1800: if (c <= 0x0C03)
1801: return true;
1802: if (c < 0x0C3E)
1803: return false;
1804: if (c <= 0x0C44)
1805: return true;
1806: if (c < 0x0C46)
1807: return false;
1808: if (c <= 0x0C48)
1809: return true;
1810:
1811: if (c < 0x0C4A)
1812: return false;
1813: if (c <= 0x0C4D)
1814: return true;
1815: if (c < 0x0C55)
1816: return false;
1817: if (c <= 0x0C56)
1818: return true;
1819: if (c < 0x0C82)
1820: return false;
1821: if (c <= 0x0C83)
1822: return true;
1823:
1824: if (c < 0x0CBE)
1825: return false;
1826: if (c <= 0x0CC4)
1827: return true;
1828: if (c < 0x0CC6)
1829: return false;
1830: if (c <= 0x0CC8)
1831: return true;
1832: if (c < 0x0CCA)
1833: return false;
1834: if (c <= 0x0CCD)
1835: return true;
1836:
1837: if (c < 0x0CD5)
1838: return false;
1839: if (c <= 0x0CD6)
1840: return true;
1841: if (c < 0x0D02)
1842: return false;
1843: if (c <= 0x0D03)
1844: return true;
1845: if (c < 0x0D3E)
1846: return false;
1847: if (c <= 0x0D43)
1848: return true;
1849:
1850: if (c < 0x0D46)
1851: return false;
1852: if (c <= 0x0D48)
1853: return true;
1854: if (c < 0x0D4A)
1855: return false;
1856: if (c <= 0x0D4D)
1857: return true;
1858: if (c == 0x0D57)
1859: return true;
1860: if (c == 0x0E31)
1861: return true;
1862:
1863: if (c < 0x0E34)
1864: return false;
1865: if (c <= 0x0E3A)
1866: return true;
1867: if (c < 0x0E47)
1868: return false;
1869: if (c <= 0x0E4E)
1870: return true;
1871: if (c == 0x0EB1)
1872: return true;
1873: if (c < 0x0EB4)
1874: return false;
1875: if (c <= 0x0EB9)
1876: return true;
1877:
1878: if (c < 0x0EBB)
1879: return false;
1880: if (c <= 0x0EBC)
1881: return true;
1882: if (c < 0x0EC8)
1883: return false;
1884: if (c <= 0x0ECD)
1885: return true;
1886: if (c < 0x0F18)
1887: return false;
1888: if (c <= 0x0F19)
1889: return true;
1890: if (c == 0x0F35)
1891: return true;
1892:
1893: if (c == 0x0F37)
1894: return true;
1895: if (c == 0x0F39)
1896: return true;
1897: if (c == 0x0F3E)
1898: return true;
1899: if (c == 0x0F3F)
1900: return true;
1901: if (c < 0x0F71)
1902: return false;
1903: if (c <= 0x0F84)
1904: return true;
1905:
1906: if (c < 0x0F86)
1907: return false;
1908: if (c <= 0x0F8B)
1909: return true;
1910: if (c < 0x0F90)
1911: return false;
1912: if (c <= 0x0F95)
1913: return true;
1914: if (c == 0x0F97)
1915: return true;
1916: if (c < 0x0F99)
1917: return false;
1918: if (c <= 0x0FAD)
1919: return true;
1920:
1921: if (c < 0x0FB1)
1922: return false;
1923: if (c <= 0x0FB7)
1924: return true;
1925: if (c == 0x0FB9)
1926: return true;
1927: if (c < 0x20D0)
1928: return false;
1929: if (c <= 0x20DC)
1930: return true;
1931: if (c == 0x20E1)
1932: return true;
1933:
1934: if (c < 0x302A)
1935: return false;
1936: if (c <= 0x302F)
1937: return true;
1938: if (c == 0x3099)
1939: return true;
1940: if (c == 0x309A)
1941: return true;
1942:
1943: return false;
1944:
1945: }
1946:
1947: /**
1948: * This is a utility function for determining whether a specified
1949: * character is an extender according to production 88 of the XML 1.0
1950: * specification.
1951: *
1952: * @param c <code>char</code> to check.
1953: * @return <code>String</code> true if it's an extender, false otherwise.
1954: */
1955: public static boolean isXMLExtender(char c) {
1956:
1957: if (c < 0x00B6)
1958: return false; // quick short circuit
1959:
1960: // Extenders
1961: if (c == 0x00B7)
1962: return true;
1963: if (c == 0x02D0)
1964: return true;
1965: if (c == 0x02D1)
1966: return true;
1967: if (c == 0x0387)
1968: return true;
1969: if (c == 0x0640)
1970: return true;
1971: if (c == 0x0E46)
1972: return true;
1973: if (c == 0x0EC6)
1974: return true;
1975: if (c == 0x3005)
1976: return true;
1977:
1978: if (c < 0x3031)
1979: return false;
1980: if (c <= 0x3035)
1981: return true;
1982: if (c < 0x309D)
1983: return false;
1984: if (c <= 0x309E)
1985: return true;
1986: if (c < 0x30FC)
1987: return false;
1988: if (c <= 0x30FE)
1989: return true;
1990:
1991: return false;
1992:
1993: }
1994:
1995: /**
1996: * This is a utility function for determining whether a specified
1997: * Unicode character
1998: * is a digit according to production 88 of the XML 1.0 specification.
1999: *
2000: * @param c <code>char</code> to check for XML digit compliance
2001: * @return <code>boolean</code> true if it's a digit, false otherwise
2002: */
2003: public static boolean isXMLDigit(char c) {
2004:
2005: if (c < 0x0030)
2006: return false;
2007: if (c <= 0x0039)
2008: return true;
2009: if (c < 0x0660)
2010: return false;
2011: if (c <= 0x0669)
2012: return true;
2013: if (c < 0x06F0)
2014: return false;
2015: if (c <= 0x06F9)
2016: return true;
2017: if (c < 0x0966)
2018: return false;
2019: if (c <= 0x096F)
2020: return true;
2021:
2022: if (c < 0x09E6)
2023: return false;
2024: if (c <= 0x09EF)
2025: return true;
2026: if (c < 0x0A66)
2027: return false;
2028: if (c <= 0x0A6F)
2029: return true;
2030: if (c < 0x0AE6)
2031: return false;
2032: if (c <= 0x0AEF)
2033: return true;
2034:
2035: if (c < 0x0B66)
2036: return false;
2037: if (c <= 0x0B6F)
2038: return true;
2039: if (c < 0x0BE7)
2040: return false;
2041: if (c <= 0x0BEF)
2042: return true;
2043: if (c < 0x0C66)
2044: return false;
2045: if (c <= 0x0C6F)
2046: return true;
2047:
2048: if (c < 0x0CE6)
2049: return false;
2050: if (c <= 0x0CEF)
2051: return true;
2052: if (c < 0x0D66)
2053: return false;
2054: if (c <= 0x0D6F)
2055: return true;
2056: if (c < 0x0E50)
2057: return false;
2058: if (c <= 0x0E59)
2059: return true;
2060:
2061: if (c < 0x0ED0)
2062: return false;
2063: if (c <= 0x0ED9)
2064: return true;
2065: if (c < 0x0F20)
2066: return false;
2067: if (c <= 0x0F29)
2068: return true;
2069:
2070: return false;
2071: }
2072:
2073: }
|