001: /*
002: * Copyright 1999-2005 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package org.apache.xml.utils;
018:
019: import java.util.Arrays;
020:
/**
 * THIS IS A COPY OF THE XERCES-2J CLASS org.apache.xerces.util.XML11Char
 *
 * This class defines the basic properties of characters in XML 1.1. The data
 * in this class can be used to verify that a character is a valid
 * XML 1.1 character or if the character is a space, name start, or name
 * character.
 * <p>
 * Every query is answered from a 64K flag table indexed by the character
 * value; each entry is a bit set built from the <code>MASK_XML11_*</code>
 * constants (e.g. <code>MASK_XML11_VALID</code>). The table only covers
 * 0x0000-0xFFFF, so characters at 0x10000 and above are classified by range
 * checks in the individual methods. Values outside 0x0-0x10FFFF, including
 * negative values, never match any classification.
 * <p>
 * The string validators pair UTF-16 surrogates using
 * <code>java.lang.Character</code>, which requires Java 5 or later.
 *
 * @version $Id: XML11Char.java,v 1.1 2005/03/23 17:54:05 ytalwar Exp $
 */
public class XML11Char {

    //
    // Constants
    //

    /** Character flags for XML 1.1, indexed by character value 0x0000-0xFFFF. */
    private static final byte XML11CHARS[] = new byte[1 << 16];

    /** XML 1.1 Valid character mask. */
    public static final int MASK_XML11_VALID = 0x01;

    /** XML 1.1 Space character mask. */
    public static final int MASK_XML11_SPACE = 0x02;

    /** XML 1.1 Name start character mask. */
    public static final int MASK_XML11_NAME_START = 0x04;

    /** XML 1.1 Name character mask. */
    public static final int MASK_XML11_NAME = 0x08;

    /** XML 1.1 control character mask */
    public static final int MASK_XML11_CONTROL = 0x10;

    /** XML 1.1 content for external entities (valid - "special" chars - control chars) */
    public static final int MASK_XML11_CONTENT = 0x20;

    /** XML namespaces 1.1 NCNameStart */
    public static final int MASK_XML11_NCNAME_START = 0x40;

    /** XML namespaces 1.1 NCName */
    public static final int MASK_XML11_NCNAME = 0x80;

    /** XML 1.1 content for internal entities (valid - "special" chars) */
    public static final int MASK_XML11_CONTENT_INTERNAL = MASK_XML11_CONTROL
            | MASK_XML11_CONTENT;

    //
    // Static initialization
    //

    static {

        // Initializing the Character Flag Array
        // Code generated by: XML11CharGenerator.
        //
        // Entries that are never assigned stay 0 (no flag set): 0x0000,
        // the surrogate block 0xD800-0xDFFF, and 0xFFFE-0xFFFF.
        // Negative literals are flag sets whose 0x80 bit is set, e.g.
        // (byte) -19 == 0xED and (byte) -87 == 0xA9.

        Arrays.fill(XML11CHARS, 1, 9, (byte) 17); // Fill 8 of value (byte) 17
        XML11CHARS[9] = 35;
        XML11CHARS[10] = 3;
        Arrays.fill(XML11CHARS, 11, 13, (byte) 17); // Fill 2 of value (byte) 17
        XML11CHARS[13] = 3;
        Arrays.fill(XML11CHARS, 14, 32, (byte) 17); // Fill 18 of value (byte) 17
        XML11CHARS[32] = 35;
        Arrays.fill(XML11CHARS, 33, 38, (byte) 33); // Fill 5 of value (byte) 33
        XML11CHARS[38] = 1;
        Arrays.fill(XML11CHARS, 39, 45, (byte) 33); // Fill 6 of value (byte) 33
        Arrays.fill(XML11CHARS, 45, 47, (byte) -87); // Fill 2 of value (byte) -87
        XML11CHARS[47] = 33;
        Arrays.fill(XML11CHARS, 48, 58, (byte) -87); // Fill 10 of value (byte) -87
        XML11CHARS[58] = 45;
        XML11CHARS[59] = 33;
        XML11CHARS[60] = 1;
        Arrays.fill(XML11CHARS, 61, 65, (byte) 33); // Fill 4 of value (byte) 33
        Arrays.fill(XML11CHARS, 65, 91, (byte) -19); // Fill 26 of value (byte) -19
        Arrays.fill(XML11CHARS, 91, 93, (byte) 33); // Fill 2 of value (byte) 33
        XML11CHARS[93] = 1;
        XML11CHARS[94] = 33;
        XML11CHARS[95] = -19;
        XML11CHARS[96] = 33;
        Arrays.fill(XML11CHARS, 97, 123, (byte) -19); // Fill 26 of value (byte) -19
        Arrays.fill(XML11CHARS, 123, 127, (byte) 33); // Fill 4 of value (byte) 33
        Arrays.fill(XML11CHARS, 127, 133, (byte) 17); // Fill 6 of value (byte) 17
        XML11CHARS[133] = 35;
        Arrays.fill(XML11CHARS, 134, 160, (byte) 17); // Fill 26 of value (byte) 17
        Arrays.fill(XML11CHARS, 160, 183, (byte) 33); // Fill 23 of value (byte) 33
        XML11CHARS[183] = -87;
        Arrays.fill(XML11CHARS, 184, 192, (byte) 33); // Fill 8 of value (byte) 33
        Arrays.fill(XML11CHARS, 192, 215, (byte) -19); // Fill 23 of value (byte) -19
        XML11CHARS[215] = 33;
        Arrays.fill(XML11CHARS, 216, 247, (byte) -19); // Fill 31 of value (byte) -19
        XML11CHARS[247] = 33;
        Arrays.fill(XML11CHARS, 248, 768, (byte) -19); // Fill 520 of value (byte) -19
        Arrays.fill(XML11CHARS, 768, 880, (byte) -87); // Fill 112 of value (byte) -87
        Arrays.fill(XML11CHARS, 880, 894, (byte) -19); // Fill 14 of value (byte) -19
        XML11CHARS[894] = 33;
        Arrays.fill(XML11CHARS, 895, 8192, (byte) -19); // Fill 7297 of value (byte) -19
        Arrays.fill(XML11CHARS, 8192, 8204, (byte) 33); // Fill 12 of value (byte) 33
        Arrays.fill(XML11CHARS, 8204, 8206, (byte) -19); // Fill 2 of value (byte) -19
        Arrays.fill(XML11CHARS, 8206, 8232, (byte) 33); // Fill 26 of value (byte) 33
        XML11CHARS[8232] = 35;
        Arrays.fill(XML11CHARS, 8233, 8255, (byte) 33); // Fill 22 of value (byte) 33
        Arrays.fill(XML11CHARS, 8255, 8257, (byte) -87); // Fill 2 of value (byte) -87
        Arrays.fill(XML11CHARS, 8257, 8304, (byte) 33); // Fill 47 of value (byte) 33
        Arrays.fill(XML11CHARS, 8304, 8592, (byte) -19); // Fill 288 of value (byte) -19
        Arrays.fill(XML11CHARS, 8592, 11264, (byte) 33); // Fill 2672 of value (byte) 33
        Arrays.fill(XML11CHARS, 11264, 12272, (byte) -19); // Fill 1008 of value (byte) -19
        Arrays.fill(XML11CHARS, 12272, 12289, (byte) 33); // Fill 17 of value (byte) 33
        Arrays.fill(XML11CHARS, 12289, 55296, (byte) -19); // Fill 43007 of value (byte) -19
        Arrays.fill(XML11CHARS, 57344, 63744, (byte) 33); // Fill 6400 of value (byte) 33
        Arrays.fill(XML11CHARS, 63744, 64976, (byte) -19); // Fill 1232 of value (byte) -19
        Arrays.fill(XML11CHARS, 64976, 65008, (byte) 33); // Fill 32 of value (byte) 33
        Arrays.fill(XML11CHARS, 65008, 65534, (byte) -19); // Fill 526 of value (byte) -19

    } // <clinit>()

    //
    // Private helpers
    //

    /**
     * Returns true if <code>c</code> lies inside the flag table and its
     * entry has at least one bit of <code>mask</code> set. The lower bound
     * check keeps negative values from indexing the table.
     */
    private static boolean hasFlag(int c, int mask) {
        return 0 <= c && c < 0x10000 && (XML11CHARS[c] & mask) != 0;
    }

    /** Returns true if <code>c</code> is in the range 0x10000-0x10FFFF. */
    private static boolean isSupplementary(int c) {
        return 0x10000 <= c && c <= 0x10FFFF;
    }

    /**
     * Returns true if <code>c</code> is in the range 0x10000-0xEFFFF, the
     * only characters above 0xFFFF accepted by the name predicates.
     */
    private static boolean isSupplementaryNameChar(int c) {
        return 0x10000 <= c && c < 0xF0000;
    }

    /**
     * Matches one character of <code>s</code> starting at <code>index</code>.
     *
     * @param s     the string being scanned
     * @param index position of the first unit to inspect; must be a valid
     *              index into <code>s</code>
     * @param mask  flag the unit must carry in the table
     * @return 1 if the unit at <code>index</code> carries <code>mask</code>,
     *         2 if it starts a high/low surrogate pair that encodes a
     *         character in 0x10000-0xEFFFF, or 0 if there is no match
     */
    private static int matchLength(String s, int index, int mask) {
        final char unit = s.charAt(index);
        if (hasFlag(unit, mask)) {
            return 1;
        }
        if (isXML11NameHighSurrogate(unit) && index + 1 < s.length()) {
            final char low = s.charAt(index + 1);
            if (Character.isLowSurrogate(low)
                    && isSupplementaryNameChar(Character.toCodePoint(unit, low))) {
                return 2;
            }
        }
        return 0;
    }

    /**
     * Returns true if every character of <code>s</code> from
     * <code>from</code> to the end matches <code>mask</code> as defined by
     * {@link #matchLength(String, int, int)}. An empty range matches.
     */
    private static boolean allMatch(String s, int from, int mask) {
        final int length = s.length();
        int i = from;
        while (i < length) {
            final int consumed = matchLength(s, i, mask);
            if (consumed == 0) {
                return false;
            }
            i += consumed;
        }
        return true;
    }

    //
    // Public static methods
    //

    /**
     * Returns true if the specified character is a space character
     * as amended in the XML 1.1 specification.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Space(int c) {
        return hasFlag(c, MASK_XML11_SPACE);
    } // isXML11Space(int):boolean

    /**
     * Returns true if the specified character is valid. This method
     * also accepts the range from 0x10000 to 0x10FFFF, which is not
     * covered by the flag table.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Valid(int c) {
        return hasFlag(c, MASK_XML11_VALID) || isSupplementary(c);
    } // isXML11Valid(int):boolean

    /**
     * Returns true if the specified character is invalid, i.e. exactly
     * when {@link #isXML11Valid(int)} returns false.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Invalid(int c) {
        return !isXML11Valid(c);
    } // isXML11Invalid(int):boolean

    /**
     * Returns true if the specified character is valid and permitted outside
     * of a character reference.
     * That is, this method will return false for the same set as
     * isXML11Valid, except it also reports false for "control characters".
     *
     * @param c The character to check.
     */
    public static boolean isXML11ValidLiteral(int c) {
        return (hasFlag(c, MASK_XML11_VALID) && !hasFlag(c, MASK_XML11_CONTROL))
                || isSupplementary(c);
    } // isXML11ValidLiteral(int):boolean

    /**
     * Returns true if the specified character can be considered
     * content in an external parsed entity.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Content(int c) {
        return hasFlag(c, MASK_XML11_CONTENT) || isSupplementary(c);
    } // isXML11Content(int):boolean

    /**
     * Returns true if the specified character can be considered
     * content in an internal parsed entity. Unlike
     * {@link #isXML11Content(int)} this also accepts characters flagged
     * as control characters.
     *
     * @param c The character to check.
     */
    public static boolean isXML11InternalEntityContent(int c) {
        return hasFlag(c, MASK_XML11_CONTENT_INTERNAL) || isSupplementary(c);
    } // isXML11InternalEntityContent(int):boolean

    /**
     * Returns true if the specified character is a valid name start
     * character as defined by production [4] in the XML 1.1
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NameStart(int c) {
        return hasFlag(c, MASK_XML11_NAME_START) || isSupplementaryNameChar(c);
    } // isXML11NameStart(int):boolean

    /**
     * Returns true if the specified character is a valid name
     * character as defined by production [4a] in the XML 1.1
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Name(int c) {
        return hasFlag(c, MASK_XML11_NAME) || isSupplementaryNameChar(c);
    } // isXML11Name(int):boolean

    /**
     * Returns true if the specified character is a valid NCName start
     * character as defined by production [4] in Namespaces in XML
     * 1.1 recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NCNameStart(int c) {
        return hasFlag(c, MASK_XML11_NCNAME_START) || isSupplementaryNameChar(c);
    } // isXML11NCNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid NCName
     * character as defined by production [5] in Namespaces in XML
     * 1.1 recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NCName(int c) {
        return hasFlag(c, MASK_XML11_NCNAME) || isSupplementaryNameChar(c);
    } // isXML11NCName(int):boolean

    /**
     * Returns whether the given character is a valid
     * high surrogate for a name character. This includes
     * all high surrogates for characters [0x10000-0xEFFFF].
     * In other words everything excluding planes 15 and 16.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NameHighSurrogate(int c) {
        return (0xD800 <= c && c <= 0xDB7F);
    }

    /*
     * [5] Name ::= NameStartChar NameChar*
     */
    /**
     * Check to see if a string is a valid Name according to [5]
     * in the XML 1.1 Recommendation
     *
     * @param name string to check; must not be null
     * @return true if name is a valid Name
     */
    public static boolean isXML11ValidName(String name) {
        if (name.length() == 0) {
            return false;
        }
        // The first character (one or two UTF-16 units) must be a name start.
        final int first = matchLength(name, 0, MASK_XML11_NAME_START);
        return first != 0 && allMatch(name, first, MASK_XML11_NAME);
    } // isXML11ValidName(String):boolean

    /*
     * from the namespace 1.1 rec
     * [4] NCName ::= NCNameStartChar NCNameChar*
     */
    /**
     * Check to see if a string is a valid NCName according to [4]
     * from the XML Namespaces 1.1 Recommendation
     *
     * @param ncName string to check; must not be null
     * @return true if name is a valid NCName
     */
    public static boolean isXML11ValidNCName(String ncName) {
        if (ncName.length() == 0) {
            return false;
        }
        // The first character (one or two UTF-16 units) must be an NCName start.
        final int first = matchLength(ncName, 0, MASK_XML11_NCNAME_START);
        return first != 0 && allMatch(ncName, first, MASK_XML11_NCNAME);
    } // isXML11ValidNCName(String):boolean

    /*
     * [7] Nmtoken ::= (NameChar)+
     */
    /**
     * Check to see if a string is a valid Nmtoken according to [7]
     * in the XML 1.1 Recommendation
     *
     * @param nmtoken string to check; must not be null
     * @return true if nmtoken is a valid Nmtoken
     */
    public static boolean isXML11ValidNmtoken(String nmtoken) {
        return nmtoken.length() != 0 && allMatch(nmtoken, 0, MASK_XML11_NAME);
    } // isXML11ValidNmtoken(String):boolean

    /**
     * Simple check to determine if qname is legal. If it returns false
     * then <code>str</code> is illegal; if it returns true then
     * <code>str</code> is legal.
     * <p>
     * Only the first colon is treated as the prefix separator, so a string
     * containing a second colon is rejected by the NCName check on the
     * local part.
     *
     * @param str string to check; must not be null
     * @return true if str is a valid QName
     */
    public static boolean isXML11ValidQName(String str) {

        final int colon = str.indexOf(':');

        // Rejects an empty prefix (leading colon) and an empty local part
        // (trailing colon). For the empty string both sides are -1, so it
        // is rejected here as well.
        if (colon == 0 || colon == str.length() - 1) {
            return false;
        }

        if (colon < 0) {
            // No prefix: the whole string must be an NCName.
            return isXML11ValidNCName(str);
        }

        final String prefix = str.substring(0, colon);
        final String localPart = str.substring(colon + 1);
        return isXML11ValidNCName(prefix) && isXML11ValidNCName(localPart);
    }

} // class XML11Char
|