001: package net.sf.saxon.om;
002:
// Copied from Xerces module org.apache.xerces.util.XML11Char, version 2.7.1
004: // Changes marked "//MHK"
005:
006: /*
007: * Copyright 1999-2004 The Apache Software Foundation.
008: *
009: * Licensed under the Apache License, Version 2.0 (the "License");
010: * you may not use this file except in compliance with the License.
011: * You may obtain a copy of the License at
012: *
013: * http://www.apache.org/licenses/LICENSE-2.0
014: *
015: * Unless required by applicable law or agreed to in writing, software
016: * distributed under the License is distributed on an "AS IS" BASIS,
017: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018: * See the License for the specific language governing permissions and
019: * limitations under the License.
020: */
021:
022: //package org.apache.xerces.util; //MHK
023: import java.util.Arrays;
024:
/**
 * This class defines the basic properties of characters in XML 1.1. The data
 * in this class can be used to verify that a character is a valid
 * XML 1.1 character or if the character is a space, name start, or name
 * character.
 * <p>
 * Classification of the code points 0x0000-0xFFFF is held in a private
 * per-character flag table; each <code>MASK_XML11_*</code> constant selects
 * one or more bits of a table entry. The <code>isXML11*</code> convenience
 * methods apply the appropriate mask and, where relevant, additionally handle
 * the supplementary range 0x10000-0x10FFFF, which the table does not cover.
 * Values outside 0x0000-0x10FFFF (including negative values) never belong to
 * any character class.
 *
 * @author Glenn Marcy, IBM
 * @author Andy Clark, IBM
 * @author Arnaud Le Hors, IBM
 * @author Neil Graham, IBM
 * @author Michael Glavassevich, IBM
 *
 * @version $Id: XML11Char.java,v 1.8 2004/03/25 04:03:22 mrglavas Exp $
 */
public class XML11Char {

    //
    // Constants
    //

    /** Character flags for XML 1.1, indexed by code point 0x0000-0xFFFF. */
    private static final byte[] XML11CHARS = new byte[1 << 16];

    /** XML 1.1 Valid character mask. */
    public static final int MASK_XML11_VALID = 0x01;

    /** XML 1.1 Space character mask. */
    public static final int MASK_XML11_SPACE = 0x02;

    /** XML 1.1 Name start character mask. */
    public static final int MASK_XML11_NAME_START = 0x04;

    /** XML 1.1 Name character mask. */
    public static final int MASK_XML11_NAME = 0x08;

    /** XML 1.1 control character mask */
    public static final int MASK_XML11_CONTROL = 0x10;

    /** XML 1.1 content for external entities (valid - "special" chars - control chars) */
    public static final int MASK_XML11_CONTENT = 0x20;

    /** XML namespaces 1.1 NCNameStart */
    public static final int MASK_XML11_NCNAME_START = 0x40;

    /** XML namespaces 1.1 NCName */
    public static final int MASK_XML11_NCNAME = 0x80;

    /** XML 1.1 content for internal entities (valid - "special" chars) */
    public static final int MASK_XML11_CONTENT_INTERNAL = MASK_XML11_CONTROL
            | MASK_XML11_CONTENT;

    //
    // Static initialization
    //

    static {

        // Initializing the Character Flag Array
        // Code generated by: XML11CharGenerator.
        //
        // Entries that are never assigned below (0, 55296-57343, 65534 and
        // 65535) keep the default value 0, i.e. they carry no flags at all.

        Arrays.fill(XML11CHARS, 1, 9, (byte) 17); // Fill 8 of value (byte) 17
        XML11CHARS[9] = 35;
        XML11CHARS[10] = 3;
        Arrays.fill(XML11CHARS, 11, 13, (byte) 17); // Fill 2 of value (byte) 17
        XML11CHARS[13] = 3;
        Arrays.fill(XML11CHARS, 14, 32, (byte) 17); // Fill 18 of value (byte) 17
        XML11CHARS[32] = 35;
        Arrays.fill(XML11CHARS, 33, 38, (byte) 33); // Fill 5 of value (byte) 33
        XML11CHARS[38] = 1;
        Arrays.fill(XML11CHARS, 39, 45, (byte) 33); // Fill 6 of value (byte) 33
        Arrays.fill(XML11CHARS, 45, 47, (byte) -87); // Fill 2 of value (byte) -87
        XML11CHARS[47] = 33;
        Arrays.fill(XML11CHARS, 48, 58, (byte) -87); // Fill 10 of value (byte) -87
        XML11CHARS[58] = 45;
        XML11CHARS[59] = 33;
        XML11CHARS[60] = 1;
        Arrays.fill(XML11CHARS, 61, 65, (byte) 33); // Fill 4 of value (byte) 33
        Arrays.fill(XML11CHARS, 65, 91, (byte) -19); // Fill 26 of value (byte) -19
        Arrays.fill(XML11CHARS, 91, 93, (byte) 33); // Fill 2 of value (byte) 33
        XML11CHARS[93] = 1;
        XML11CHARS[94] = 33;
        XML11CHARS[95] = -19;
        XML11CHARS[96] = 33;
        Arrays.fill(XML11CHARS, 97, 123, (byte) -19); // Fill 26 of value (byte) -19
        Arrays.fill(XML11CHARS, 123, 127, (byte) 33); // Fill 4 of value (byte) 33
        Arrays.fill(XML11CHARS, 127, 133, (byte) 17); // Fill 6 of value (byte) 17
        XML11CHARS[133] = 35;
        Arrays.fill(XML11CHARS, 134, 160, (byte) 17); // Fill 26 of value (byte) 17
        Arrays.fill(XML11CHARS, 160, 183, (byte) 33); // Fill 23 of value (byte) 33
        XML11CHARS[183] = -87;
        Arrays.fill(XML11CHARS, 184, 192, (byte) 33); // Fill 8 of value (byte) 33
        Arrays.fill(XML11CHARS, 192, 215, (byte) -19); // Fill 23 of value (byte) -19
        XML11CHARS[215] = 33;
        Arrays.fill(XML11CHARS, 216, 247, (byte) -19); // Fill 31 of value (byte) -19
        XML11CHARS[247] = 33;
        Arrays.fill(XML11CHARS, 248, 768, (byte) -19); // Fill 520 of value (byte) -19
        Arrays.fill(XML11CHARS, 768, 880, (byte) -87); // Fill 112 of value (byte) -87
        Arrays.fill(XML11CHARS, 880, 894, (byte) -19); // Fill 14 of value (byte) -19
        XML11CHARS[894] = 33;
        Arrays.fill(XML11CHARS, 895, 8192, (byte) -19); // Fill 7297 of value (byte) -19
        Arrays.fill(XML11CHARS, 8192, 8204, (byte) 33); // Fill 12 of value (byte) 33
        Arrays.fill(XML11CHARS, 8204, 8206, (byte) -19); // Fill 2 of value (byte) -19
        Arrays.fill(XML11CHARS, 8206, 8232, (byte) 33); // Fill 26 of value (byte) 33
        XML11CHARS[8232] = 35;
        Arrays.fill(XML11CHARS, 8233, 8255, (byte) 33); // Fill 22 of value (byte) 33
        Arrays.fill(XML11CHARS, 8255, 8257, (byte) -87); // Fill 2 of value (byte) -87
        Arrays.fill(XML11CHARS, 8257, 8304, (byte) 33); // Fill 47 of value (byte) 33
        Arrays.fill(XML11CHARS, 8304, 8592, (byte) -19); // Fill 288 of value (byte) -19
        Arrays.fill(XML11CHARS, 8592, 11264, (byte) 33); // Fill 2672 of value (byte) 33
        Arrays.fill(XML11CHARS, 11264, 12272, (byte) -19); // Fill 1008 of value (byte) -19
        Arrays.fill(XML11CHARS, 12272, 12289, (byte) 33); // Fill 17 of value (byte) 33
        Arrays.fill(XML11CHARS, 12289, 55296, (byte) -19); // Fill 43007 of value (byte) -19
        Arrays.fill(XML11CHARS, 57344, 63744, (byte) 33); // Fill 6400 of value (byte) 33
        Arrays.fill(XML11CHARS, 63744, 64976, (byte) -19); // Fill 1232 of value (byte) -19
        Arrays.fill(XML11CHARS, 64976, 65008, (byte) 33); // Fill 32 of value (byte) 33
        Arrays.fill(XML11CHARS, 65008, 65534, (byte) -19); // Fill 526 of value (byte) -19

    } // <clinit>()

    //
    // Private helpers
    //

    /**
     * Returns true if <code>c</code> lies in 0x0000-0xFFFF and its table entry
     * has at least one of the bits in <code>mask</code> set. The explicit
     * lower bound keeps negative values from being used as an array index.
     */
    private static boolean hasFlag(int c, int mask) {
        return 0 <= c && c < 0x10000 && (XML11CHARS[c] & mask) != 0;
    }

    /** Returns true if <code>c</code> lies in the range 0x10000-0x10FFFF. */
    private static boolean isSupplementary(int c) {
        return 0x10000 <= c && c <= 0x10FFFF;
    }

    /**
     * Returns true if <code>c</code> lies in the range 0x10000-0xEFFFF, the
     * supplementary range accepted by the name and NCName predicates.
     */
    private static boolean isSupplementaryNameChar(int c) {
        return 0x10000 <= c && c < 0xF0000;
    }

    /**
     * Returns true if every code point of <code>s</code> from char index
     * <code>from</code> onwards is a name character (or an NCName character
     * when <code>ncName</code> is true). An unpaired surrogate is seen as its
     * own code point, which carries no flags in the table, so it is rejected.
     */
    private static boolean allNameChars(String s, int from, boolean ncName) {
        final int length = s.length();
        int i = from;
        while (i < length) {
            final int cp = s.codePointAt(i);
            if (!(ncName ? isXML11NCName(cp) : isXML11Name(cp))) {
                return false;
            }
            i += Character.charCount(cp);
        }
        return true;
    }

    //
    // Public static methods
    //

    /**
     * Returns true if the specified character is a space character
     * as amended in the XML 1.1 specification. The table flags
     * 0x09, 0x0A, 0x0D, 0x20, 0x85 and 0x2028 as space characters.
     *
     * @param c The character to check.
     * @return true if the character is flagged as a space; false otherwise,
     *         including for any value outside 0x0000-0xFFFF.
     */
    public static boolean isXML11Space(int c) {
        return hasFlag(c, MASK_XML11_SPACE);
    } // isXML11Space(int):boolean

    /**
     * Returns true if the specified character is valid. This method
     * also accepts the supplementary range from 0x10000 to 0x10FFFF.
     *
     * @param c The character to check.
     * @return true if the character is valid; false otherwise, including for
     *         negative values and values above 0x10FFFF.
     */
    public static boolean isXML11Valid(int c) {
        return hasFlag(c, MASK_XML11_VALID) || isSupplementary(c);
    } // isXML11Valid(int):boolean

    /**
     * Returns true if the specified character is invalid.
     *
     * @param c The character to check.
     * @return the negation of {@link #isXML11Valid(int)}.
     */
    public static boolean isXML11Invalid(int c) {
        return !isXML11Valid(c);
    } // isXML11Invalid(int):boolean

    /**
     * Returns true if the specified character is valid and permitted outside
     * of a character reference.
     * That is, this method returns true for the same set as
     * {@link #isXML11Valid(int)}, except that it reports false for
     * "control characters".
     *
     * @param c The character to check.
     * @return true if the character is valid and not flagged as a control
     *         character, or lies in 0x10000-0x10FFFF.
     */
    public static boolean isXML11ValidLiteral(int c) {
        return (hasFlag(c, MASK_XML11_VALID) && !hasFlag(c, MASK_XML11_CONTROL))
                || isSupplementary(c);
    } // isXML11ValidLiteral(int):boolean

    /**
     * Returns true if the specified character can be considered
     * content in an external parsed entity.
     *
     * @param c The character to check.
     * @return true if the character is flagged as content, or lies in
     *         0x10000-0x10FFFF.
     */
    public static boolean isXML11Content(int c) {
        return hasFlag(c, MASK_XML11_CONTENT) || isSupplementary(c);
    } // isXML11Content(int):boolean

    /**
     * Returns true if the specified character can be considered
     * content in an internal parsed entity.
     *
     * @param c The character to check.
     * @return true if the character is flagged as content or as a control
     *         character, or lies in 0x10000-0x10FFFF.
     */
    public static boolean isXML11InternalEntityContent(int c) {
        return hasFlag(c, MASK_XML11_CONTENT_INTERNAL) || isSupplementary(c);
    } // isXML11InternalEntityContent(int):boolean

    /**
     * Returns true if the specified character is a valid name start
     * character as defined by production [4] in the XML 1.1
     * specification.
     *
     * @param c The character to check.
     * @return true if the character is flagged as a name start character, or
     *         lies in 0x10000-0xEFFFF.
     */
    public static boolean isXML11NameStart(int c) {
        return hasFlag(c, MASK_XML11_NAME_START) || isSupplementaryNameChar(c);
    } // isXML11NameStart(int):boolean

    /**
     * Returns true if the specified character is a valid name
     * character as defined by production [4a] in the XML 1.1
     * specification.
     *
     * @param c The character to check.
     * @return true if the character is flagged as a name character, or lies
     *         in 0x10000-0xEFFFF.
     */
    public static boolean isXML11Name(int c) {
        return hasFlag(c, MASK_XML11_NAME) || isSupplementaryNameChar(c);
    } // isXML11Name(int):boolean

    /**
     * Returns true if the specified character is a valid NCName start
     * character as defined by production [4] in Namespaces in XML
     * 1.1 recommendation.
     *
     * @param c The character to check.
     * @return true if the character is flagged as an NCName start character,
     *         or lies in 0x10000-0xEFFFF.
     */
    public static boolean isXML11NCNameStart(int c) {
        return hasFlag(c, MASK_XML11_NCNAME_START) || isSupplementaryNameChar(c);
    } // isXML11NCNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid NCName
     * character as defined by production [5] in Namespaces in XML
     * 1.1 recommendation.
     *
     * @param c The character to check.
     * @return true if the character is flagged as an NCName character, or
     *         lies in 0x10000-0xEFFFF.
     */
    public static boolean isXML11NCName(int c) {
        return hasFlag(c, MASK_XML11_NCNAME) || isSupplementaryNameChar(c);
    } // isXML11NCName(int):boolean

    /**
     * Returns whether the given character is a valid
     * high surrogate for a name character. This includes
     * all high surrogates for characters [0x10000-0xEFFFF].
     * In other words everything excluding planes 15 and 16.
     *
     * @param c The character to check.
     * @return true if <code>c</code> lies in 0xD800-0xDB7F.
     */
    public static boolean isXML11NameHighSurrogate(int c) {
        return (0xD800 <= c && c <= 0xDB7F);
    } // isXML11NameHighSurrogate(int):boolean

    /*
     * [5] Name ::= NameStartChar NameChar*
     */
    /**
     * Check to see if a string is a valid Name according to [5]
     * in the XML 1.1 Recommendation
     *
     * @param name string to check
     * @return true if name is a valid Name; false for the empty string
     * @throws NullPointerException if <code>name</code> is null
     */
    public static boolean isXML11ValidName(String name) {
        if (name.length() == 0) {
            return false;
        }
        // codePointAt combines a well-formed surrogate pair into one code
        // point, so supplementary name start characters need no special case.
        final int first = name.codePointAt(0);
        return isXML11NameStart(first)
                && allNameChars(name, Character.charCount(first), false);
    } // isXML11ValidName(String):boolean

    /*
     * from the namespace 1.1 rec
     * [4] NCName ::= NCNameStartChar NCNameChar*
     */
    /**
     * Check to see if a string is a valid NCName according to [4]
     * from the XML Namespaces 1.1 Recommendation
     *
     * @param ncName string to check
     * @return true if name is a valid NCName; false for the empty string
     * @throws NullPointerException if <code>ncName</code> is null
     */
    public static boolean isXML11ValidNCName(String ncName) {
        if (ncName.length() == 0) {
            return false;
        }
        final int first = ncName.codePointAt(0);
        return isXML11NCNameStart(first)
                && allNameChars(ncName, Character.charCount(first), true);
    } // isXML11ValidNCName(String):boolean

    /*
     * [7] Nmtoken ::= (NameChar)+
     */
    /**
     * Check to see if a string is a valid Nmtoken according to [7]
     * in the XML 1.1 Recommendation
     *
     * @param nmtoken string to check
     * @return true if nmtoken is a valid Nmtoken; false for the empty string
     * @throws NullPointerException if <code>nmtoken</code> is null
     */
    public static boolean isXML11ValidNmtoken(String nmtoken) {
        return nmtoken.length() != 0 && allNameChars(nmtoken, 0, false);
    } // isXML11ValidNmtoken(String):boolean

} // class XML11Char
|