001: /*
002: * The Apache Software License, Version 1.1
003: *
004: *
005: * Copyright (c) 1999 The Apache Software Foundation. All rights
006: * reserved.
007: *
008: * Redistribution and use in source and binary forms, with or without
009: * modification, are permitted provided that the following conditions
010: * are met:
011: *
012: * 1. Redistributions of source code must retain the above copyright
013: * notice, this list of conditions and the following disclaimer.
014: *
015: * 2. Redistributions in binary form must reproduce the above copyright
016: * notice, this list of conditions and the following disclaimer in
017: * the documentation and/or other materials provided with the
018: * distribution.
019: *
020: * 3. The end-user documentation included with the redistribution,
021: * if any, must include the following acknowledgment:
022: * "This product includes software developed by the
023: * Apache Software Foundation (http://www.apache.org/)."
024: * Alternately, this acknowledgment may appear in the software itself,
025: * if and wherever such third-party acknowledgments normally appear.
026: *
027: * 4. The names "Xerces" and "Apache Software Foundation" must
028: * not be used to endorse or promote products derived from this
029: * software without prior written permission. For written
030: * permission, please contact apache@apache.org.
031: *
032: * 5. Products derived from this software may not be called "Apache",
033: * nor may "Apache" appear in their name, without prior written
034: * permission of the Apache Software Foundation.
035: *
036: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
037: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
038: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
039: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
040: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
041: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
042: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
043: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
044: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
045: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
046: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
047: * SUCH DAMAGE.
048: * ====================================================================
049: *
050: * This software consists of voluntary contributions made by many
051: * individuals on behalf of the Apache Software Foundation and was
052: * originally based on software copyright (c) 1999, International
053: * Business Machines, Inc., http://www.apache.org. For more
054: * information on the Apache Software Foundation, please see
055: * <http://www.apache.org/>.
056: */
057:
058: package org.apache.xerces.utils;
059:
/**
 * Character classification helpers for productions of the XML 1.0 and
 * Namespaces in XML recommendations.
 *
 * <p>The <code>valid*</code> methods are convenience tests for whole strings.
 * They consult two kinds of lookup table:
 * <ul>
 * <li>128-entry <code>fgAscii*</code> tables, indexed directly by code unit,
 *     for characters up to and including <code>'z'</code>;</li>
 * <li>a 65536-entry flag table (one byte of <code>E_*Flag</code> bits per
 *     UTF-16 code unit) for everything above <code>'z'</code>.</li>
 * </ul>
 *
 * <p>For performance reasons the tables are also public so that performance
 * critical routines can index them directly. Code that reads
 * {@link #fgCharFlags} directly must call {@link #initCharFlags()} first;
 * the <code>valid*</code> methods of this class need no such preparation.
 */
public final class XMLCharacterProperties {

    /**
     * Checks whether a string is a valid version number according to
     * production [26] of the XML 1.0 Recommendation:
     * <pre>
     * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
     * </pre>
     * This is the same set as the ASCII portion of NameChar, so the
     * {@link #fgAsciiNameChar} table is used.
     *
     * @param version string to check
     * @return true if version is non-empty and consists only of VersionNum characters
     * @throws NullPointerException if version is null
     */
    public static boolean validVersionNum(String version) {
        int len = version.length();
        if (len == 0) {
            return false;
        }
        for (int i = 0; i < len; i++) {
            if (!isAsciiMember(version.charAt(i), fgAsciiNameChar)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks whether a string is a valid encoding name according to
     * production [81] of the XML 1.0 Recommendation:
     * <pre>
     * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
     * </pre>
     *
     * @param encoding string to check
     * @return true if encoding is a valid encoding name
     * @throws NullPointerException if encoding is null
     */
    public static boolean validEncName(String encoding) {
        int len = encoding.length();
        if (len == 0 || !isAsciiMember(encoding.charAt(0), fgAsciiAlphaChar)) {
            return false;
        }
        for (int i = 1; i < len; i++) {
            if (!isAsciiMember(encoding.charAt(i), fgAsciiEncNameChar)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Scans a public identifier for characters that are not allowed by
     * production [13] of the XML 1.0 Recommendation:
     * <pre>
     * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
     * </pre>
     *
     * <p>Note that the result is an index, not a boolean.
     *
     * @param publicId string to check
     * @return the index of the first character that is not a PubidChar, or
     *         -1 if every character is allowed (an empty string also yields -1)
     * @throws NullPointerException if publicId is null
     */
    public static int validPublicId(String publicId) {
        int len = publicId.length();
        for (int i = 0; i < len; i++) {
            if (!isAsciiMember(publicId.charAt(i), fgAsciiPubidChar)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Checks whether a string is a valid Name according to production [5]
     * of the XML 1.0 Recommendation:
     * <pre>
     * [5] Name ::= (Letter | '_' | ':') (NameChar)*
     * </pre>
     *
     * @param name string to check
     * @return true if name is a valid Name
     * @throws NullPointerException if name is null
     */
    public static boolean validName(String name) {
        int len = name.length();
        if (len == 0
                || !isMember(name.charAt(0), fgAsciiInitialNameChar, E_InitialNameCharFlag)) {
            return false;
        }
        for (int i = 1; i < len; i++) {
            if (!isMember(name.charAt(i), fgAsciiNameChar, E_NameCharFlag)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks whether a string is a valid NCName according to production [4]
     * of the XML Namespaces 1.0 Recommendation:
     * <pre>
     * [4] NCName ::= (Letter | '_') (NCNameChar)*
     * </pre>
     * The only difference from {@link #validName(String)} is that ':' is
     * rejected; since ':' is below 'z' that difference lives entirely in the
     * ASCII tables, and the same flag bits are used above 'z'.
     *
     * @param name string to check
     * @return true if name is a valid NCName
     * @throws NullPointerException if name is null
     */
    public static boolean validNCName(String name) {
        int len = name.length();
        if (len == 0
                || !isMember(name.charAt(0), fgAsciiInitialNCNameChar, E_InitialNameCharFlag)) {
            return false;
        }
        for (int i = 1; i < len; i++) {
            if (!isMember(name.charAt(i), fgAsciiNCNameChar, E_NameCharFlag)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks whether a string is a valid Nmtoken according to production [7]
     * of the XML 1.0 Recommendation:
     * <pre>
     * [7] Nmtoken ::= (NameChar)+
     * </pre>
     *
     * @param nmtoken string to check
     * @return true if nmtoken is a valid Nmtoken
     * @throws NullPointerException if nmtoken is null
     */
    public static boolean validNmtoken(String nmtoken) {
        int len = nmtoken.length();
        if (len == 0) {
            return false;
        }
        for (int i = 0; i < len; i++) {
            if (!isMember(nmtoken.charAt(i), fgAsciiNameChar, E_NameCharFlag)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tests a character against a production that only admits ASCII:
     * anything above 'z' is rejected outright, everything else is looked up
     * in the given 128-entry table.
     */
    private static boolean isAsciiMember(char ch, byte[] asciiTable) {
        return ch <= 'z' && asciiTable[ch] != 0;
    }

    /**
     * Tests a character against a production that also admits non-ASCII
     * characters: up to 'z' the given ASCII table decides, above 'z' the
     * given flag bit in the 64K flag table decides.
     */
    private static boolean isMember(char ch, byte[] asciiTable, byte flag) {
        if (ch <= 'z') {
            return asciiTable[ch] != 0;
        }
        // Going through the holder (rather than the public fgCharFlags field)
        // means this works even if nobody has called initCharFlags() yet.
        return (CharFlagsHolder.FLAGS[ch] & flag) != 0;
    }

    /*
     * ASCII lookup tables. Each has 128 entries, written as 8 rows of 16;
     * the row comment gives the code point range covered by that row.
     */

    /** Non-zero for the hexadecimal digits '0'-'9', 'A'-'F' and 'a'-'f'. */
    public static final byte[] fgAsciiXDigitChar = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00 - 0x0F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F: '0' - '9'
        0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4F: 'A' - 'F'
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50 - 0x5F
        0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6F: 'a' - 'f'
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  // 0x70 - 0x7F
    };

    /** Non-zero for the ASCII letters 'A'-'Z' and 'a'-'z'. */
    public static final byte[] fgAsciiAlphaChar = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00 - 0x0F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 - 0x4F: 'A' - 'O'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, // 0x50 - 0x5F: 'P' - 'Z'
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 - 0x6F: 'a' - 'o'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0  // 0x70 - 0x7F: 'p' - 'z'
    };

    /** Non-zero for the non-initial EncName characters: letters, digits, '.', '_' and '-'. */
    public static final byte[] fgAsciiEncNameChar = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00 - 0x0F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, // 0x20 - 0x2F: '-' is 0x2D, '.' is 0x2E
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F: '0' - '9'
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 - 0x4F: 'A' - 'O'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 0x50 - 0x5F: 'P' - 'Z', '_' is 0x5F
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 - 0x6F: 'a' - 'o'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0  // 0x70 - 0x7F: 'p' - 'z'
    };

    /** Non-zero for the PubidChar characters of production [13]. */
    public static final byte[] fgAsciiPubidChar = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, // 0x00 - 0x0F: LF is 0x0A, CR is 0x0D
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1F
        1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20 - 0x2F: all but '"' and '&'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, // 0x30 - 0x3F: '0' - '9', ':', ';', '=', '?'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 - 0x4F: '@', 'A' - 'O'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 0x50 - 0x5F: 'P' - 'Z', '_' is 0x5F
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 - 0x6F: 'a' - 'o'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0  // 0x70 - 0x7F: 'p' - 'z'
    };

    /** Non-zero for the ASCII characters that may start a Name: letters, ':' and '_'. */
    public static final byte[] fgAsciiInitialNameChar = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00 - 0x0F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, // 0x30 - 0x3F: ':' is 0x3A
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 - 0x4F: 'A' - 'O'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 0x50 - 0x5F: 'P' - 'Z', '_' is 0x5F
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 - 0x6F: 'a' - 'o'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0  // 0x70 - 0x7F: 'p' - 'z'
    };

    /** Non-zero for the ASCII NameChar characters: letters, digits, '-', '.', ':' and '_'. */
    public static final byte[] fgAsciiNameChar = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00 - 0x0F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, // 0x20 - 0x2F: '-' is 0x2D, '.' is 0x2E
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, // 0x30 - 0x3F: '0' - '9', ':' is 0x3A
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 - 0x4F: 'A' - 'O'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 0x50 - 0x5F: 'P' - 'Z', '_' is 0x5F
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 - 0x6F: 'a' - 'o'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0  // 0x70 - 0x7F: 'p' - 'z'
    };

    /** Non-zero for the ASCII characters that may start an NCName: letters and '_' (no ':'). */
    public static final byte[] fgAsciiInitialNCNameChar = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00 - 0x0F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F: ':' (0x3A) is excluded
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 - 0x4F: 'A' - 'O'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 0x50 - 0x5F: 'P' - 'Z', '_' is 0x5F
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 - 0x6F: 'a' - 'o'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0  // 0x70 - 0x7F: 'p' - 'z'
    };

    /** Non-zero for the ASCII NCNameChar characters: letters, digits, '-', '.' and '_' (no ':'). */
    public static final byte[] fgAsciiNCNameChar = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00 - 0x0F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, // 0x20 - 0x2F: '-' is 0x2D, '.' is 0x2E
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F: '0' - '9', ':' (0x3A) is excluded
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 - 0x4F: 'A' - 'O'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 0x50 - 0x5F: 'P' - 'Z', '_' is 0x5F
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 - 0x6F: 'a' - 'o'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0  // 0x70 - 0x7F: 'p' - 'z'
    };

    /**
     * Character-data classification codes for ASCII. As stored here:
     * 1 marks '&lt;', 2 marks '&amp;', 3 marks ']', 4 marks every code point
     * below 0x20 except tab, and 0 marks everything else (tab included).
     * How a scanner reacts to each code is decided by the code that reads
     * this table, which is not part of this class.
     */
    public static final byte[] fgAsciiCharData = {
        4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 4, // 0x00 - 0x0F: tab is 0x09
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 0x10 - 0x1F
        0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F: '&' is 0x26
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // 0x30 - 0x3F: '<' is 0x3C
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, // 0x50 - 0x5F: ']' is 0x5D
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  // 0x70 - 0x7F
    };

    /**
     * Same codes as {@link #fgAsciiCharData}, except that the four XML
     * white space characters (tab, LF, CR and space) carry the code 5.
     */
    public static final byte[] fgAsciiWSCharData = {
        4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 5, 4, 4, // 0x00 - 0x0F: tab 0x09, LF 0x0A, CR 0x0D
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 0x10 - 0x1F
        5, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20 - 0x2F: ' ' is 0x20, '&' is 0x26
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // 0x30 - 0x3F: '<' is 0x3C
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40 - 0x4F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, // 0x50 - 0x5F: ']' is 0x5D
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60 - 0x6F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  // 0x70 - 0x7F
    };

    /** Flag bit: the character is listed in the character-data ranges. */
    public static final byte E_CharDataFlag = 1 << 0;
    /** Flag bit: the character may start a Name. */
    public static final byte E_InitialNameCharFlag = 1 << 1;
    /** Flag bit: the character may appear in a Name. */
    public static final byte E_NameCharFlag = 1 << 2;

    /**
     * Flag table indexed by UTF-16 code unit; each entry is a combination
     * of the <code>E_*Flag</code> bits. It is null until
     * {@link #initCharFlags()} has been called.
     */
    public static byte[] fgCharFlags = null;

    /**
     * Makes {@link #fgCharFlags} available. The first call stores the
     * completed table in the field; later calls do nothing. The table is
     * fully populated before it is assigned, so the field never refers to a
     * partially filled array.
     */
    public static synchronized void initCharFlags() {
        if (fgCharFlags == null) {
            fgCharFlags = CharFlagsHolder.FLAGS;
        }
    }

    /**
     * Lazily builds the flag table the first time it is needed. Class
     * initialization runs at most once and publishes the finished array
     * safely, so the valid* methods can read it without locking and without
     * depending on a prior initCharFlags() call.
     */
    private static final class CharFlagsHolder {
        static final byte[] FLAGS = buildCharFlags();
    }

    /** Allocates the 64K flag table and fills it from the three range lists. */
    private static byte[] buildCharFlags() {
        byte[] flags = new byte[0x10000];
        setFlagForRange(flags, fgCharDataRanges, E_CharDataFlag);
        // Every character that may start a Name may also continue one.
        setFlagForRange(flags, fgInitialNameCharRanges,
                (byte) (E_InitialNameCharFlag | E_NameCharFlag));
        setFlagForRange(flags, fgNameCharRanges, E_NameCharFlag);
        return flags;
    }

    /**
     * ORs a flag into the table for every character described by a range
     * list. The list has two sections, each terminated by 0x0000:
     * first (start, end) pairs of inclusive ranges, then single characters.
     */
    private static void setFlagForRange(byte[] flags, char[] ranges, byte flag) {
        int i = 0;
        for (; ranges[i] != 0; i += 2) {
            int last = ranges[i + 1];
            // int counter: a char counter would wrap if a range ended at 0xFFFF.
            for (int ch = ranges[i]; ch <= last; ch++) {
                flags[ch] |= flag;
            }
        }
        // i now sits on the end-of-ranges marker; the singles follow it.
        for (i++; ranges[i] != 0; i++) {
            flags[ranges[i]] |= flag;
        }
    }

    /*
     * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF]        // any Unicode character, excluding the
     *            | [#xE000-#xFFFD] | [#x10000-#x10FFFF]   // surrogate blocks, FFFE, and FFFF.
     * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
     *
     * We will use Char - ( '<' | '&' | ']' | #xA | #xD ) and handle the special cases inline.
     */
    private static final char[] fgCharDataRanges = {
        0x0020, 0x0025, // '&' is 0x0026
        0x0027, 0x003B, // '<' is 0x003C
        0x003D, 0x005C, // ']' is 0x005D
        0x005E, 0xD7FF,
        0xE000, 0xFFFD,
        0x0000, // ranges end marker
        0x0009, // tab
        0x0000  // singles end marker
    };

    /*
     * [5] Name ::= (Letter | '_' | ':') (NameChar)*
     * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender
     * [84] Letter ::= BaseChar | Ideographic
     * [85] BaseChar ::= <see standard>
     * [86] Ideographic ::= <see standard>
     * [87] CombiningChar ::= <see standard>
     * [88] Digit ::= <see standard>
     * [89] Extender ::= <see standard>
     */
    private static final char[] fgInitialNameCharRanges = {
        //
        // Ranges (start, end), four pairs per line:
        //
        // BaseChar ranges
        //
        0x0041, 0x005A,  0x0061, 0x007A,  0x00C0, 0x00D6,  0x00D8, 0x00F6,
        0x00F8, 0x0131,  0x0134, 0x013E,  0x0141, 0x0148,  0x014A, 0x017E,
        0x0180, 0x01C3,  0x01CD, 0x01F0,  0x01F4, 0x01F5,  0x01FA, 0x0217,
        0x0250, 0x02A8,  0x02BB, 0x02C1,  0x0388, 0x038A,  0x038E, 0x03A1,
        0x03A3, 0x03CE,  0x03D0, 0x03D6,  0x03E2, 0x03F3,  0x0401, 0x040C,
        0x040E, 0x044F,  0x0451, 0x045C,  0x045E, 0x0481,  0x0490, 0x04C4,
        0x04C7, 0x04C8,  0x04CB, 0x04CC,  0x04D0, 0x04EB,  0x04EE, 0x04F5,
        0x04F8, 0x04F9,  0x0531, 0x0556,  0x0561, 0x0586,  0x05D0, 0x05EA,
        0x05F0, 0x05F2,  0x0621, 0x063A,  0x0641, 0x064A,  0x0671, 0x06B7,
        0x06BA, 0x06BE,  0x06C0, 0x06CE,  0x06D0, 0x06D3,  0x06E5, 0x06E6,
        0x0905, 0x0939,  0x0958, 0x0961,  0x0985, 0x098C,  0x098F, 0x0990,
        0x0993, 0x09A8,  0x09AA, 0x09B0,  0x09B6, 0x09B9,  0x09DC, 0x09DD,
        0x09DF, 0x09E1,  0x09F0, 0x09F1,  0x0A05, 0x0A0A,  0x0A0F, 0x0A10,
        0x0A13, 0x0A28,  0x0A2A, 0x0A30,  0x0A32, 0x0A33,  0x0A35, 0x0A36,
        0x0A38, 0x0A39,  0x0A59, 0x0A5C,  0x0A72, 0x0A74,  0x0A85, 0x0A8B,
        0x0A8F, 0x0A91,  0x0A93, 0x0AA8,  0x0AAA, 0x0AB0,  0x0AB2, 0x0AB3,
        0x0AB5, 0x0AB9,  0x0B05, 0x0B0C,  0x0B0F, 0x0B10,  0x0B13, 0x0B28,
        0x0B2A, 0x0B30,  0x0B32, 0x0B33,  0x0B36, 0x0B39,  0x0B5C, 0x0B5D,
        0x0B5F, 0x0B61,  0x0B85, 0x0B8A,  0x0B8E, 0x0B90,  0x0B92, 0x0B95,
        0x0B99, 0x0B9A,  0x0B9E, 0x0B9F,  0x0BA3, 0x0BA4,  0x0BA8, 0x0BAA,
        0x0BAE, 0x0BB5,  0x0BB7, 0x0BB9,  0x0C05, 0x0C0C,  0x0C0E, 0x0C10,
        0x0C12, 0x0C28,  0x0C2A, 0x0C33,  0x0C35, 0x0C39,  0x0C60, 0x0C61,
        0x0C85, 0x0C8C,  0x0C8E, 0x0C90,  0x0C92, 0x0CA8,  0x0CAA, 0x0CB3,
        0x0CB5, 0x0CB9,  0x0CE0, 0x0CE1,  0x0D05, 0x0D0C,  0x0D0E, 0x0D10,
        0x0D12, 0x0D28,  0x0D2A, 0x0D39,  0x0D60, 0x0D61,  0x0E01, 0x0E2E,
        0x0E32, 0x0E33,  0x0E40, 0x0E45,  0x0E81, 0x0E82,  0x0E87, 0x0E88,
        0x0E94, 0x0E97,  0x0E99, 0x0E9F,  0x0EA1, 0x0EA3,  0x0EAA, 0x0EAB,
        0x0EAD, 0x0EAE,  0x0EB2, 0x0EB3,  0x0EC0, 0x0EC4,  0x0F40, 0x0F47,
        0x0F49, 0x0F69,  0x10A0, 0x10C5,  0x10D0, 0x10F6,  0x1102, 0x1103,
        0x1105, 0x1107,  0x110B, 0x110C,  0x110E, 0x1112,  0x1154, 0x1155,
        0x115F, 0x1161,  0x116D, 0x116E,  0x1172, 0x1173,  0x11AE, 0x11AF,
        0x11B7, 0x11B8,  0x11BC, 0x11C2,  0x1E00, 0x1E9B,  0x1EA0, 0x1EF9,
        0x1F00, 0x1F15,  0x1F18, 0x1F1D,  0x1F20, 0x1F45,  0x1F48, 0x1F4D,
        0x1F50, 0x1F57,  0x1F5F, 0x1F7D,  0x1F80, 0x1FB4,  0x1FB6, 0x1FBC,
        0x1FC2, 0x1FC4,  0x1FC6, 0x1FCC,  0x1FD0, 0x1FD3,  0x1FD6, 0x1FDB,
        0x1FE0, 0x1FEC,  0x1FF2, 0x1FF4,  0x1FF6, 0x1FFC,  0x212A, 0x212B,
        0x2180, 0x2182,  0x3041, 0x3094,  0x30A1, 0x30FA,  0x3105, 0x312C,
        0xAC00, 0xD7A3,
        //
        // Ideographic ranges
        //
        0x3021, 0x3029,  0x4E00, 0x9FA5,
        //
        // Ranges end marker
        //
        0x0000,
        //
        // Single char values
        //
        0x003A, // ':'
        0x005F, // '_'
        //
        // BaseChar singles
        //
        0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559,
        0x06D5, 0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0,
        0x0B3D, 0x0B9C, 0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D,
        0x0EA5, 0x0EA7, 0x0EB0, 0x0EBD, 0x1100, 0x1109, 0x113C,
        0x113E, 0x1140, 0x114C, 0x114E, 0x1150, 0x1159, 0x1163,
        0x1165, 0x1167, 0x1169, 0x1175, 0x119E, 0x11A8, 0x11AB,
        0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B, 0x1F5D,
        0x1FBE, 0x2126, 0x212E,
        //
        // Ideographic singles
        //
        0x3007,
        //
        // Singles end marker
        //
        0x0000
    };

    private static final char[] fgNameCharRanges = {
        //
        // Ranges (start, end), four pairs per line:
        //
        0x002D, 0x002E, // '-' and '.'
        //
        // CombiningChar ranges
        //
        0x0300, 0x0345,  0x0360, 0x0361,  0x0483, 0x0486,  0x0591, 0x05A1,
        0x05A3, 0x05B9,  0x05BB, 0x05BD,  0x05C1, 0x05C2,  0x064B, 0x0652,
        0x06D6, 0x06DC,  0x06DD, 0x06DF,  0x06E0, 0x06E4,  0x06E7, 0x06E8,
        0x06EA, 0x06ED,  0x0901, 0x0903,  0x093E, 0x094C,  0x0951, 0x0954,
        0x0962, 0x0963,  0x0981, 0x0983,  0x09C0, 0x09C4,  0x09C7, 0x09C8,
        0x09CB, 0x09CD,  0x09E2, 0x09E3,  0x0A40, 0x0A42,  0x0A47, 0x0A48,
        0x0A4B, 0x0A4D,  0x0A70, 0x0A71,  0x0A81, 0x0A83,  0x0ABE, 0x0AC5,
        0x0AC7, 0x0AC9,  0x0ACB, 0x0ACD,  0x0B01, 0x0B03,  0x0B3E, 0x0B43,
        0x0B47, 0x0B48,  0x0B4B, 0x0B4D,  0x0B56, 0x0B57,  0x0B82, 0x0B83,
        0x0BBE, 0x0BC2,  0x0BC6, 0x0BC8,  0x0BCA, 0x0BCD,  0x0C01, 0x0C03,
        0x0C3E, 0x0C44,  0x0C46, 0x0C48,  0x0C4A, 0x0C4D,  0x0C55, 0x0C56,
        0x0C82, 0x0C83,  0x0CBE, 0x0CC4,  0x0CC6, 0x0CC8,  0x0CCA, 0x0CCD,
        0x0CD5, 0x0CD6,  0x0D02, 0x0D03,  0x0D3E, 0x0D43,  0x0D46, 0x0D48,
        0x0D4A, 0x0D4D,  0x0E34, 0x0E3A,  0x0E47, 0x0E4E,  0x0EB4, 0x0EB9,
        0x0EBB, 0x0EBC,  0x0EC8, 0x0ECD,  0x0F18, 0x0F19,  0x0F71, 0x0F84,
        0x0F86, 0x0F8B,  0x0F90, 0x0F95,  0x0F99, 0x0FAD,  0x0FB1, 0x0FB7,
        0x20D0, 0x20DC,  0x302A, 0x302F,
        //
        // Digit ranges
        //
        0x0030, 0x0039,  0x0660, 0x0669,  0x06F0, 0x06F9,  0x0966, 0x096F,
        0x09E6, 0x09EF,  0x0A66, 0x0A6F,  0x0AE6, 0x0AEF,  0x0B66, 0x0B6F,
        0x0BE7, 0x0BEF,  0x0C66, 0x0C6F,  0x0CE6, 0x0CEF,  0x0D66, 0x0D6F,
        0x0E50, 0x0E59,  0x0ED0, 0x0ED9,  0x0F20, 0x0F29,
        //
        // Extender ranges
        //
        0x3031, 0x3035,  0x309D, 0x309E,  0x30FC, 0x30FE,
        //
        // Ranges end marker
        //
        0x0000,
        //
        // Single char values
        //
        // CombiningChar singles
        //
        0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE,
        0x09BF, 0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC,
        0x0B3C, 0x0BD7, 0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37,
        0x0F39, 0x0F3E, 0x0F3F, 0x0F97, 0x0FB9, 0x20E1, 0x3099,
        0x309A,
        //
        // Extender singles
        //
        0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6,
        0x3005,
        //
        // Singles end marker
        //
        0x0000
    };
}
|