001: /*
002: * Copyright (c) 1998-2008 Caucho Technology -- all rights reserved
003: *
004: * This file is part of Resin(R) Open Source
005: *
006: * Each copy or derived work must preserve the copyright notice and this
007: * notice unmodified.
008: *
009: * Resin Open Source is free software; you can redistribute it and/or modify
010: * it under the terms of the GNU General Public License as published by
011: * the Free Software Foundation; either version 2 of the License, or
012: * (at your option) any later version.
013: *
014: * Resin Open Source is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
017: * of NON-INFRINGEMENT. See the GNU General Public License for more
018: * details.
019: *
020: * You should have received a copy of the GNU General Public License
021: * along with Resin Open Source; if not, write to the
022: * Free SoftwareFoundation, Inc.
023: * 59 Temple Place, Suite 330
024: * Boston, MA 02111-1307 USA
025: *
026: * @author Scott Ferguson
027: */
028:
029: package com.caucho.xml2;
030:
/**
 * XmlChar contains the XML character classes.
 *
 * <p>The predicates mirror the character productions of the XML 1.0
 * specification (Char, S, NameChar, BaseChar, Ideographic, CombiningChar,
 * Digit and Extender). Every argument is a character code passed as an
 * {@code int}; negative values never match any class.
 */
public class XmlChar {
  /**
   * Lookup table indexed by ASCII code (0-127). An entry is true when the
   * character may appear inside an XML name. Filled by the static
   * initializer at the bottom of the class.
   */
  static boolean isAsciiNameChar[];

  /** Utility class: not instantiable. */
  private XmlChar() {
  }

  /**
   * Returns true if the character is XML whitespace: space, tab,
   * line feed or carriage return.
   */
  public static boolean isWhitespace(int ch) {
    // The leading range test rejects the common non-whitespace case with a
    // single comparison.
    return ch <= 0x20
           && (ch == 0x20 || ch == 0x9 || ch == 0xa || ch == 0xd);
  }

  /**
   * Returns true if the code point is a legal XML character:
   * #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF].
   *
   * <p>Surrogates (0xD800-0xDFFF) and the non-characters 0xFFFE/0xFFFF are
   * rejected.
   */
  public static boolean isChar(int ch) {
    return (ch >= 0x20 && ch <= 0xd7ff
            || ch == 0x9 || ch == 0xa || ch == 0xd
            // upper bound is 0xFFFD; only 0xFFFE and 0xFFFF are excluded
            || ch >= 0xe000 && ch <= 0xfffd
            // supplementary planes, reachable because ch is an int
            || ch >= 0x10000 && ch <= 0x10ffff);
  }

  /**
   * Returns true if the character may start an XML name: an ASCII letter,
   * '_', ':', or a non-ASCII base character or ideograph.
   */
  public static boolean isNameStart(int ch) {
    return (ch >= 0x41 && ch <= 0x5a
            || ch >= 0x61 && ch <= 0x7a
            || ch == '_'
            || ch == ':'
            // the table lookups are only needed outside of ASCII
            || ch > 0x7f && (isBaseChar(ch) || isIdeographic(ch)));
  }

  /**
   * Returns a boolean array testing for ascii name characters.
   *
   * <p>The array has 128 entries indexed by character code. It is the
   * class's own table, not a copy, so callers must treat it as read-only:
   * any modification would change the result of {@link #isNameChar(int)}.
   */
  public static boolean[] getAsciiNameCharArray() {
    return isAsciiNameChar;
  }

  /**
   * Returns true if the character is an XML name character.
   */
  public static boolean isNameChar(int ch) {
    if (ch < 0x20) {
      // control characters and negative values are never name characters
      return false;
    }
    else if (ch < 128) {
      // ASCII fast path through the precomputed table
      return isAsciiNameChar[ch];
    }
    else {
      return (isBaseChar(ch)
              || isIdeographic(ch)
              || isCombiningChar(ch)
              || isExtender(ch)
              || isDigit(ch));
    }
  }

  /**
   * Returns true if the character belongs to the BaseChar class.
   *
   * <p>The ranges are grouped under an upper-bound test per group so that
   * a low code point is decided after evaluating only the first groups.
   */
  private static boolean isBaseChar(int ch) {
    return (ch <= 0xff
            && (ch >= 0x0041 && ch <= 0x005A
                || ch >= 0x0061 && ch <= 0x007A
                || ch >= 0x00C0 && ch <= 0x00D6
                || ch >= 0x00D8 && ch <= 0x00F6
                || ch >= 0x00F8 && ch <= 0x00FF)
            || ch <= 0x1f5
            && (ch >= 0x0100 && ch <= 0x0131
                || ch >= 0x0134 && ch <= 0x013E
                || ch >= 0x0141 && ch <= 0x0148
                || ch >= 0x014A && ch <= 0x017E
                || ch >= 0x0180 && ch <= 0x01C3
                || ch >= 0x01CD && ch <= 0x01F0
                || ch >= 0x01F4 && ch <= 0x01F5)
            || ch <= 0x2ff
            && (ch >= 0x01FA && ch <= 0x0217
                || ch >= 0x0250 && ch <= 0x02A8
                || ch >= 0x02BB && ch <= 0x02C1)
            || ch <= 0x3ff
            && (ch == 0x0386
                || ch >= 0x0388 && ch <= 0x038A
                || ch == 0x038C
                || ch >= 0x038E && ch <= 0x03A1
                || ch >= 0x03A3 && ch <= 0x03CE
                || ch >= 0x03D0 && ch <= 0x03D6
                || ch == 0x03DA
                || ch == 0x03DC
                || ch == 0x03DE
                || ch == 0x03E0
                || ch >= 0x03E2 && ch <= 0x03F3)
            || ch <= 0x4ff
            && (ch >= 0x0401 && ch <= 0x040C
                || ch >= 0x040E && ch <= 0x044F
                || ch >= 0x0451 && ch <= 0x045C
                || ch >= 0x045E && ch <= 0x0481
                || ch >= 0x0490 && ch <= 0x04C4
                || ch >= 0x04C7 && ch <= 0x04C8
                || ch >= 0x04CB && ch <= 0x04CC
                || ch >= 0x04D0 && ch <= 0x04EB
                || ch >= 0x04EE && ch <= 0x04F5
                || ch >= 0x04F8 && ch <= 0x04F9)
            || ch <= 0x5ff
            && (ch >= 0x0531 && ch <= 0x0556
                || ch == 0x0559
                || ch >= 0x0561 && ch <= 0x0586
                || ch >= 0x05D0 && ch <= 0x05EA
                || ch >= 0x05F0 && ch <= 0x05F2)
            || ch <= 0x6ff
            && (ch >= 0x0621 && ch <= 0x063A
                || ch >= 0x0641 && ch <= 0x064A
                || ch >= 0x0671 && ch <= 0x06B7
                || ch >= 0x06BA && ch <= 0x06BE
                || ch >= 0x06C0 && ch <= 0x06CE
                || ch >= 0x06D0 && ch <= 0x06D3
                || ch == 0x06D5
                || ch >= 0x06E5 && ch <= 0x06E6)
            || ch <= 0x9ff
            && (ch >= 0x0905 && ch <= 0x0939
                || ch == 0x093D
                || ch >= 0x0958 && ch <= 0x0961
                || ch >= 0x0985 && ch <= 0x098C
                || ch >= 0x098F && ch <= 0x0990
                || ch >= 0x0993 && ch <= 0x09A8
                || ch >= 0x09AA && ch <= 0x09B0
                || ch == 0x09B2
                || ch >= 0x09B6 && ch <= 0x09B9
                || ch >= 0x09DC && ch <= 0x09DD
                || ch >= 0x09DF && ch <= 0x09E1
                || ch >= 0x09F0 && ch <= 0x09F1)
            || ch <= 0xaff
            && (ch >= 0x0A05 && ch <= 0x0A0A
                || ch >= 0x0A0F && ch <= 0x0A10
                || ch >= 0x0A13 && ch <= 0x0A28
                || ch >= 0x0A2A && ch <= 0x0A30
                || ch >= 0x0A32 && ch <= 0x0A33
                || ch >= 0x0A35 && ch <= 0x0A36
                || ch >= 0x0A38 && ch <= 0x0A39
                || ch >= 0x0A59 && ch <= 0x0A5C
                || ch == 0x0A5E
                || ch >= 0x0A72 && ch <= 0x0A74
                || ch >= 0x0A85 && ch <= 0x0A8B
                || ch == 0x0A8D
                || ch >= 0x0A8F && ch <= 0x0A91
                || ch >= 0x0A93 && ch <= 0x0AA8
                || ch >= 0x0AAA && ch <= 0x0AB0
                || ch >= 0x0AB2 && ch <= 0x0AB3
                || ch >= 0x0AB5 && ch <= 0x0AB9
                || ch == 0x0ABD
                || ch == 0x0AE0)
            || ch <= 0xbff
            && (ch >= 0x0B05 && ch <= 0x0B0C
                || ch >= 0x0B0F && ch <= 0x0B10
                || ch >= 0x0B13 && ch <= 0x0B28
                || ch >= 0x0B2A && ch <= 0x0B30
                || ch >= 0x0B32 && ch <= 0x0B33
                || ch >= 0x0B36 && ch <= 0x0B39
                || ch == 0x0B3D
                || ch >= 0x0B5C && ch <= 0x0B5D
                || ch >= 0x0B5F && ch <= 0x0B61
                || ch >= 0x0B85 && ch <= 0x0B8A
                || ch >= 0x0B8E && ch <= 0x0B90
                || ch >= 0x0B92 && ch <= 0x0B95
                || ch >= 0x0B99 && ch <= 0x0B9A
                || ch == 0x0B9C
                || ch >= 0x0B9E && ch <= 0x0B9F
                || ch >= 0x0BA3 && ch <= 0x0BA4
                || ch >= 0x0BA8 && ch <= 0x0BAA
                || ch >= 0x0BAE && ch <= 0x0BB5
                || ch >= 0x0BB7 && ch <= 0x0BB9)
            || ch <= 0xcff
            && (ch >= 0x0C05 && ch <= 0x0C0C
                || ch >= 0x0C0E && ch <= 0x0C10
                || ch >= 0x0C12 && ch <= 0x0C28
                || ch >= 0x0C2A && ch <= 0x0C33
                || ch >= 0x0C35 && ch <= 0x0C39
                || ch >= 0x0C60 && ch <= 0x0C61
                || ch >= 0x0C85 && ch <= 0x0C8C
                || ch >= 0x0C8E && ch <= 0x0C90
                || ch >= 0x0C92 && ch <= 0x0CA8
                || ch >= 0x0CAA && ch <= 0x0CB3
                || ch >= 0x0CB5 && ch <= 0x0CB9
                || ch == 0x0CDE
                || ch >= 0x0CE0 && ch <= 0x0CE1)
            || ch <= 0xdff
            && (ch >= 0x0D05 && ch <= 0x0D0C
                || ch >= 0x0D0E && ch <= 0x0D10
                || ch >= 0x0D12 && ch <= 0x0D28
                || ch >= 0x0D2A && ch <= 0x0D39
                || ch >= 0x0D60 && ch <= 0x0D61)
            || ch <= 0xfff
            && (ch >= 0x0E01 && ch <= 0x0E2E
                || ch == 0x0E30
                || ch >= 0x0E32 && ch <= 0x0E33
                || ch >= 0x0E40 && ch <= 0x0E45
                || ch >= 0x0E81 && ch <= 0x0E82
                || ch == 0x0E84
                || ch >= 0x0E87 && ch <= 0x0E88
                || ch == 0x0E8A
                || ch == 0x0E8D
                || ch >= 0x0E94 && ch <= 0x0E97
                || ch >= 0x0E99 && ch <= 0x0E9F
                || ch >= 0x0EA1 && ch <= 0x0EA3
                || ch == 0x0EA5
                || ch == 0x0EA7
                || ch >= 0x0EAA && ch <= 0x0EAB
                || ch >= 0x0EAD && ch <= 0x0EAE
                || ch == 0x0EB0
                || ch >= 0x0EB2 && ch <= 0x0EB3
                || ch == 0x0EBD
                || ch >= 0x0EC0 && ch <= 0x0EC4
                || ch >= 0x0F40 && ch <= 0x0F47
                || ch >= 0x0F49 && ch <= 0x0F69)
            || ch <= 0x10ff
            && (ch >= 0x10A0 && ch <= 0x10C5
                || ch >= 0x10D0 && ch <= 0x10F6)
            || ch <= 0x11ff
            && (ch == 0x1100
                || ch >= 0x1102 && ch <= 0x1103
                || ch >= 0x1105 && ch <= 0x1107
                || ch == 0x1109
                || ch >= 0x110B && ch <= 0x110C
                || ch >= 0x110E && ch <= 0x1112
                || ch == 0x113C
                || ch == 0x113E
                || ch == 0x1140
                || ch == 0x114C
                || ch == 0x114E
                || ch == 0x1150
                || ch >= 0x1154 && ch <= 0x1155
                || ch == 0x1159
                || ch >= 0x115F && ch <= 0x1161
                || ch == 0x1163
                || ch == 0x1165
                || ch == 0x1167
                || ch == 0x1169
                || ch >= 0x116D && ch <= 0x116E
                || ch >= 0x1172 && ch <= 0x1173
                || ch == 0x1175
                || ch == 0x119E
                || ch == 0x11A8
                || ch == 0x11AB
                || ch >= 0x11AE && ch <= 0x11AF
                || ch >= 0x11B7 && ch <= 0x11B8
                || ch == 0x11BA
                || ch >= 0x11BC && ch <= 0x11C2
                || ch == 0x11EB
                || ch == 0x11F0
                || ch == 0x11F9)
            || ch <= 0x1fff
            && (ch >= 0x1E00 && ch <= 0x1E9B
                || ch >= 0x1EA0 && ch <= 0x1EF9
                || ch >= 0x1F00 && ch <= 0x1F15
                || ch >= 0x1F18 && ch <= 0x1F1D
                || ch >= 0x1F20 && ch <= 0x1F45
                || ch >= 0x1F48 && ch <= 0x1F4D
                || ch >= 0x1F50 && ch <= 0x1F57
                || ch == 0x1F59
                || ch == 0x1F5B
                || ch == 0x1F5D
                || ch >= 0x1F5F && ch <= 0x1F7D
                || ch >= 0x1F80 && ch <= 0x1FB4
                || ch >= 0x1FB6 && ch <= 0x1FBC
                || ch == 0x1FBE
                || ch >= 0x1FC2 && ch <= 0x1FC4
                || ch >= 0x1FC6 && ch <= 0x1FCC
                || ch >= 0x1FD0 && ch <= 0x1FD3
                || ch >= 0x1FD6 && ch <= 0x1FDB
                || ch >= 0x1FE0 && ch <= 0x1FEC
                || ch >= 0x1FF2 && ch <= 0x1FF4
                || ch >= 0x1FF6 && ch <= 0x1FFC)
            // the remaining ranges are tested without a group guard
            || ch == 0x2126
            || ch >= 0x212A && ch <= 0x212B
            || ch == 0x212E
            || ch >= 0x2180 && ch <= 0x2182
            || ch >= 0x3041 && ch <= 0x3094
            || ch >= 0x30A1 && ch <= 0x30FA
            || ch >= 0x3105 && ch <= 0x312C
            || ch >= 0xAC00 && ch <= 0xD7A3);
  }

  /**
   * Returns true if the character belongs to the Ideographic class.
   */
  private static boolean isIdeographic(int ch) {
    return (ch >= 0x4e00 && ch <= 0x9fa5
            || ch == 0x3007
            || ch >= 0x3021 && ch <= 0x3029);
  }

  /**
   * Returns true if the character belongs to the CombiningChar class.
   */
  private static boolean isCombiningChar(int ch) {
    // no combining character lies below U+0300
    if (ch < 0x300) {
      return false;
    }

    return (ch <= 0x6ff
            && (ch >= 0x0300 && ch <= 0x0345
                || ch >= 0x0360 && ch <= 0x0361
                || ch >= 0x0483 && ch <= 0x0486
                || ch >= 0x0591 && ch <= 0x05A1
                || ch >= 0x05A3 && ch <= 0x05B9
                || ch >= 0x05BB && ch <= 0x05BD
                || ch == 0x05BF
                || ch >= 0x05C1 && ch <= 0x05C2
                || ch == 0x05C4
                || ch >= 0x064B && ch <= 0x0652
                || ch == 0x0670
                || ch >= 0x06D6 && ch <= 0x06DC
                || ch >= 0x06DD && ch <= 0x06DF
                || ch >= 0x06E0 && ch <= 0x06E4
                || ch >= 0x06E7 && ch <= 0x06E8
                || ch >= 0x06EA && ch <= 0x06ED)
            || ch <= 0x9ff
            && (ch >= 0x0901 && ch <= 0x0903
                || ch == 0x093C
                || ch >= 0x093E && ch <= 0x094C
                || ch == 0x094D
                || ch >= 0x0951 && ch <= 0x0954
                || ch >= 0x0962 && ch <= 0x0963
                || ch >= 0x0981 && ch <= 0x0983
                || ch == 0x09BC
                || ch == 0x09BE
                || ch == 0x09BF
                || ch >= 0x09C0 && ch <= 0x09C4
                || ch >= 0x09C7 && ch <= 0x09C8
                || ch >= 0x09CB && ch <= 0x09CD
                || ch == 0x09D7
                || ch >= 0x09E2 && ch <= 0x09E3)
            || ch <= 0xaff
            && (ch == 0x0A02
                || ch == 0x0A3C
                || ch == 0x0A3E
                || ch == 0x0A3F
                || ch >= 0x0A40 && ch <= 0x0A42
                || ch >= 0x0A47 && ch <= 0x0A48
                || ch >= 0x0A4B && ch <= 0x0A4D
                || ch >= 0x0A70 && ch <= 0x0A71
                || ch >= 0x0A81 && ch <= 0x0A83
                || ch == 0x0ABC
                || ch >= 0x0ABE && ch <= 0x0AC5
                || ch >= 0x0AC7 && ch <= 0x0AC9
                || ch >= 0x0ACB && ch <= 0x0ACD)
            || ch <= 0xbff
            && (ch >= 0x0B01 && ch <= 0x0B03
                || ch == 0x0B3C
                || ch >= 0x0B3E && ch <= 0x0B43
                || ch >= 0x0B47 && ch <= 0x0B48
                || ch >= 0x0B4B && ch <= 0x0B4D
                || ch >= 0x0B56 && ch <= 0x0B57
                || ch >= 0x0B82 && ch <= 0x0B83
                || ch >= 0x0BBE && ch <= 0x0BC2
                || ch >= 0x0BC6 && ch <= 0x0BC8
                || ch >= 0x0BCA && ch <= 0x0BCD
                || ch == 0x0BD7)
            // the guard must reach 0xcff: every range in this group lies in
            // 0x0C01..0x0CD6, so a bound of 0xc00 would make them unreachable
            || ch <= 0xcff
            && (ch >= 0x0C01 && ch <= 0x0C03
                || ch >= 0x0C3E && ch <= 0x0C44
                || ch >= 0x0C46 && ch <= 0x0C48
                || ch >= 0x0C4A && ch <= 0x0C4D
                || ch >= 0x0C55 && ch <= 0x0C56
                || ch >= 0x0C82 && ch <= 0x0C83
                || ch >= 0x0CBE && ch <= 0x0CC4
                || ch >= 0x0CC6 && ch <= 0x0CC8
                || ch >= 0x0CCA && ch <= 0x0CCD
                || ch >= 0x0CD5 && ch <= 0x0CD6)
            || ch <= 0xeff
            && (ch >= 0x0D02 && ch <= 0x0D03
                || ch >= 0x0D3E && ch <= 0x0D43
                || ch >= 0x0D46 && ch <= 0x0D48
                || ch >= 0x0D4A && ch <= 0x0D4D
                || ch == 0x0D57
                || ch == 0x0E31
                || ch >= 0x0E34 && ch <= 0x0E3A
                || ch >= 0x0E47 && ch <= 0x0E4E
                || ch == 0x0EB1
                || ch >= 0x0EB4 && ch <= 0x0EB9
                || ch >= 0x0EBB && ch <= 0x0EBC
                || ch >= 0x0EC8 && ch <= 0x0ECD)
            || ch <= 0xfff
            && (ch >= 0x0F18 && ch <= 0x0F19
                || ch == 0x0F35
                || ch == 0x0F37
                || ch == 0x0F39
                || ch == 0x0F3E
                || ch == 0x0F3F
                || ch >= 0x0F71 && ch <= 0x0F84
                || ch >= 0x0F86 && ch <= 0x0F8B
                || ch >= 0x0F90 && ch <= 0x0F95
                || ch == 0x0F97
                || ch >= 0x0F99 && ch <= 0x0FAD
                || ch >= 0x0FB1 && ch <= 0x0FB7
                || ch == 0x0FB9)
            // the remaining ranges are tested without a group guard
            || ch >= 0x20D0 && ch <= 0x20DC
            || ch == 0x20E1
            || ch >= 0x302A && ch <= 0x302F
            || ch == 0x3099
            || ch == 0x309A);
  }

  /**
   * Returns true if the character belongs to the Digit class, which
   * contains the ASCII digits and the decimal digits of several other
   * scripts.
   */
  private static boolean isDigit(int ch) {
    return (ch >= 0x0030 && ch <= 0x0039
            || ch >= 0x0660 && ch <= 0x0669
            || ch >= 0x06F0 && ch <= 0x06F9
            || ch >= 0x0966 && ch <= 0x096F
            || ch >= 0x09E6 && ch <= 0x09EF
            || ch >= 0x0A66 && ch <= 0x0A6F
            || ch >= 0x0AE6 && ch <= 0x0AEF
            || ch >= 0x0B66 && ch <= 0x0B6F
            || ch >= 0x0BE7 && ch <= 0x0BEF
            || ch >= 0x0C66 && ch <= 0x0C6F
            || ch >= 0x0CE6 && ch <= 0x0CEF
            || ch >= 0x0D66 && ch <= 0x0D6F
            || ch >= 0x0E50 && ch <= 0x0E59
            || ch >= 0x0ED0 && ch <= 0x0ED9
            || ch >= 0x0F20 && ch <= 0x0F29);
  }

  /**
   * Returns true if the character belongs to the Extender class.
   */
  private static boolean isExtender(int ch) {
    return (ch == 0x00B7
            || ch == 0x02D0
            || ch == 0x02D1
            || ch == 0x0387
            || ch == 0x0640
            || ch == 0x0E46
            || ch == 0x0EC6
            || ch == 0x3005
            || ch >= 0x3031 && ch <= 0x3035
            || ch >= 0x309D && ch <= 0x309E
            || ch >= 0x30FC && ch <= 0x30FE);
  }

  // Builds the ASCII name-character table: digits, upper and lower case
  // letters, plus '_', ':', '.' and '-'.
  static {
    isAsciiNameChar = new boolean[128];
    for (int i = 0x30; i <= 0x39; i++) {
      isAsciiNameChar[i] = true;
    }
    for (int i = 0x41; i <= 0x5a; i++) {
      isAsciiNameChar[i] = true;
    }
    for (int i = 0x61; i <= 0x7a; i++) {
      isAsciiNameChar[i] = true;
    }
    isAsciiNameChar['_'] = true;
    isAsciiNameChar[':'] = true;
    isAsciiNameChar['.'] = true;
    isAsciiNameChar['-'] = true;
  }
}
|