001: /*
002: * @(#)CharacterDataLatin1.java 1.5 06/10/10
003: *
004: * Copyright 1990-2006 Sun Microsystems, Inc. All Rights Reserved.
005: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
006: *
007: * This program is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU General Public License version
009: * 2 only, as published by the Free Software Foundation.
010: *
011: * This program is distributed in the hope that it will be useful, but
012: * WITHOUT ANY WARRANTY; without even the implied warranty of
013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014: * General Public License version 2 for more details (a copy is
015: * included at /legal/license.txt).
016: *
017: * You should have received a copy of the GNU General Public License
018: * version 2 along with this work; if not, write to the Free Software
019: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020: * 02110-1301 USA
021: *
022: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
023: * Clara, CA 95054 or visit www.sun.com if you need additional
024: * information or have any questions.
025: */
026:
027: package java.lang;
028:
029: /** The CharacterData class encapsulates the large tables found in
030: Java.lang.Character. */
031:
032: class CharacterDataLatin1 {
033:
034: /* The character properties are currently encoded into 32 bits in the following manner:
035: 1 bit mirrored property
036: 4 bits directionality property
037: 9 bits signed offset used for converting case
038: 1 bit if 1, adding the signed offset converts the character to lowercase
039: 1 bit if 1, subtracting the signed offset converts the character to uppercase
040: 1 bit if 1, this character has a titlecase equivalent (possibly itself)
041: 3 bits 0 may not be part of an identifier
042: 1 ignorable control; may continue a Unicode identifier or Java identifier
043: 2 may continue a Java identifier but not a Unicode identifier (unused)
044: 3 may continue a Unicode identifier or Java identifier
045: 4 is a Java whitespace character
046: 5 may start or continue a Java identifier;
047: may continue but not start a Unicode identifier (underscores)
048: 6 may start or continue a Java identifier but not a Unicode identifier ($)
049: 7 may start or continue a Unicode identifier or Java identifier
050: Thus:
051: 5, 6, 7 may start a Java identifier
052: 1, 2, 3, 5, 6, 7 may continue a Java identifier
053: 7 may start a Unicode identifier
054: 1, 3, 5, 7 may continue a Unicode identifier
055: 1 is ignorable within an identifier
056: 4 is Java whitespace
057: 2 bits 0 this character has no numeric property
058: 1 adding the digit offset to the character code and then
059: masking with 0x1F will produce the desired numeric value
060: 2 this character has a "strange" numeric value
061: 3 a Java supradecimal digit: adding the digit offset to the
062: character code, then masking with 0x1F, then adding 10
063: will produce the desired numeric value
064: 5 bits digit offset
065: 5 bits character type
066:
067: The encoding of character properties is subject to change at any time.
068: */
069:
070: static int getProperties(char ch) {
071: return A[ch];
072: }
073:
074: static int getType(char ch) {
075: return getProperties(ch) & 0x1F;
076: }
077:
078: static boolean isLowerCase(char ch) {
079: return getType(ch) == Character.LOWERCASE_LETTER;
080: }
081:
082: static boolean isUpperCase(char ch) {
083: return getType(ch) == Character.UPPERCASE_LETTER;
084: }
085:
086: static boolean isTitleCase(char ch) {
087: return false;
088: }
089:
090: static boolean isDigit(char ch) {
091: return getType(ch) == Character.DECIMAL_DIGIT_NUMBER;
092: }
093:
094: static boolean isDefined(char ch) {
095: return getType(ch) != Character.UNASSIGNED;
096: }
097:
098: static boolean isLetter(char ch) {
099: return (((((1 << Character.UPPERCASE_LETTER)
100: | (1 << Character.LOWERCASE_LETTER)
101: | (1 << Character.TITLECASE_LETTER)
102: | (1 << Character.MODIFIER_LETTER) | (1 << Character.OTHER_LETTER)) >> getType(ch)) & 1) != 0);
103: }
104:
105: static boolean isLetterOrDigit(char ch) {
106: return (((((1 << Character.UPPERCASE_LETTER)
107: | (1 << Character.LOWERCASE_LETTER)
108: | (1 << Character.TITLECASE_LETTER)
109: | (1 << Character.MODIFIER_LETTER)
110: | (1 << Character.OTHER_LETTER) | (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(ch)) & 1) != 0);
111: }
112:
113: static boolean isSpaceChar(char ch) {
114: return (((((1 << Character.SPACE_SEPARATOR)
115: | (1 << Character.LINE_SEPARATOR) | (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(ch)) & 1) != 0);
116: }
117:
118: static boolean isJavaIdentifierStart(char ch) {
119: return (getProperties(ch) & 0x00007000) >= 0x00005000;
120: }
121:
122: static boolean isJavaIdentifierPart(char ch) {
123: return (getProperties(ch) & 0x00003000) != 0;
124: }
125:
126: static boolean isUnicodeIdentifierStart(char ch) {
127: return (getProperties(ch) & 0x00007000) == 0x00007000;
128: }
129:
130: static boolean isUnicodeIdentifierPart(char ch) {
131: return (getProperties(ch) & 0x00001000) != 0;
132: }
133:
134: static boolean isIdentifierIgnorable(char ch) {
135: return (getProperties(ch) & 0x00007000) == 0x00001000;
136: }
137:
138: static char toLowerCase(char ch) {
139: char mapChar = ch;
140: int val = getProperties(ch);
141:
142: if (((val & 0x00020000) != 0)
143: && ((val & 0x07FC0000) != 0x07FC0000)) {
144: int offset = val << 5 >> (5 + 18);
145: mapChar = (char) (ch + offset);
146: }
147: return mapChar;
148: }
149:
150: static char toUpperCase(char ch) {
151: char mapChar = ch;
152: int val = getProperties(ch);
153:
154: if ((val & 0x00010000) != 0) {
155: if ((val & 0x07FC0000) != 0x07FC0000) {
156: int offset = val << 5 >> (5 + 18);
157: mapChar = (char) (ch - offset);
158: } else if (ch == '\u00B5') {
159: mapChar = '\u039C';
160: }
161: }
162: return mapChar;
163: }
164:
165: static char toTitleCase(char ch) {
166: return toUpperCase(ch);
167: }
168:
169: static int digit(char ch, int radix) {
170: int value = -1;
171: if (radix >= Character.MIN_RADIX
172: && radix <= Character.MAX_RADIX) {
173: int val = getProperties(ch);
174: int kind = val & 0x1F;
175: if (kind == Character.DECIMAL_DIGIT_NUMBER) {
176: value = ch + ((val & 0x3E0) >> 5) & 0x1F;
177: } else if ((val & 0xC00) == 0x00000C00) {
178: // Java supradecimal digit
179: value = (ch + ((val & 0x3E0) >> 5) & 0x1F) + 10;
180: }
181: }
182: return (value < radix) ? value : -1;
183: }
184:
185: static int getNumericValue(char ch) {
186: int val = getProperties(ch);
187: int retval = -1;
188:
189: switch (val & 0xC00) {
190: default: // cannot occur
191: case (0x00000000): // not numeric
192: retval = -1;
193: break;
194: case (0x00000400): // simple numeric
195: retval = ch + ((val & 0x3E0) >> 5) & 0x1F;
196: break;
197: case (0x00000800): // "strange" numeric
198: retval = -2;
199: break;
200: case (0x00000C00): // Java supradecimal
201: retval = (ch + ((val & 0x3E0) >> 5) & 0x1F) + 10;
202: break;
203: }
204: return retval;
205: }
206:
207: static boolean isWhitespace(char ch) {
208: return (getProperties(ch) & 0x00007000) == 0x00004000;
209: }
210:
211: static byte getDirectionality(char ch) {
212: int val = getProperties(ch);
213: byte directionality = (byte) ((val & 0x78000000) >> 27);
214:
215: if (directionality == 0xF) {
216: directionality = -1;
217: }
218: return directionality;
219: }
220:
221: static boolean isMirrored(char ch) {
222: return (getProperties(ch) & 0x80000000) != 0;
223: }
224:
225: static char toUpperCaseEx(char ch) {
226: char mapChar = ch;
227: int val = getProperties(ch);
228:
229: if ((val & 0x00010000) != 0) {
230: if ((val & 0x07FC0000) != 0x07FC0000) {
231: int offset = val << 5 >> (5 + 18);
232: mapChar = (char) (ch - offset);
233: } else {
234: switch (ch) {
235: // map overflow characters
236: case '\u00B5':
237: mapChar = '\u039C';
238: break;
239: default:
240: mapChar = Character.CHAR_ERROR;
241: break;
242: }
243: }
244: }
245: return mapChar;
246: }
247:
248: // The following tables and code generated using:
249: // java GenerateCharacter -template /.../CharacterDataLatin1.java.template -spec /.../UnicodeData.txt -specialcasing /.../SpecialCasing.txt -o /.../CharacterDataLatin1.java -string -usecharforbyte -latin1 8
250: // The A table has 256 entries for a total of 1024 bytes.
251:
252: static final int A[] = new int[256];
253: /*
254: ** static final String A_DATA =
255: ** "\u4800\u100F\u4800\u100F\u4800\u100F\u4800\u100F\u4800\u100F\u4800\u100F\u4800"+
256: ** "\u100F\u4800\u100F\u4800\u100F\u5800\u400F\u5000\u400F\u5800\u400F\u6000\u400F"+
257: ** "\u5000\u400F\u4800\u100F\u4800\u100F\u4800\u100F\u4800\u100F\u4800\u100F\u4800"+
258: ** "\u100F\u4800\u100F\u4800\u100F\u4800\u100F\u4800\u100F\u4800\u100F\u4800\u100F"+
259: ** "\u4800\u100F\u4800\u100F\u5000\u400F\u5000\u400F\u5000\u400F\u5800\u400F\u6000"+
260: ** "\u400C\u6800\030\u6800\030\u2800\030\u2800\u601A\u2800\030\u6800\030\u6800"+
261: ** "\030\uE800\025\uE800\026\u6800\030\u2800\031\u3800\030\u2800\024\u3800\030"+
262: ** "\u2000\030\u1800\u3609\u1800\u3609\u1800\u3609\u1800\u3609\u1800\u3609\u1800"+
263: ** "\u3609\u1800\u3609\u1800\u3609\u1800\u3609\u1800\u3609\u3800\030\u6800\030"+
264: ** "\uE800\031\u6800\031\uE800\031\u6800\030\u6800\030\202\u7FE1\202\u7FE1\202"+
265: ** "\u7FE1\202\u7FE1\202\u7FE1\202\u7FE1\202\u7FE1\202\u7FE1\202\u7FE1\202\u7FE1"+
266: ** "\202\u7FE1\202\u7FE1\202\u7FE1\202\u7FE1\202\u7FE1\202\u7FE1\202\u7FE1\202"+
267: ** "\u7FE1\202\u7FE1\202\u7FE1\202\u7FE1\202\u7FE1\202\u7FE1\202\u7FE1\202\u7FE1"+
268: ** "\202\u7FE1\uE800\025\u6800\030\uE800\026\u6800\033\u6800\u5017\u6800\033\201"+
269: ** "\u7FE2\201\u7FE2\201\u7FE2\201\u7FE2\201\u7FE2\201\u7FE2\201\u7FE2\201\u7FE2"+
270: ** "\201\u7FE2\201\u7FE2\201\u7FE2\201\u7FE2\201\u7FE2\201\u7FE2\201\u7FE2\201"+
271: ** "\u7FE2\201\u7FE2\201\u7FE2\201\u7FE2\201\u7FE2\201\u7FE2\201\u7FE2\201\u7FE2"+
272: ** "\201\u7FE2\201\u7FE2\201\u7FE2\uE800\025\u6800\031\uE800\026\u6800\031\u4800"+
273: ** "\u100F\u4800\u100F\u4800\u100F\u4800\u100F\u4800\u100F\u4800\u100F\u5000\u100F"+
274: ** "\u4800\u100F\u4800\u100F\u4800\u100F\u4800\u100F\u4800\u100F\u4800\u100F\u4800"+
275: ** "\u100F\u4800\u100F\u4800\u100F\u4800\u100F\u4800\u100F\u4800\u100F\u4800\u100F"+
276: ** "\u4800\u100F\u4800\u100F\u4800\u100F\u4800\u100F\u4800\u100F\u4800\u100F\u4800"+
277: ** "\u100F\u4800\u100F\u4800\u100F\u4800\u100F\u4800\u100F\u4800\u100F\u4800\u100F"+
278: ** "\u3800\014\u6800\030\u2800\u601A\u2800\u601A\u2800\u601A\u2800\u601A\u6800"+
279: ** "\034\u6800\034\u6800\033\u6800\034\000\u7002\uE800\035\u6800\031\u6800\024"+
280: ** "\u6800\034\u6800\033\u2800\034\u2800\031\u1800\u060B\u1800\u060B\u6800\033"+
281: ** "\u07FD\u7002\u6800\034\u6800\030\u6800\033\u1800\u050B\000\u7002\uE800\036"+
282: ** "\u6800\u080B\u6800\u080B\u6800\u080B\u6800\030\202\u7001\202\u7001\202\u7001"+
283: ** "\202\u7001\202\u7001\202\u7001\202\u7001\202\u7001\202\u7001\202\u7001\202"+
284: ** "\u7001\202\u7001\202\u7001\202\u7001\202\u7001\202\u7001\202\u7001\202\u7001"+
285: ** "\202\u7001\202\u7001\202\u7001\202\u7001\202\u7001\u6800\031\202\u7001\202"+
286: ** "\u7001\202\u7001\202\u7001\202\u7001\202\u7001\202\u7001\u07FD\u7002\201\u7002"+
287: ** "\201\u7002\201\u7002\201\u7002\201\u7002\201\u7002\201\u7002\201\u7002\201"+
288: ** "\u7002\201\u7002\201\u7002\201\u7002\201\u7002\201\u7002\201\u7002\201\u7002"+
289: ** "\201\u7002\201\u7002\201\u7002\201\u7002\201\u7002\201\u7002\201\u7002\u6800"+
290: ** "\031\201\u7002\201\u7002\201\u7002\201\u7002\201\u7002\201\u7002\201\u7002"+
291: ** "\u061D\u7002";
292: **/
293:
294: // In all, the character property tables require 1024 bytes.
295: static {
296:
297: /*
298: ** { // THIS CODE WAS AUTOMATICALLY CREATED BY GenerateCharacter:
299: ** char[] data = A_DATA.toCharArray();
300: ** assert (data.length == (256 * 2));
301: ** int i = 0, j = 0;
302: ** while (i < (256 * 2)) {
303: ** int entry = data[i++] << 16;
304: ** A[j++] = entry | data[i++];
305: ** }
306: ** }
307: */
308: setArrays();
309:
310: }
311:
312: static native void setArrays();
313: }
|