001: /*******************************************************************************
002: * Copyright (c) 2005, 2006 IBM Corporation and others.
003: * All rights reserved. This program and the accompanying materials
004: * are made available under the terms of the Eclipse Public License v1.0
005: * which accompanies this distribution, and is available at
006: * http://www.eclipse.org/legal/epl-v10.html
007: *
008: * Contributors:
009: * IBM Corporation - initial API and implementation
010: * David Foerster - patch for toUpperCase as described in https://bugs.eclipse.org/bugs/show_bug.cgi?id=153125
011: *******************************************************************************/package org.eclipse.jdt.internal.compiler.parser;
012:
import java.io.DataInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;

import org.eclipse.jdt.core.compiler.InvalidInputException;
import org.eclipse.jdt.internal.compiler.ast.ASTNode;
019:
020: public class ScannerHelper {
021:
022: public final static long[] Bits = { ASTNode.Bit1, ASTNode.Bit2,
023: ASTNode.Bit3, ASTNode.Bit4, ASTNode.Bit5, ASTNode.Bit6,
024: ASTNode.Bit7, ASTNode.Bit8, ASTNode.Bit9, ASTNode.Bit10,
025: ASTNode.Bit11, ASTNode.Bit12, ASTNode.Bit13, ASTNode.Bit14,
026: ASTNode.Bit15, ASTNode.Bit16, ASTNode.Bit17, ASTNode.Bit18,
027: ASTNode.Bit19, ASTNode.Bit20, ASTNode.Bit21, ASTNode.Bit22,
028: ASTNode.Bit23, ASTNode.Bit24, ASTNode.Bit25, ASTNode.Bit26,
029: ASTNode.Bit27, ASTNode.Bit28, ASTNode.Bit29, ASTNode.Bit30,
030: ASTNode.Bit31, ASTNode.Bit32, ASTNode.Bit33L,
031: ASTNode.Bit34L, ASTNode.Bit35L, ASTNode.Bit36L,
032: ASTNode.Bit37L, ASTNode.Bit38L, ASTNode.Bit39L,
033: ASTNode.Bit40L, ASTNode.Bit41L, ASTNode.Bit42L,
034: ASTNode.Bit43L, ASTNode.Bit44L, ASTNode.Bit45L,
035: ASTNode.Bit46L, ASTNode.Bit47L, ASTNode.Bit48L,
036: ASTNode.Bit49L, ASTNode.Bit50L, ASTNode.Bit51L,
037: ASTNode.Bit52L, ASTNode.Bit53L, ASTNode.Bit54L,
038: ASTNode.Bit55L, ASTNode.Bit56L, ASTNode.Bit57L,
039: ASTNode.Bit58L, ASTNode.Bit59L, ASTNode.Bit60L,
040: ASTNode.Bit61L, ASTNode.Bit62L, ASTNode.Bit63L,
041: ASTNode.Bit64L, };
042:
043: private static final int START_INDEX = 0;
044: private static final int PART_INDEX = 1;
045:
046: private static long[][][] Tables;
047:
048: public final static int MAX_OBVIOUS = 128;
049: public final static int[] OBVIOUS_IDENT_CHAR_NATURES = new int[MAX_OBVIOUS];
050:
051: public final static int C_JLS_SPACE = ASTNode.Bit9;
052: public final static int C_SPECIAL = ASTNode.Bit8;
053: public final static int C_IDENT_START = ASTNode.Bit7;
054: public final static int C_UPPER_LETTER = ASTNode.Bit6;
055: public final static int C_LOWER_LETTER = ASTNode.Bit5;
056: public final static int C_IDENT_PART = ASTNode.Bit4;
057: public final static int C_DIGIT = ASTNode.Bit3;
058: public final static int C_SEPARATOR = ASTNode.Bit2;
059: public final static int C_SPACE = ASTNode.Bit1;
060:
061: static {
062: OBVIOUS_IDENT_CHAR_NATURES[0] = C_IDENT_PART;
063: OBVIOUS_IDENT_CHAR_NATURES[1] = C_IDENT_PART;
064: OBVIOUS_IDENT_CHAR_NATURES[2] = C_IDENT_PART;
065: OBVIOUS_IDENT_CHAR_NATURES[3] = C_IDENT_PART;
066: OBVIOUS_IDENT_CHAR_NATURES[4] = C_IDENT_PART;
067: OBVIOUS_IDENT_CHAR_NATURES[5] = C_IDENT_PART;
068: OBVIOUS_IDENT_CHAR_NATURES[6] = C_IDENT_PART;
069: OBVIOUS_IDENT_CHAR_NATURES[7] = C_IDENT_PART;
070: OBVIOUS_IDENT_CHAR_NATURES[8] = C_IDENT_PART;
071: OBVIOUS_IDENT_CHAR_NATURES[14] = C_IDENT_PART;
072: OBVIOUS_IDENT_CHAR_NATURES[15] = C_IDENT_PART;
073: OBVIOUS_IDENT_CHAR_NATURES[16] = C_IDENT_PART;
074: OBVIOUS_IDENT_CHAR_NATURES[17] = C_IDENT_PART;
075: OBVIOUS_IDENT_CHAR_NATURES[18] = C_IDENT_PART;
076: OBVIOUS_IDENT_CHAR_NATURES[19] = C_IDENT_PART;
077: OBVIOUS_IDENT_CHAR_NATURES[20] = C_IDENT_PART;
078: OBVIOUS_IDENT_CHAR_NATURES[21] = C_IDENT_PART;
079: OBVIOUS_IDENT_CHAR_NATURES[22] = C_IDENT_PART;
080: OBVIOUS_IDENT_CHAR_NATURES[23] = C_IDENT_PART;
081: OBVIOUS_IDENT_CHAR_NATURES[24] = C_IDENT_PART;
082: OBVIOUS_IDENT_CHAR_NATURES[25] = C_IDENT_PART;
083: OBVIOUS_IDENT_CHAR_NATURES[26] = C_IDENT_PART;
084: OBVIOUS_IDENT_CHAR_NATURES[27] = C_IDENT_PART;
085: OBVIOUS_IDENT_CHAR_NATURES[127] = C_IDENT_PART;
086:
087: for (int i = '0'; i <= '9'; i++)
088: OBVIOUS_IDENT_CHAR_NATURES[i] = C_DIGIT | C_IDENT_PART;
089:
090: for (int i = 'a'; i <= 'z'; i++)
091: OBVIOUS_IDENT_CHAR_NATURES[i] = C_LOWER_LETTER
092: | C_IDENT_PART | C_IDENT_START;
093: for (int i = 'A'; i <= 'Z'; i++)
094: OBVIOUS_IDENT_CHAR_NATURES[i] = C_UPPER_LETTER
095: | C_IDENT_PART | C_IDENT_START;
096:
097: OBVIOUS_IDENT_CHAR_NATURES['_'] = C_SPECIAL | C_IDENT_PART
098: | C_IDENT_START;
099: OBVIOUS_IDENT_CHAR_NATURES['$'] = C_SPECIAL | C_IDENT_PART
100: | C_IDENT_START;
101:
102: OBVIOUS_IDENT_CHAR_NATURES[9] = C_SPACE | C_JLS_SPACE; // \ u0009: HORIZONTAL TABULATION
103: OBVIOUS_IDENT_CHAR_NATURES[10] = C_SPACE | C_JLS_SPACE; // \ u000a: LINE FEED
104: OBVIOUS_IDENT_CHAR_NATURES[11] = C_SPACE;
105: OBVIOUS_IDENT_CHAR_NATURES[12] = C_SPACE | C_JLS_SPACE; // \ u000c: FORM FEED
106: OBVIOUS_IDENT_CHAR_NATURES[13] = C_SPACE | C_JLS_SPACE; // \ u000d: CARRIAGE RETURN
107: OBVIOUS_IDENT_CHAR_NATURES[28] = C_SPACE;
108: OBVIOUS_IDENT_CHAR_NATURES[29] = C_SPACE;
109: OBVIOUS_IDENT_CHAR_NATURES[30] = C_SPACE;
110: OBVIOUS_IDENT_CHAR_NATURES[31] = C_SPACE;
111: OBVIOUS_IDENT_CHAR_NATURES[32] = C_SPACE | C_JLS_SPACE; // \ u0020: SPACE
112:
113: OBVIOUS_IDENT_CHAR_NATURES['.'] = C_SEPARATOR;
114: OBVIOUS_IDENT_CHAR_NATURES[':'] = C_SEPARATOR;
115: OBVIOUS_IDENT_CHAR_NATURES[';'] = C_SEPARATOR;
116: OBVIOUS_IDENT_CHAR_NATURES[','] = C_SEPARATOR;
117: OBVIOUS_IDENT_CHAR_NATURES['['] = C_SEPARATOR;
118: OBVIOUS_IDENT_CHAR_NATURES[']'] = C_SEPARATOR;
119: OBVIOUS_IDENT_CHAR_NATURES['('] = C_SEPARATOR;
120: OBVIOUS_IDENT_CHAR_NATURES[')'] = C_SEPARATOR;
121: OBVIOUS_IDENT_CHAR_NATURES['{'] = C_SEPARATOR;
122: OBVIOUS_IDENT_CHAR_NATURES['}'] = C_SEPARATOR;
123: OBVIOUS_IDENT_CHAR_NATURES['+'] = C_SEPARATOR;
124: OBVIOUS_IDENT_CHAR_NATURES['-'] = C_SEPARATOR;
125: OBVIOUS_IDENT_CHAR_NATURES['*'] = C_SEPARATOR;
126: OBVIOUS_IDENT_CHAR_NATURES['/'] = C_SEPARATOR;
127: OBVIOUS_IDENT_CHAR_NATURES['='] = C_SEPARATOR;
128: OBVIOUS_IDENT_CHAR_NATURES['&'] = C_SEPARATOR;
129: OBVIOUS_IDENT_CHAR_NATURES['|'] = C_SEPARATOR;
130: OBVIOUS_IDENT_CHAR_NATURES['?'] = C_SEPARATOR;
131: OBVIOUS_IDENT_CHAR_NATURES['<'] = C_SEPARATOR;
132: OBVIOUS_IDENT_CHAR_NATURES['>'] = C_SEPARATOR;
133: OBVIOUS_IDENT_CHAR_NATURES['!'] = C_SEPARATOR;
134: OBVIOUS_IDENT_CHAR_NATURES['%'] = C_SEPARATOR;
135: OBVIOUS_IDENT_CHAR_NATURES['^'] = C_SEPARATOR;
136: OBVIOUS_IDENT_CHAR_NATURES['~'] = C_SEPARATOR;
137: OBVIOUS_IDENT_CHAR_NATURES['"'] = C_SEPARATOR;
138: OBVIOUS_IDENT_CHAR_NATURES['\''] = C_SEPARATOR;
139: }
140:
141: static {
142: Tables = new long[2][][];
143: Tables[START_INDEX] = new long[2][];
144: Tables[PART_INDEX] = new long[3][];
145: try {
146: DataInputStream inputStream = new DataInputStream(
147: ScannerHelper.class
148: .getResourceAsStream("start1.rsc")); //$NON-NLS-1$
149: long[] readValues = new long[1024];
150: for (int i = 0; i < 1024; i++) {
151: readValues[i] = inputStream.readLong();
152: }
153: inputStream.close();
154: Tables[START_INDEX][0] = readValues;
155: } catch (FileNotFoundException e) {
156: e.printStackTrace();
157: } catch (IOException e) {
158: e.printStackTrace();
159: }
160: try {
161: DataInputStream inputStream = new DataInputStream(
162: ScannerHelper.class
163: .getResourceAsStream("start2.rsc")); //$NON-NLS-1$
164: long[] readValues = new long[1024];
165: for (int i = 0; i < 1024; i++) {
166: readValues[i] = inputStream.readLong();
167: }
168: inputStream.close();
169: Tables[START_INDEX][1] = readValues;
170: } catch (FileNotFoundException e) {
171: e.printStackTrace();
172: } catch (IOException e) {
173: e.printStackTrace();
174: }
175: try {
176: DataInputStream inputStream = new DataInputStream(
177: ScannerHelper.class
178: .getResourceAsStream("part1.rsc")); //$NON-NLS-1$
179: long[] readValues = new long[1024];
180: for (int i = 0; i < 1024; i++) {
181: readValues[i] = inputStream.readLong();
182: }
183: inputStream.close();
184: Tables[PART_INDEX][0] = readValues;
185: } catch (FileNotFoundException e) {
186: e.printStackTrace();
187: } catch (IOException e) {
188: e.printStackTrace();
189: }
190: try {
191: DataInputStream inputStream = new DataInputStream(
192: ScannerHelper.class
193: .getResourceAsStream("part2.rsc")); //$NON-NLS-1$
194: long[] readValues = new long[1024];
195: for (int i = 0; i < 1024; i++) {
196: readValues[i] = inputStream.readLong();
197: }
198: inputStream.close();
199: Tables[PART_INDEX][1] = readValues;
200: } catch (FileNotFoundException e) {
201: e.printStackTrace();
202: } catch (IOException e) {
203: e.printStackTrace();
204: }
205: try {
206: DataInputStream inputStream = new DataInputStream(
207: ScannerHelper.class
208: .getResourceAsStream("part14.rsc")); //$NON-NLS-1$
209: long[] readValues = new long[1024];
210: for (int i = 0; i < 1024; i++) {
211: readValues[i] = inputStream.readLong();
212: }
213: inputStream.close();
214: Tables[PART_INDEX][2] = readValues;
215: } catch (FileNotFoundException e) {
216: e.printStackTrace();
217: } catch (IOException e) {
218: e.printStackTrace();
219: }
220: }
221:
222: private final static boolean isBitSet(long[] values, int i) {
223: try {
224: return (values[i / 64] & Bits[i % 64]) != 0;
225: } catch (NullPointerException e) {
226: return false;
227: }
228: }
229:
230: public static boolean isJavaIdentifierPart(char c) {
231: if (c < MAX_OBVIOUS) {
232: return (ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c] & ScannerHelper.C_IDENT_PART) != 0;
233: }
234: return Character.isJavaIdentifierPart(c);
235: }
236:
237: public static boolean isJavaIdentifierPart(char high, char low) {
238: int codePoint = toCodePoint(high, low);
239: switch ((codePoint & 0x1F0000) >> 16) {
240: case 0:
241: return Character.isJavaIdentifierPart((char) codePoint);
242: case 1:
243: return isBitSet(Tables[PART_INDEX][0], codePoint & 0xFFFF);
244: case 2:
245: return isBitSet(Tables[PART_INDEX][1], codePoint & 0xFFFF);
246: case 14:
247: return isBitSet(Tables[PART_INDEX][2], codePoint & 0xFFFF);
248: }
249: return false;
250: }
251:
252: public static boolean isJavaIdentifierStart(char c) {
253: if (c < MAX_OBVIOUS) {
254: return (ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c] & ScannerHelper.C_IDENT_START) != 0;
255: }
256: return Character.isJavaIdentifierStart(c);
257: }
258:
259: public static boolean isJavaIdentifierStart(char high, char low) {
260: int codePoint = toCodePoint(high, low);
261: switch ((codePoint & 0x1F0000) >> 16) {
262: case 0:
263: return Character.isJavaIdentifierStart((char) codePoint);
264: case 1:
265: return isBitSet(Tables[START_INDEX][0], codePoint & 0xFFFF);
266: case 2:
267: return isBitSet(Tables[START_INDEX][1], codePoint & 0xFFFF);
268: }
269: return false;
270: }
271:
272: private static int toCodePoint(char high, char low) {
273: return (high - Scanner.HIGH_SURROGATE_MIN_VALUE) * 0x400
274: + (low - Scanner.LOW_SURROGATE_MIN_VALUE) + 0x10000;
275: }
276:
277: public static boolean isDigit(char c) throws InvalidInputException {
278: if (c < ScannerHelper.MAX_OBVIOUS) {
279: return (ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c] & ScannerHelper.C_DIGIT) != 0;
280: }
281: if (Character.isDigit(c)) {
282: throw new InvalidInputException(Scanner.INVALID_DIGIT);
283: }
284: return false;
285: }
286:
287: public static int digit(char c, int radix) {
288: if (c < ScannerHelper.MAX_OBVIOUS) {
289: switch (radix) {
290: case 8:
291: if (c >= 48 && c <= 55) {
292: return c - 48;
293: }
294: return -1;
295: case 10:
296: if (c >= 48 && c <= 57) {
297: return c - 48;
298: }
299: return -1;
300: case 16:
301: if (c >= 48 && c <= 57) {
302: return c - 48;
303: }
304: if (c >= 65 && c <= 70) {
305: return c - 65 + 10;
306: }
307: if (c >= 97 && c <= 102) {
308: return c - 97 + 10;
309: }
310: return -1;
311: }
312: }
313: return Character.digit(c, radix);
314: }
315:
316: public static int getNumericValue(char c) {
317: if (c < ScannerHelper.MAX_OBVIOUS) {
318: switch (ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c]) {
319: case C_DIGIT:
320: return c - '0';
321: case C_LOWER_LETTER:
322: return 10 + c - 'a';
323: case C_UPPER_LETTER:
324: return 10 + c - 'A';
325: }
326: }
327: return Character.getNumericValue(c);
328: }
329:
330: public static char toUpperCase(char c) {
331: if (c < MAX_OBVIOUS) {
332: if ((ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c] & ScannerHelper.C_UPPER_LETTER) != 0) {
333: return c;
334: } else if ((ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c] & ScannerHelper.C_LOWER_LETTER) != 0) {
335: return (char) (c - 32);
336: }
337: }
338: return Character.toUpperCase(c);
339: }
340:
341: public static char toLowerCase(char c) {
342: if (c < MAX_OBVIOUS) {
343: if ((ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c] & ScannerHelper.C_LOWER_LETTER) != 0) {
344: return c;
345: } else if ((ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c] & ScannerHelper.C_UPPER_LETTER) != 0) {
346: return (char) (32 + c);
347: }
348: }
349: return Character.toLowerCase(c);
350: }
351:
352: public static boolean isLowerCase(char c) {
353: if (c < MAX_OBVIOUS) {
354: return (ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c] & ScannerHelper.C_LOWER_LETTER) != 0;
355: }
356: return Character.isLowerCase(c);
357: }
358:
359: public static boolean isUpperCase(char c) {
360: if (c < MAX_OBVIOUS) {
361: return (ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c] & ScannerHelper.C_UPPER_LETTER) != 0;
362: }
363: return Character.isUpperCase(c);
364: }
365:
366: /**
367: * Include also non JLS whitespaces.
368: *
369: * return true if Character.isWhitespace(c) would return true
370: */
371: public static boolean isWhitespace(char c) {
372: if (c < MAX_OBVIOUS) {
373: return (ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c] & ScannerHelper.C_SPACE) != 0;
374: }
375: return Character.isWhitespace(c);
376: }
377:
378: public static boolean isLetter(char c) {
379: if (c < MAX_OBVIOUS) {
380: return (ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c] & (ScannerHelper.C_UPPER_LETTER | ScannerHelper.C_LOWER_LETTER)) != 0;
381: }
382: return Character.isLetter(c);
383: }
384:
385: public static boolean isLetterOrDigit(char c) {
386: if (c < MAX_OBVIOUS) {
387: return (ScannerHelper.OBVIOUS_IDENT_CHAR_NATURES[c] & (ScannerHelper.C_UPPER_LETTER
388: | ScannerHelper.C_LOWER_LETTER | ScannerHelper.C_DIGIT)) != 0;
389: }
390: return Character.isLetterOrDigit(c);
391: }
392: }
|