001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common
008: * Development and Distribution License("CDDL") (collectively, the
009: * "License"). You may not use this file except in compliance with the
010: * License. You can obtain a copy of the License at
011: * http://www.netbeans.org/cddl-gplv2.html
012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013: * specific language governing permissions and limitations under the
014: * License. When distributing the software, include this License Header
015: * Notice in each file and include the License file at
016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
017: * particular file as subject to the "Classpath" exception as provided
018: * by Sun in the GPL Version 2 section of the License file that
019: * accompanied this code. If applicable, add the following below the
020: * License Header, with the fields enclosed by brackets [] replaced by
021: * your own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * Contributor(s):
025: *
026: * The Original Software is NetBeans. The Initial Developer of the Original
027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
028: * Microsystems, Inc. All Rights Reserved.
029: *
030: * If you wish your version of this file to be governed by only the CDDL
031: * or only the GPL Version 2, indicate your decision by adding
032: * "[Contributor] elects to include this software in this distribution
033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
034: * single choice of license, a recipient has the option to distribute
035: * your version of this file under either the CDDL, the GPL Version 2 or
036: * to extend the choice of license to its licensees as provided above.
037: * However, if you add GPL Version 2 code and therefore, elected the GPL
038: * Version 2 license, then the option applies only if the new code is
039: * made subject to such option by the copyright holder.
040: */
041:
042: package org.netbeans.lib.lexer;
043:
044: import org.netbeans.spi.lexer.LexerInput;
045:
/**
 * A character preprocessor translates a sequence of input characters
 * into a single character. This implementation uses that mechanism
 * to translate Unicode escape sequences.
 *
 * <p>
 * The preprocessor must be stateless.
 *
 * @author Miloslav Metelka
 * @version 1.00
 */
057:
058: public final class UnicodeEscapesPreprocessor extends CharPreprocessor {
059:
060: /**
061: * Preprocess a input text preprocessing unicode escape sequences.
062: */
063: protected void preprocessChar() {
064: int ch;
065: switch (ch = inputRead()) {
066: case '\\': // 1 input-char-read
067: // According to JLS only odd number of backslashes
068: // opens a unicode escape sequence. Therefore after seeing two
069: // backslashes it's possible to pass them unchanged.
070: switch (ch = inputRead()) {
071: case 'u': // 2 input-chars-read
072: // Four hex digits should follow
073: int i;
074: int c;
075: for (i = 4; i > 0; i--) {
076: switch (c = inputRead()) {
077: case '0':
078: case '1':
079: case '2':
080: case '3':
081: case '4':
082: case '5':
083: case '6':
084: case '7':
085: case '8':
086: case '9':
087: ch = (ch << 4) + (c - '0');
088: break;
089: case 'a':
090: case 'b':
091: case 'c':
092: case 'd':
093: case 'e':
094: case 'f':
095: ch = (ch << 4) + (c - 'a' + 10);
096: break;
097: case 'A':
098: case 'B':
099: case 'C':
100: case 'D':
101: case 'E':
102: case 'F':
103: ch = (ch << 4) + (c - 'A' + 10);
104: break;
105: case LexerInput.EOF: // Do not backup EOF
106: // EOF does not count for a character
107: i = -i; // cause for() loop to break
108: break;
109: default: // Invalid char in the sequence
110: inputBackup(1); // backup the extra non-EOF char
111: i = -i; // cause for() loop to break
112: break;
113: }
114: }
115: if (i < 0) { // Invalid char or EOF
116: // Return Unicode invalid char
117: // i < 0 -> (4 - number-of-read-chars)
118: outputPreprocessed((char) 0xFFFF, 5 + i);
119: notifyError("Invalid unicode sequence");
120: } else {
121: outputPreprocessed((char) ch, 5);
122: }
123: break;
124:
125: // case '\\':
126: default:
127: outputOriginal('\\');
128: outputOriginal(ch);
129: break;
130: }
131: break;
132:
133: default:
134: outputOriginal(ch);
135: break;
136: }
137: }
138:
139: protected boolean isSensitiveChar(char ch) {
140: switch (ch) {
141: case '\\':
142: case '0':
143: case '1':
144: case '2':
145: case '3':
146: case '4':
147: case '5':
148: case '6':
149: case '7':
150: case '8':
151: case '9':
152: case 'a':
153: case 'b':
154: case 'c':
155: case 'd':
156: case 'e':
157: case 'f':
158: case 'A':
159: case 'B':
160: case 'C':
161: case 'D':
162: case 'E':
163: case 'F':
164: case 'u':
165: return true;
166:
167: default:
168: return false;
169: }
170: }
171:
172: protected int maxLookahead() {
173: // Only one extra character is necessary to decide
174: // whether the sequence continues or not.
175: // After finding '\' if the next char is '\' then
176: // there is no sequence.
177: // After '\' 'u' there may be arbitrary number of 'u' chars
178: // and then four hexadecimal digits.
179: return 1;
180: }
181:
182: }
|