001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common
008: * Development and Distribution License("CDDL") (collectively, the
009: * "License"). You may not use this file except in compliance with the
010: * License. You can obtain a copy of the License at
011: * http://www.netbeans.org/cddl-gplv2.html
012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013: * specific language governing permissions and limitations under the
014: * License. When distributing the software, include this License Header
015: * Notice in each file and include the License file at
016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
017: * particular file as subject to the "Classpath" exception as provided
018: * by Sun in the GPL Version 2 section of the License file that
019: * accompanied this code. If applicable, add the following below the
020: * License Header, with the fields enclosed by brackets [] replaced by
021: * your own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * Contributor(s):
025: *
026: * The Original Software is NetBeans. The Initial Developer of the Original
027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
028: * Microsystems, Inc. All Rights Reserved.
029: *
030: * If you wish your version of this file to be governed by only the CDDL
031: * or only the GPL Version 2, indicate your decision by adding
032: * "[Contributor] elects to include this software in this distribution
033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
034: * single choice of license, a recipient has the option to distribute
035: * your version of this file under either the CDDL, the GPL Version 2 or
036: * to extend the choice of license to its licensees as provided above.
037: * However, if you add GPL Version 2 code and therefore, elected the GPL
038: * Version 2 license, then the option applies only if the new code is
039: * made subject to such option by the copyright holder.
040: */
041:
042: package org.netbeans.lib.lexer.test.dump;
043:
044: import org.netbeans.api.lexer.PartType;
045: import org.netbeans.api.lexer.Token;
046: import org.netbeans.spi.lexer.Lexer;
047: import org.netbeans.spi.lexer.LexerInput;
048: import org.netbeans.spi.lexer.LexerRestartInfo;
049: import org.netbeans.spi.lexer.TokenFactory;
050: import org.netbeans.spi.lexer.TokenPropertyProvider;
051: import org.netbeans.spi.lexer.TokenPropertyProvider;
052:
053: /**
054: * Simple implementation a lexer.
055: *
056: * @author mmetelka
057: */
058: final class TokenDumpLexer implements Lexer<TokenDumpTokenId> {
059:
060: // Copy of LexerInput.EOF
061: private static final int EOF = LexerInput.EOF;
062:
063: private LexerInput input;
064:
065: private TokenFactory<TokenDumpTokenId> tokenFactory;
066:
067: TokenDumpLexer(LexerRestartInfo<TokenDumpTokenId> info) {
068: this .input = info.input();
069: this .tokenFactory = info.tokenFactory();
070: }
071:
072: public Object state() {
073: return null;
074: }
075:
076: public Token<TokenDumpTokenId> nextToken() {
077: int c = input.read();
078: switch (c) {
079: case '\r':
080: input.consumeNewline(); // continue to '\n' handling
081: case '\n': // newline
082: return token(TokenDumpTokenId.NEWLINE);
083:
084: case EOF:
085: return null;
086:
087: case '.':
088: switch (c = input.read()) {
089: case '\\': // ".\"
090: if ((c = input.read()) == '.') { // ".\."
091: switch (c = input.read()) {
092: case 'b':
093: return finishCharLiteralOrText(
094: TokenDumpTokenId.BACKSPACE_CHAR, '\b');
095: case 'f':
096: return finishCharLiteralOrText(
097: TokenDumpTokenId.FORM_FEED_CHAR, '\f');
098: case 't':
099: return finishCharLiteralOrText(
100: TokenDumpTokenId.TAB_CHAR, '\t');
101: case 'r':
102: return finishCharLiteralOrText(
103: TokenDumpTokenId.CR_CHAR, '\r');
104: case 'n':
105: return finishCharLiteralOrText(
106: TokenDumpTokenId.NEWLINE_CHAR, '\n');
107: case 'u':
108: if ((c = input.read()) == '.')
109: return finishUnicodeOrText();
110: }
111: }
112: input.backup(1);
113: return finishText();
114:
115: case 'e': // ".e"
116: if ((c = input.read()) == '.'
117: && (c = input.read()) == 'o'
118: && (c = input.read()) == '.'
119: && (c = input.read()) == 'f'
120: && (c = input.read()) == '.')
121: return finishNewlineOrText(TokenDumpTokenId.EOF_VIRTUAL);
122: input.backup(1);
123: return finishText();
124:
125: case 't': // ".t"
126: if ((c = input.read()) == '.'
127: && (c = input.read()) == 'e'
128: && (c = input.read()) == '.'
129: && (c = input.read()) == 's'
130: && (c = input.read()) == '.'
131: && (c = input.read()) == 't'
132: && (c = input.read()) == '.') { // ".t.e.s.t."
133: return finishTillNewline(TokenDumpTokenId.TEST_NAME);
134: }
135: input.backup(1);
136: return finishText();
137:
138: case EOF:
139: return token(TokenDumpTokenId.TEXT);
140: }
141:
142: default:
143: return finishText();
144: }
145: }
146:
147: private Token<TokenDumpTokenId> finishText() {
148: return finishTillNewline(TokenDumpTokenId.TEXT);
149: }
150:
151: private Token<TokenDumpTokenId> finishTillNewline(
152: TokenDumpTokenId id) {
153: while (true) {
154: switch (input.read()) {
155: case '\r':
156: case '\n':
157: case EOF:
158: input.backup(1);
159: return token(id);
160: }
161: }
162: }
163:
164: private Token<TokenDumpTokenId> finishNewlineOrText(
165: TokenDumpTokenId id) {
166: // If newline follows then return the given id otherwise return text
167: switch (input.read()) {
168: case '\r':
169: case '\n':
170: case EOF: // EOF is also valid ending
171: input.backup(1);
172: return token(id);
173: }
174: return finishText();
175: }
176:
177: private Token<TokenDumpTokenId> finishUnicodeOrText() {
178: // If 4 unicode hex numbers followed by '.' return UNICODE_CHAR otherwise TEXT
179: int c;
180: int number = 0;
181: int hexDigit = 0;
182: for (int i = 4; i > 0; i--) { // read 4 unicode digits
183: switch (c = input.read()) {
184: case '0':
185: case '1':
186: case '2':
187: case '3':
188: case '4':
189: case '5':
190: case '6':
191: case '7':
192: case '8':
193: case '9':
194: hexDigit = c - '0';
195: break;
196: case 'a':
197: case 'b':
198: case 'c':
199: case 'd':
200: case 'e':
201: case 'f':
202: hexDigit = c - 'a' + 10;
203: break;
204: case 'A':
205: case 'B':
206: case 'C':
207: case 'D':
208: case 'E':
209: case 'F':
210: hexDigit = c - 'A' + 10;
211: break;
212: default:
213: input.backup(1);
214: return finishText();
215:
216: }
217: number = (number << 4) | hexDigit;
218: }
219: return finishCharLiteralOrText(TokenDumpTokenId.UNICODE_CHAR,
220: (char) number);
221: }
222:
223: private Token<TokenDumpTokenId> finishCharLiteralOrText(
224: TokenDumpTokenId id, char ch) {
225: int c;
226: if ((c = input.read()) == '.') {
227: switch (c = input.read()) {
228: case '\r':
229: case '\n':
230: case EOF:
231: input.backup(1);
232: return tokenFactory.createPropertyToken(id, input
233: .readLength(), new UnicodeCharValueProvider(
234: new Character(ch)), PartType.COMPLETE);
235: }
236: }
237: input.backup(1);
238: return finishText();
239: }
240:
241: private Token<TokenDumpTokenId> token(TokenDumpTokenId id) {
242: return tokenFactory.createToken(id);
243: }
244:
245: public void release() {
246: }
247:
248: private static final class UnicodeCharValueProvider implements
249: TokenPropertyProvider {
250:
251: private Character ch;
252:
253: UnicodeCharValueProvider(Character ch) {
254: this .ch = ch;
255: }
256:
257: public Object getValue(Token token, Object key) {
258: if (TokenDumpTokenId.UNICODE_CHAR_TOKEN_PROPERTY
259: .equals(key))
260: return ch;
261: return null; // no non-tokenStore value
262: }
263:
264: }
265:
266: }
|