001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common
008: * Development and Distribution License("CDDL") (collectively, the
009: * "License"). You may not use this file except in compliance with the
010: * License. You can obtain a copy of the License at
011: * http://www.netbeans.org/cddl-gplv2.html
012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013: * specific language governing permissions and limitations under the
014: * License. When distributing the software, include this License Header
015: * Notice in each file and include the License file at
016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
017: * particular file as subject to the "Classpath" exception as provided
018: * by Sun in the GPL Version 2 section of the License file that
019: * accompanied this code. If applicable, add the following below the
020: * License Header, with the fields enclosed by brackets [] replaced by
021: * your own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * Contributor(s):
025: *
026: * The Original Software is NetBeans. The Initial Developer of the Original
027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
028: * Microsystems, Inc. All Rights Reserved.
029: *
030: * If you wish your version of this file to be governed by only the CDDL
031: * or only the GPL Version 2, indicate your decision by adding
032: * "[Contributor] elects to include this software in this distribution
033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
034: * single choice of license, a recipient has the option to distribute
035: * your version of this file under either the CDDL, the GPL Version 2 or
036: * to extend the choice of license to its licensees as provided above.
037: * However, if you add GPL Version 2 code and therefore, elected the GPL
038: * Version 2 license, then the option applies only if the new code is
039: * made subject to such option by the copyright holder.
040: */
041:
042: package org.netbeans.modules.lexer.demo.handcoded.link;
043:
044: import java.util.HashMap;
045: import java.util.Map;
046: import org.netbeans.api.lexer.Language;
047: import org.netbeans.api.lexer.Lexer;
048: import org.netbeans.api.lexer.LexerInput;
049: import org.netbeans.api.lexer.TokenId;
050: import org.netbeans.api.lexer.Token;
051: import org.netbeans.spi.lexer.util.Compatibility;
052:
053: /**
054: * Lexer that recognizes LinkLanguage.
055: *
056: * @author Miloslav Metelka
057: * @version 1.00
058: */
059:
060: final class LinkLexer implements Lexer {
061:
062: private static final LinkLanguage language = LinkLanguage.get();
063:
064: private static final int INIT = 0;
065: private static final int IN_SCHEME = 1;
066: private static final int AFTER_COLON = 2;
067: private static final int AFTER_SLASH = 3;
068:
069: /** Map for mapping scheme to uri type */
070: private static final Map scheme2uri = new HashMap();
071:
072: static {
073: scheme2uri.put("http", LinkLanguage.HTTP_URI);
074: scheme2uri.put("ftp", LinkLanguage.FTP_URI);
075: }
076:
077: private LexerInput lexerInput;
078:
079: /** Index of first char after scheme name e.g. "http" or "ftp" */
080: private int schemeEnd;
081:
082: /** Reused text buffer of the uri scheme */
083: private Object uriSchemeReusedText;
084:
085: public LinkLexer() {
086: }
087:
088: public Object getState() {
089: return null;
090: }
091:
092: public void restart(LexerInput input, Object state) {
093: this .lexerInput = input;
094: if (input == null) { // this input is no longer being used by this lexer
095: uriSchemeReusedText = null; // free the reused text
096: }
097: }
098:
099: public Token nextToken() {
100: Token token = null;
101: int uriStart = findURIStart();
102: switch (uriStart) {
103: case -1: // no link found
104: if (lexerInput.getReadLength() > 0) { // at least one char read
105: token = lexerInput.createToken(LinkLanguage.TEXT);
106: }
107: break;
108:
109: case 0: // link at the begining of token
110: // Reading is positioned after "scheme://"
111: findURIEnd();
112: // Now read is positioned at the first non-matching char
113:
114: // Get the scheme in compatible way - replacement of LexerInput.getReadText()
115: uriSchemeReusedText = Compatibility.getCompatibleReadText(
116: lexerInput, 0, schemeEnd, uriSchemeReusedText);
117:
118: TokenId uriType = (TokenId) scheme2uri
119: .get(uriSchemeReusedText);
120: if (uriType == null) {
121: uriType = LinkLanguage.URI;
122: }
123:
124: token = lexerInput.createToken(uriType);
125: break;
126:
127: default: // link occurs on the line but not at the begining
128: token = lexerInput.createToken(LinkLanguage.TEXT, uriStart);
129: lexerInput.backup(lexerInput.getReadLength()); // backup the extra read chars
130: break;
131: }
132:
133: return token;
134: }
135:
136: private int findURIStart() {
137: int state = INIT;
138: int uriStart = -1;
139:
140: schemeEnd = 0;
141:
142: int ch = lexerInput.read();
143: while (ch != LexerInput.EOF && ch != '\n') {
144: switch (ch) {
145: case ':':
146: switch (state) {
147: case IN_SCHEME:
148: state = AFTER_COLON;
149: schemeEnd = lexerInput.getReadLength() - 1; // exclude ':'
150: break;
151:
152: default:
153: uriStart = -1;
154: state = INIT;
155: break;
156: }
157: break;
158:
159: case '/':
160: switch (state) {
161: case AFTER_COLON:
162: state = AFTER_SLASH;
163: break;
164:
165: case AFTER_SLASH: // found "scheme://" => return success
166: return uriStart;
167:
168: default:
169: uriStart = -1;
170: state = INIT;
171: break;
172: }
173: break;
174:
175: case '.': // can be part of URI scheme
176: case '+': // can be part of URI scheme
177: case '-': // can be part of URI scheme
178: switch (state) {
179: // case IN_SCHEME: // stay in scheme
180: default:
181: uriStart = -1;
182: state = INIT;
183: break;
184: }
185: break;
186:
187: default:
188: if (isAlpha(ch)) { // alpha char
189: switch (state) {
190: case INIT:
191: // mark begining of possible uri
192: uriStart = lexerInput.getReadLength() - 1;
193: state = IN_SCHEME;
194: break;
195:
196: case IN_SCHEME: // stay in scheme
197: break;
198:
199: default:
200: uriStart = -1;
201: state = INIT;
202: break;
203: }
204:
205: } else if (isDigit(ch)) {
206: switch (state) {
207: case IN_SCHEME: // stay in scheme
208: break;
209:
210: default:
211: uriStart = -1;
212: state = INIT;
213: break;
214: }
215:
216: } else {
217: uriStart = -1;
218: state = INIT;
219: }
220: }
221:
222: ch = lexerInput.read();
223: }
224:
225: // EOF or '\n' reached
226: return -1;
227: }
228:
229: private int findURIEnd() {
230: int ch = lexerInput.read();
231: while (ch != LexerInput.EOF && ch != '\n') {
232: boolean stop = false;
233:
234: switch (ch) {
235: // Allowed chars after "scheme://" follow - there is no particular
236: // syntax observed although normally it should be
237: case '#':
238: case ':':
239: case '?':
240: case ';':
241: case '&':
242: case '@':
243: case '=':
244: case '+':
245: case '-':
246: case '$':
247: case ',':
248: case '/':
249: case '.':
250: case '_':
251: case '!':
252: case '~':
253: case '\'':
254: case ')':
255: case '(':
256: case '%':
257: break;
258:
259: default:
260: if (!isAlpha(ch) && !isDigit(ch)) {
261: stop = true;
262: }
263: break;
264:
265: }
266:
267: if (stop) {
268: break;
269: }
270:
271: ch = lexerInput.read();
272: }
273:
274: if (ch != LexerInput.EOF) { // rollback the last char
275: lexerInput.backup(1);
276: }
277:
278: // EOF or '\n' reached
279: return -1;
280: }
281:
282: private static boolean isAlpha(int ch) {
283: return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z');
284: }
285:
286: private static boolean isDigit(int ch) {
287: return ('0' <= ch && ch <= '9');
288: }
289:
290: }
|