001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common
008: * Development and Distribution License("CDDL") (collectively, the
009: * "License"). You may not use this file except in compliance with the
010: * License. You can obtain a copy of the License at
011: * http://www.netbeans.org/cddl-gplv2.html
012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013: * specific language governing permissions and limitations under the
014: * License. When distributing the software, include this License Header
015: * Notice in each file and include the License file at
016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
017: * particular file as subject to the "Classpath" exception as provided
018: * by Sun in the GPL Version 2 section of the License file that
019: * accompanied this code. If applicable, add the following below the
020: * License Header, with the fields enclosed by brackets [] replaced by
021: * your own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * Contributor(s):
025: *
026: * The Original Software is NetBeans. The Initial Developer of the Original
027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
028: * Microsystems, Inc. All Rights Reserved.
029: *
030: * If you wish your version of this file to be governed by only the CDDL
031: * or only the GPL Version 2, indicate your decision by adding
032: * "[Contributor] elects to include this software in this distribution
033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
034: * single choice of license, a recipient has the option to distribute
035: * your version of this file under either the CDDL, the GPL Version 2 or
036: * to extend the choice of license to its licensees as provided above.
037: * However, if you add GPL Version 2 code and therefore, elected the GPL
038: * Version 2 license, then the option applies only if the new code is
039: * made subject to such option by the copyright holder.
040: */
041:
042: package org.netbeans.modules.db.sql.editor;
043:
044: import java.util.ArrayList;
045: import java.util.Arrays;
046: import java.util.HashSet;
047: import java.util.Iterator;
048: import java.util.StringTokenizer;
049: import org.netbeans.editor.Syntax;
050: import org.netbeans.editor.TokenID;
051: import org.netbeans.modules.db.api.sql.SQLKeywords;
052: import org.openide.util.NbBundle;
053:
054: /**
055: * This class implements SQL syntax recognition
056: *
057: * @author Jesse Beaumont
058: */
059: public class SQLSyntax extends Syntax {
060:
061: private static final int ISI_WHITESPACE = 2; // inside white space
062: private static final int ISI_LINE_COMMENT = 4; // inside line comment --
063: private static final int ISI_BLOCK_COMMENT = 5; // inside block comment /* ... */
064: private static final int ISI_STRING = 6; // inside string constant
065: private static final int ISI_STRING_A_QUOTE = 7; // inside string constant after '
066: private static final int ISI_IDENTIFIER = 10; // inside identifier
067: private static final int ISA_SLASH = 11; // slash char
068: private static final int ISA_OPERATOR = 12; // after '=', '/', '+'
069: private static final int ISA_MINUS = 13;
070: private static final int ISA_STAR = 20; // after '*'
071: private static final int ISA_STAR_I_BLOCK_COMMENT_END = 21; // after '*' in a block comment
072: private static final int ISA_EXCLAMATION = 26; // after '!'
073: private static final int ISA_ZERO = 27; // after '0'
074: private static final int ISI_INT = 28; // integer number
075: private static final int ISI_DOUBLE = 30; // double number
076: private static final int ISA_DOT = 33; // after '.'
077: private static final int ISA_COMMA = 34; // after ','
078: private static final int ISA_SEMICOLON = 35; //after ';'
079: private static final int ISA_LPAREN = 36; //after (
080: private static final int ISA_RPAREN = 37; //after )
081:
082: /**
083: * Creates a new instance of SQLSyntax
084: */
085: public SQLSyntax() {
086: tokenContextPath = SQLTokenContext.contextPath;
087: }
088:
089: /**
090: * Parse the next token
091: */
092: protected TokenID parseToken() {
093: char actChar; //the current character
094:
095: //while we still have stuff to parse, do so
096: while (offset < stopOffset) {
097: actChar = buffer[offset];
098:
099: //do the appropriate stuff based on what state the parser is in
100: switch (state) {
101: //the initial state (start of a new token)
102: case INIT:
103: switch (actChar) {
104: case '\'': // NOI18N
105: state = ISI_STRING;
106: break;
107: case '/':
108: state = ISA_SLASH;
109: break;
110: case '=':
111: case '>':
112: case '<':
113: case '+':
114: case ',':
115: case ')':
116: case '(':
117: case ';':
118: case '*':
119: case '!':
120: offset++;
121: state = INIT;
122: return SQLTokenContext.OPERATOR;
123: case '-':
124: state = ISA_MINUS;
125: break;
126: case '0':
127: state = ISA_ZERO;
128: break;
129: case '.':
130: state = ISA_DOT;
131: break;
132: default:
133: // Check for whitespace
134: if (Character.isWhitespace(actChar)) {
135: state = ISI_WHITESPACE;
136: break;
137: }
138:
139: // Check for digit
140: if (Character.isDigit(actChar)) {
141: state = ISI_INT;
142: break;
143: }
144:
145: // otherwise it's an identifier
146: state = ISI_IDENTIFIER;
147: break;
148: }
149: break;
150: //if we are currently in a whitespace token
151: case ISI_WHITESPACE: // white space
152: if (!Character.isWhitespace(actChar)) {
153: state = INIT;
154: return SQLTokenContext.WHITESPACE;
155: }
156: break;
157:
158: //if we are currently in a line comment
159: case ISI_LINE_COMMENT:
160: if (actChar == '\n') {
161: state = INIT;
162: return SQLTokenContext.LINE_COMMENT;
163: }
164: break;
165:
166: //if we are currently in a block comment
167: case ISI_BLOCK_COMMENT:
168: if (actChar == '*') {
169: state = ISA_STAR_I_BLOCK_COMMENT_END;
170: }
171: break;
172:
173: //if we are currently in a string literal
174: case ISI_STRING:
175: switch (actChar) {
176: case '\n':
177: state = INIT;
178: return SQLTokenContext.INCOMPLETE_STRING;
179: case '\'': // NOI18N
180: offset++;
181: state = INIT;
182: return SQLTokenContext.STRING;
183: }
184: break;
185:
186: //if we are currently in an identifier (e.g. a variable name)
187: case ISI_IDENTIFIER:
188: if (!Character.isLetterOrDigit(actChar)
189: && actChar != '_') {
190: state = INIT;
191: TokenID tid = matchKeyword(buffer, tokenOffset,
192: offset - tokenOffset);
193: if (tid != null) {
194: return tid;
195: } else {
196: return SQLTokenContext.IDENTIFIER;
197: }
198: }
199: break;
200:
201: //if we are after a slash (/)
202: case ISA_SLASH:
203: switch (actChar) {
204: case '*':
205: state = ISI_BLOCK_COMMENT;
206: break;
207: default:
208: if (Character.isWhitespace(actChar)
209: || actChar == '(') {
210: state = INIT;
211: return SQLTokenContext.OPERATOR;
212: }
213: }
214: break;
215:
216: //if we are after a -
217: case ISA_MINUS:
218: switch (actChar) {
219: case '-':
220: state = ISI_LINE_COMMENT;
221: break;
222: default:
223: state = INIT;
224: return SQLTokenContext.OPERATOR;
225: }
226: break;
227:
228: //if we are in the middle of a possible block comment end token
229: case ISA_STAR_I_BLOCK_COMMENT_END:
230: switch (actChar) {
231: case '/':
232: offset++;
233: state = INIT;
234: return SQLTokenContext.BLOCK_COMMENT;
235: default:
236: offset--;
237: state = ISI_BLOCK_COMMENT;
238: break;
239: }
240: break;
241:
242: //if we are after a 0
243: case ISA_ZERO:
244: switch (actChar) {
245: case '.':
246: state = ISI_DOUBLE;
247: break;
248: default:
249: if (Character.isDigit(actChar)) {
250: state = ISI_INT;
251: break;
252: } else {
253: state = INIT;
254: return SQLTokenContext.INT_LITERAL;
255: }
256: }
257: break;
258:
259: //if we are after an integer
260: case ISI_INT:
261: switch (actChar) {
262: case '.':
263: state = ISI_DOUBLE;
264: break;
265: default:
266: if (Character.isDigit(actChar)) {
267: state = ISI_INT;
268: break;
269: } else {
270: state = INIT;
271: return SQLTokenContext.INT_LITERAL;
272: }
273: }
274: break;
275:
276: //if we are in the middle of what we believe is a floating point
277: //number
278: case ISI_DOUBLE:
279: if (actChar >= '0' && actChar <= '9') {
280: state = ISI_DOUBLE;
281: break;
282: } else {
283: state = INIT;
284: return SQLTokenContext.DOUBLE_LITERAL;
285: }
286:
287: //if we are after a period
288: case ISA_DOT:
289: if (Character.isDigit(actChar)) {
290: state = ISI_DOUBLE;
291: } else { // only single dot
292: state = INIT;
293: return SQLTokenContext.DOT;
294: }
295: break;
296:
297: } // end of switch(state)
298:
299: offset++;
300: } // end of while(offset...)
301:
302: /*
303: * At this stage there's no more text in the scanned buffer.
304: * Scanner first checks whether this is completely the last
305: * available buffer.
306: */
307: if (lastBuffer) {
308: switch (state) {
309: case ISI_WHITESPACE:
310: state = INIT;
311: return SQLTokenContext.WHITESPACE;
312: case ISI_IDENTIFIER:
313: state = INIT;
314: TokenID tid = matchKeyword(buffer, tokenOffset, offset
315: - tokenOffset);
316: if (tid != null) {
317: return tid;
318: } else {
319: return SQLTokenContext.IDENTIFIER;
320: }
321: case ISI_LINE_COMMENT:
322: // stay in line-comment state
323: return SQLTokenContext.LINE_COMMENT;
324: case ISI_BLOCK_COMMENT:
325: case ISA_STAR_I_BLOCK_COMMENT_END:
326: // stay in block-comment state
327: return SQLTokenContext.BLOCK_COMMENT;
328: case ISI_STRING:
329: return SQLTokenContext.STRING; // hold the state
330: case ISA_ZERO:
331: case ISI_INT:
332: state = INIT;
333: return SQLTokenContext.INT_LITERAL;
334: case ISI_DOUBLE:
335: state = INIT;
336: return SQLTokenContext.DOUBLE_LITERAL;
337: case ISA_DOT:
338: state = INIT;
339: return SQLTokenContext.DOT;
340: case ISA_SLASH:
341: state = INIT;
342: return SQLTokenContext.OPERATOR;
343: }
344: }
345:
346: /*
347: * At this stage there's no more text in the scanned buffer, but
348: * this buffer is not the last so the
349: * scan will continue on another buffer.
350: * The scanner tries to minimize the amount of characters
351: * that will be prescanned in the next buffer by returning the token
352: * where possible.
353: */
354:
355: switch (state) {
356: case ISI_WHITESPACE:
357: return SQLTokenContext.WHITESPACE;
358: }
359:
360: return null; // nothing found
361: }
362:
363: /**
364: * Returns the state name for the state id
365: */
366: public String getStateName(int stateNumber) {
367: switch (stateNumber) {
368: case ISI_WHITESPACE:
369: return "ISI_WHITESPACE"; // NOI18N
370: case ISI_LINE_COMMENT:
371: return "ISI_LINE_COMMENT"; // NOI18N
372: case ISI_BLOCK_COMMENT:
373: return "ISI_BLOCK_COMMENT"; // NOI18N
374: case ISI_STRING:
375: return "ISI_STRING"; // NOI18N
376: case ISI_STRING_A_QUOTE:
377: return "ISI_STRING_A_QUOTE"; // NOI18N
378: case ISI_IDENTIFIER:
379: return "ISI_IDENTIFIER"; // NOI18N
380: case ISA_SLASH:
381: return "ISA_SLASH"; // NOI18N
382: case ISA_OPERATOR:
383: return "ISA_OPERATOR"; // NOI18N
384: case ISA_MINUS:
385: return "ISA_MINUS"; // NOI18N
386: case ISA_STAR:
387: return "ISA_STAR"; // NOI18N
388: case ISA_STAR_I_BLOCK_COMMENT_END:
389: return "ISA_STAR_I_BLOCK_COMMENT_END"; // NOI18N
390: case ISA_ZERO:
391: return "ISA_ZERO"; // NOI18N
392: case ISI_INT:
393: return "ISI_INT"; // NOI18N
394: case ISI_DOUBLE:
395: return "ISI_DOUBLE"; // NOI18N
396: case ISA_DOT:
397: return "ISA_DOT"; // NOI18N
398: case ISA_COMMA:
399: return "ISA_COMMA"; // NOI18N
400:
401: default:
402: return super .getStateName(stateNumber);
403: }
404: }
405:
406: /**
407: * Tries to match the specified sequence of characters to a SQL
408: * keyword.
409: *
410: * @return the KEYWORD id or null if no match was found
411: */
412: public TokenID matchKeyword(char[] buffer, int offset, int len) {
413: String keywordCandidate = new String(buffer, offset, len);
414:
415: if (SQLKeywords.isSQL99Keyword(keywordCandidate)) {
416: return SQLTokenContext.KEYWORD;
417: }
418:
419: return null;
420: }
421: }
|