001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common
008: * Development and Distribution License("CDDL") (collectively, the
009: * "License"). You may not use this file except in compliance with the
010: * License. You can obtain a copy of the License at
011: * http://www.netbeans.org/cddl-gplv2.html
012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013: * specific language governing permissions and limitations under the
014: * License. When distributing the software, include this License Header
015: * Notice in each file and include the License file at
016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
017: * particular file as subject to the "Classpath" exception as provided
018: * by Sun in the GPL Version 2 section of the License file that
019: * accompanied this code. If applicable, add the following below the
020: * License Header, with the fields enclosed by brackets [] replaced by
021: * your own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * Contributor(s):
025: *
026: * The Original Software is NetBeans. The Initial Developer of the Original
027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
028: * Microsystems, Inc. All Rights Reserved.
029: *
030: * If you wish your version of this file to be governed by only the CDDL
031: * or only the GPL Version 2, indicate your decision by adding
032: * "[Contributor] elects to include this software in this distribution
033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
034: * single choice of license, a recipient has the option to distribute
035: * your version of this file under either the CDDL, the GPL Version 2 or
036: * to extend the choice of license to its licensees as provided above.
037: * However, if you add GPL Version 2 code and therefore, elected the GPL
038: * Version 2 license, then the option applies only if the new code is
039: * made subject to such option by the copyright holder.
040: */
041: package org.netbeans.modules.ruby.lexer;
042:
043: import java.util.ArrayList;
044: import java.util.List;
045: import java.util.prefs.BackingStoreException;
046: import java.util.prefs.Preferences;
047:
048: import org.netbeans.api.lexer.Token;
049: import org.netbeans.spi.lexer.Lexer;
050: import org.netbeans.spi.lexer.LexerInput;
051: import org.netbeans.spi.lexer.LexerRestartInfo;
052: import org.netbeans.spi.lexer.TokenFactory;
053: import org.openide.ErrorManager;
054: import org.openide.util.NbPreferences;
055:
056: /**
057: * Lexical analyzer for Ruby comments which identifies TODO markers
058: * and highlights them specially.
059: *
060: * @todo Handle rdoc on/off directives (#++,#--). Since these occur on separate
061: * lines I can't handle it now.
062: * @todo Highlight only RDoc reserved words, or all that fit the pattern? For
063: * now I'm highlighting :\w+: sequences. Possibly I should only highlight
064: * @todo ___ shows up as an italic "_" - that aint right
065: * @todo Tokenize Ruby-style symbols (:foo) and use the ruby color preferences?
066: *
067: * @author Tor Norbye
068: */
069: public final class RubyCommentLexer implements
070: Lexer<RubyCommentTokenId> {
071: private static final int EOF = LexerInput.EOF;
072: private static final String[] RDOC_DIRECTIVES = { "arg", "args",
073: "yield", "yields", "notnew", "not-new", "not_new", "doc",
074: "nodoc", "stopdoc", "startdoc", "enddoc", "main", "title",
075: "section", "include" };
076: private final LexerInput input;
077: private final TokenFactory<RubyCommentTokenId> tokenFactory;
078: private boolean inWord;
079: private String[] markers;
080:
081: public RubyCommentLexer(LexerRestartInfo<RubyCommentTokenId> info) {
082: this .input = info.input();
083: this .tokenFactory = info.tokenFactory();
084: assert (info.state() == null); // passed argument always null
085: }
086:
087: public Object state() {
088: return null;
089: }
090:
091: /**
092: * Compute the set of markers to scan for in the user source code.
093: * The code tries to look for the same markers used by the TODO module
094: * in case the user has customized the set. (However, it is doing this
095: * by peeking at the Preferences possibly left by the docscan module,
096: * rather than having a contract API with it, based on
097: * tasklist/docscan/src/org/netbeans/modules/tasklist/docscan/Settings.java)
098: */
099: private String[] getTodoMarkers() {
100: if (markers == null) {
101: final String MARKER_PREFIX = "Tag"; // NOI18N
102: final int MARKER_PREFIX_LENGTH = MARKER_PREFIX.length();
103: List<String> markerList = new ArrayList<String>();
104:
105: try {
106: Preferences preferences = NbPreferences.root().node(
107: "org/netbeans/modules/tasklist/docscan"); // NOI18N
108: String[] keys = preferences.keys();
109:
110: for (int i = 0; i < keys.length; i++) {
111: String key = keys[i];
112:
113: if ((key != null) && key.startsWith(MARKER_PREFIX)) {
114: markerList.add(key
115: .substring(MARKER_PREFIX_LENGTH));
116: }
117: }
118: } catch (BackingStoreException bse) {
119: ErrorManager.getDefault().notify(bse);
120: }
121:
122: if (markerList.size() > 0) {
123: markerList.remove("@todo"); // Applies to javadoc, and these tags are now colorized separately
124: markers = markerList.toArray(new String[markerList
125: .size()]);
126: } else {
127: // Additional candidates: HACK, WORKAROUND, REMOVE, OLD
128: markers = new String[] { "TODO", "FIXME", "XXX",
129: "PENDING" }; // NOI18N
130: }
131: }
132:
133: return markers;
134: }
135:
136: public Preferences getDocscanPreferences() {
137: return NbPreferences.root().node(
138: "org/netbeans/modules/tasklist/docscan");
139: }
140:
141: public Token<RubyCommentTokenId> nextToken() {
142: inWord = false;
143:
144: inputLoop: while (true) {
145: int ch = input.read();
146:
147: switch (ch) {
148: case EOF: {
149: if (input.readLength() > 0) {
150: return token(RubyCommentTokenId.COMMENT_TEXT);
151: } else {
152: return null;
153: }
154: }
155:
156: case '\\':
157: // The next character is escaped...
158: input.read();
159:
160: continue;
161:
162: case '\n':
163: return token(RubyCommentTokenId.COMMENT_TEXT);
164:
165: case '#': { // Linked method
166:
167: // See if this is a method reference. It can be either "#method" or "Class#method".
168: // If the input is something like " #" we need to chop it off to start at "#"; if
169: // it's something like "foo Bar#baz" we need to chop it off at "Bar#baz", and
170: // if it's something impossible like " foo#bar" we can ignore it completely (the class
171: // must be uppercase).
172: CharSequence s = input.readText();
173: int classIndex = s.length() - 1;
174: assert s.charAt(classIndex) == '#';
175: for (classIndex--; classIndex >= 0; classIndex--) {
176: char c = s.charAt(classIndex);
177: if (!Character.isJavaIdentifierPart(c) && c != '_'
178: && c != ':') {
179: // The next character needs to be "#" or an uppercase character
180: assert classIndex < s.length() - 1;
181: char next = s.charAt(classIndex + 1);
182: if (!(next == '#' || Character
183: .isUpperCase(next))) {
184: // This "#" is not in an Upper# sequence
185: // just continue processing input
186: continue inputLoop;
187: }
188: break;
189: }
190: }
191: // Make sure uppercase
192: if (classIndex == -1) {
193: // It's the beginning of input - we're okay
194: char next = s.charAt(0);
195: if (!(next == '#' || Character.isUpperCase(next))) {
196: break;
197: }
198: } else {
199: input.backup(input.readLength() - (classIndex + 1));
200: return token(RubyCommentTokenId.COMMENT_TEXT);
201: }
202:
203: int originalLength = input.readLength();
204:
205: // See if we have what looks like a method name:
206: // method-only characters followed by whitespace, newlines or EOF:
207: boolean seenSuffixChar = false;
208: boolean seenPrefixChar = false;
209: while (ch != EOF) {
210: ch = input.read();
211:
212: if (ch == '$' || ch == '@') {
213: // TODO - what do I do here?
214: seenPrefixChar = true;
215: } else if (ch == '?' || ch == '=' || ch == '!') {
216: seenSuffixChar = true;
217: } else if (ch == ':'
218: || Character.isJavaIdentifierPart(ch)) {
219: if (seenSuffixChar) {
220: // These are only allowed at the end
221: break;
222: }
223: continue;
224: } else {
225: input.backup(1);
226: break;
227: }
228: }
229:
230: if (Character.isWhitespace(ch) || (ch == EOF)
231: || (ch == '.') || (ch == ',') || (ch == ')')
232: || (ch == '}') || (ch == '(')) {
233: if (input.readLength() > 2
234: && input.readLength() > originalLength) {
235: return token(RubyCommentTokenId.COMMENT_LINK);
236: }
237: }
238:
239: break;
240: }
241:
242: case 'f': // ftp:
243: case 'm': // mailto:
244: case 'w': // www.
245: case 'h': { // http links. TODO: link:, ftp:, mailto:, and www.
246:
247: if (inWord) {
248: break;
249: }
250:
251: int originalLength = input.readLength();
252: boolean foundLinkBegin = false;
253:
254: if (ch == 'h') { // http:
255:
256: if (input.read() == 't') {
257: if (input.read() == 't') {
258: if (input.read() == 'p') {
259: if (input.read() == ':') {
260: foundLinkBegin = true;
261: } else {
262: input.backup(4);
263: }
264: } else {
265: input.backup(3);
266: }
267: } else {
268: input.backup(2);
269: }
270: } else {
271: input.backup(1);
272: }
273: } else if (ch == 'f') { // ftp:
274:
275: if (input.read() == 't') {
276: if (input.read() == 'p') {
277: if (input.read() == ':') {
278: foundLinkBegin = true;
279: } else {
280: input.backup(3);
281: }
282: } else {
283: input.backup(2);
284: }
285: } else {
286: input.backup(1);
287: }
288: } else if (ch == 'm') { // mailto:
289:
290: if (input.read() == 'a') {
291: if (input.read() == 'i') {
292: if (input.read() == 'l') {
293: if (input.read() == 't') {
294: if (input.read() == 'o') {
295: if (input.read() == ':') {
296: foundLinkBegin = true;
297: } else {
298: input.backup(6);
299: }
300: } else {
301: input.backup(5);
302: }
303: } else {
304: input.backup(4);
305: }
306: } else {
307: input.backup(3);
308: }
309: } else {
310: input.backup(2);
311: }
312: } else {
313: input.backup(1);
314: }
315: } else if (ch == 'w') { // www.
316:
317: if (input.read() == 'w') {
318: if (input.read() == 'w') {
319: if (input.read() == '.') {
320: foundLinkBegin = true;
321: } else {
322: input.backup(3);
323: }
324: } else {
325: input.backup(2);
326: }
327: } else {
328: input.backup(1);
329: }
330: }
331:
332: if (foundLinkBegin) {
333: while (ch != EOF) {
334: ch = input.read();
335:
336: if ((ch == ']') || (ch == ')')
337: || Character.isWhitespace(ch)
338: || (ch == '\'') || (ch == '"')) {
339: input.backup(1);
340:
341: break;
342: }
343: }
344:
345: if (originalLength > 1) {
346: input.backup(input.readLengthEOF()
347: - originalLength + 1);
348:
349: return token(RubyCommentTokenId.COMMENT_TEXT);
350: }
351:
352: if (input.readLength() > 2) {
353: return token(RubyCommentTokenId.COMMENT_LINK);
354: }
355: }
356: break;
357: }
358:
359: case '_': // Italic text
360:
361: if (inWord) {
362: break;
363: }
364:
365: if (input.readLength() > 1) {
366: input.backup(1);
367:
368: return token(RubyCommentTokenId.COMMENT_TEXT);
369: }
370:
371: while (ch != EOF) {
372: ch = input.read();
373:
374: if (ch == '_') {
375: int next = input.read();
376: input.backup(1);
377:
378: if (Character.isLetter(next) || (next == '_')) {
379: continue;
380: }
381:
382: if (input.readLength() > 2) {
383: return token(RubyCommentTokenId.COMMENT_ITALIC);
384: }
385: } else if (!(Character.isLetter(ch) || (ch == '_'))) {
386: input.backup(1);
387: break;
388: }
389: }
390:
391: break;
392:
393: case '*': // Bold text
394:
395: if (inWord) {
396: break;
397: }
398:
399: if (input.readLength() > 1) {
400: input.backup(1);
401:
402: return token(RubyCommentTokenId.COMMENT_TEXT);
403: }
404:
405: while (ch != EOF) {
406: ch = input.read();
407:
408: if ((ch == '*') && (input.readLength() > 2)) {
409: return token(RubyCommentTokenId.COMMENT_BOLD);
410: } else if (!(Character.isLetter(ch) || (ch == '_'))) {
411: input.backup(1);
412: break;
413: }
414: }
415:
416: break;
417:
418: case '+': // Typewriter text
419:
420: if (inWord) {
421: break;
422: }
423:
424: if (input.readLength() > 1) {
425: input.backup(1);
426:
427: return token(RubyCommentTokenId.COMMENT_TEXT);
428: }
429:
430: while (ch != EOF) {
431: ch = input.read();
432:
433: if ((ch == '+') && (input.readLength() > 2)) {
434: return token(RubyCommentTokenId.COMMENT_HTMLTAG);
435: } else if (!(Character.isLetter(ch) || (ch == '_') || (ch == ':'))) { // ':' e.g. +::Module++
436: input.backup(1);
437: break;
438: }
439: }
440:
441: break;
442:
443: case '<': { // Html tag - rdoc
444:
445: // Only accept things that look like tags: <foo> or </foo>, not
446: // <<, < >, etc.
447: int next = input.read();
448: input.backup(1);
449:
450: if (!((next == '/') || Character.isLetter(next))) {
451: break;
452: }
453:
454: if (input.readLength() > 1) {
455: input.backup(1);
456:
457: return token(RubyCommentTokenId.COMMENT_TEXT);
458: }
459:
460: while (ch != EOF) {
461: ch = input.read();
462:
463: if (ch == '\n') {
464: break;
465: } else if (ch == '>') {
466: return token(RubyCommentTokenId.COMMENT_HTMLTAG);
467: }
468: }
469:
470: break;
471: }
472:
473: case ':': { // Possible rdoc tag, like :nodoc:
474: ch = input.read(); // input.readText()
475: if (ch == ':') {
476: // :: - possibly part of something like Foo::Bar
477: continue;
478: } else {
479: input.backup(1);
480: if (input.readText().toString().endsWith("::")) {
481: continue;
482: }
483: }
484:
485: if (input.readLength() > 1) {
486: input.backup(1);
487:
488: return token(RubyCommentTokenId.COMMENT_TEXT);
489: }
490:
491: int backup = 0;
492:
493: while (ch != EOF) {
494: ch = input.read();
495: backup++;
496:
497: if ((ch == '\n')
498: || (!Character.isLetter(ch) && (ch != '_') && (ch != '-'))) {
499: if ((ch == ':') && (input.readLength() > 2)) { // Don't recognize "::" since it's used a lot when mentioning modules
500: // I should be able to use input.readText(1, ...) here but it doesn't work right
501:
502: String seen = input.readText().toString();
503: String directive = seen.substring(1, seen
504: .length() - 1);
505:
506: for (String keyword : RDOC_DIRECTIVES) {
507: if (keyword.equals(directive)) {
508: return token(RubyCommentTokenId.COMMENT_RDOC);
509: }
510: }
511: }
512:
513: input.backup(backup);
514:
515: break;
516: }
517: }
518:
519: continue;
520: }
521:
522: default: {
523: if (!inWord) {
524: // See if we have a match from here on for any of the markers
525: String[] todoMarkers = getTodoMarkers();
526:
527: for (int i = 0; i < todoMarkers.length; i++) {
528: if (todoMarkers[i].charAt(0) == ch) {
529: if (input.readLength() > 1) {
530: input.backup(1);
531:
532: return token(RubyCommentTokenId.COMMENT_TEXT);
533: }
534:
535: // Possible match!
536: // Read ahead while matching further characters, but if they
537: // stop matching, back up and try another
538: int backup = 0;
539: String marker = todoMarkers[i];
540:
541: for (int c = 1, n = marker.length(); c < n; c++) {
542: backup++;
543:
544: if (input.read() != marker.charAt(c)) {
545: input.backup(backup);
546:
547: break;
548: }
549: }
550:
551: if (backup == (marker.length() - 1)) { // Found it
552: // Peek ahead and make sure this match is a whole word
553:
554: boolean separate = !Character
555: .isJavaIdentifierPart(input
556: .read());
557: input.backup(1);
558:
559: if (separate) {
560: return tokenFactory
561: .createToken(
562: RubyCommentTokenId.COMMENT_TODO,
563: input.readLength());
564: }
565: }
566: }
567: }
568: }
569: }
570: }
571:
572: inWord = Character.isJavaIdentifierPart(ch);
573: }
574: }
575:
576: private Token<RubyCommentTokenId> token(RubyCommentTokenId id) {
577: return tokenFactory.createToken(id);
578: }
579:
580: public void release() {
581: }
582: }
|