/*
 * PythonTokenMarker.java - Python token marker
 * Copyright (C) 1999 Jonathan Revusky
 * Copyright (C) 1998, 1999 Slava Pestov
 *
 * You may use and modify this package for any purpose. Redistribution is
 * permitted, in both source and binary form, provided that this notice
 * remains intact in all source distributions of this package.
 */

package org.syntax.jedit.tokenmarker;

import org.syntax.jedit.*;
import javax.swing.text.Segment;

/**
 * Python token marker.
 *
 * @author Jonathan Revusky
 * @version $Id: PythonTokenMarker.java 3074 2004-11-08 04:24:58Z bquig $
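 *
 * <p>A minimal usage sketch, assuming this marker is paired with the
 * {@code org.syntax.jedit.JEditTextArea} component that the standalone
 * jEdit syntax package provides:
 * <pre>
 * JEditTextArea textArea = new JEditTextArea();
 * textArea.setTokenMarker(new PythonTokenMarker());
 * </pre>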
 */
public class PythonTokenMarker extends TokenMarker {
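    // Internal token ids used to remember that a """...""" or '''...''' string
    // is still open when a line ends, so highlighting can resume on the next line.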
    private static final byte TRIPLEQUOTE1 = Token.INTERNAL_FIRST;
    private static final byte TRIPLEQUOTE2 = Token.INTERNAL_LAST;

    public PythonTokenMarker() {
        this.keywords = getKeywords();
    }

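    // Tokenizes a single line. The token argument is the state left over from
    // the end of the previous line (for example an unterminated triple-quoted
    // string); the return value is the state to carry into the next line.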
    public byte markTokensImpl(byte token, Segment line, int lineIndex) {
        char[] array = line.array;
        int offset = line.offset;
        lastOffset = offset;
        lastKeyword = offset;
        int length = line.count + offset;
        boolean backslash = false;

        loop: for (int i = offset; i < length; i++) {
            int i1 = (i + 1);

            char c = array[i];
            if (c == '\\') {
                backslash = !backslash;
                continue;
            }

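            // Dispatch on the current lexer state; Token.NULL means plain code.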
            switch (token) {
            case Token.NULL:
                switch (c) {
                case '#':
                    if (backslash)
                        backslash = false;
                    else {
                        doKeyword(line, i, c);
                        addToken(i - lastOffset, token);
                        addToken(length - i, Token.COMMENT1);
                        lastOffset = lastKeyword = length;
                        break loop;
                    }
                    break;
                case '"':
                    doKeyword(line, i, c);
                    if (backslash)
                        backslash = false;
                    else {
                        addToken(i - lastOffset, token);
                        if (SyntaxUtilities.regionMatches(false, line, i1, "\"\"")) {
                            token = TRIPLEQUOTE1;
                        } else {
                            token = Token.LITERAL1;
                        }
                        lastOffset = lastKeyword = i;
                    }
                    break;
                case '\'':
                    doKeyword(line, i, c);
                    if (backslash)
                        backslash = false;
                    else {
                        addToken(i - lastOffset, token);
                        if (SyntaxUtilities.regionMatches(false, line, i1, "''")) {
                            token = TRIPLEQUOTE2;
                        } else {
                            token = Token.LITERAL2;
                        }
                        lastOffset = lastKeyword = i;
                    }
                    break;
                default:
                    backslash = false;
                    if (!Character.isLetterOrDigit(c) && c != '_')
                        doKeyword(line, i, c);
                    break;
                }
                break;
            case Token.LITERAL1:
                if (backslash)
                    backslash = false;
                else if (c == '"') {
                    addToken(i1 - lastOffset, token);
                    token = Token.NULL;
                    lastOffset = lastKeyword = i1;
                }
                break;
            case Token.LITERAL2:
                if (backslash)
                    backslash = false;
                else if (c == '\'') {
                    // Close the single-quoted literal with the current (LITERAL2) token type.
                    addToken(i1 - lastOffset, token);
                    token = Token.NULL;
                    lastOffset = lastKeyword = i1;
                }
                break;
            case TRIPLEQUOTE1:
                if (backslash)
                    backslash = false;
                else if (SyntaxUtilities.regionMatches(false, line, i, "\"\"\"")) {
                    // The literal runs through the last quote at i + 2.
                    addToken((i + 3) - lastOffset, Token.LITERAL1);
                    token = Token.NULL;
                    lastOffset = lastKeyword = i + 3;
                    i += 2; // the loop's i++ then steps past the closing quotes
                }
                break;
            case TRIPLEQUOTE2:
                if (backslash)
                    backslash = false;
                else if (SyntaxUtilities.regionMatches(false, line, i, "'''")) {
                    // The literal runs through the last quote at i + 2.
                    addToken((i + 3) - lastOffset, Token.LITERAL2);
                    token = Token.NULL;
                    lastOffset = lastKeyword = i + 3;
                    i += 2; // the loop's i++ then steps past the closing quotes
                }
                break;
            default:
                throw new InternalError("Invalid state: " + token);
            }
        }

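        // The loop is done; flush whatever is still pending on this line and
        // return the state that the next line should start in.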
        switch (token) {
        case TRIPLEQUOTE1:
            addToken(length - lastOffset, Token.LITERAL1);
            break;
        case TRIPLEQUOTE2:
            addToken(length - lastOffset, Token.LITERAL2);
            break;
        case Token.NULL:
            doKeyword(line, length, '\0');
            // fall through
        default:
            addToken(length - lastOffset, token);
            break;
        }

        return token;
    }

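    // Lazily builds the keyword map shared by all PythonTokenMarker instances.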
    public static KeywordMap getKeywords() {
        if (pyKeywords == null) {
            pyKeywords = new KeywordMap(false);
            pyKeywords.add("and", Token.KEYWORD3);
            pyKeywords.add("not", Token.KEYWORD3);
            pyKeywords.add("or", Token.KEYWORD3);
            pyKeywords.add("if", Token.KEYWORD1);
            pyKeywords.add("for", Token.KEYWORD1);
            pyKeywords.add("assert", Token.KEYWORD1);
            pyKeywords.add("break", Token.KEYWORD1);
            pyKeywords.add("continue", Token.KEYWORD1);
            pyKeywords.add("elif", Token.KEYWORD1);
            pyKeywords.add("else", Token.KEYWORD1);
            pyKeywords.add("except", Token.KEYWORD1);
            pyKeywords.add("exec", Token.KEYWORD1);
            pyKeywords.add("finally", Token.KEYWORD1);
            pyKeywords.add("raise", Token.KEYWORD1);
            pyKeywords.add("return", Token.KEYWORD1);
            pyKeywords.add("try", Token.KEYWORD1);
            pyKeywords.add("while", Token.KEYWORD1);
            pyKeywords.add("def", Token.KEYWORD2);
            pyKeywords.add("class", Token.KEYWORD2);
            pyKeywords.add("del", Token.KEYWORD2);
            pyKeywords.add("from", Token.KEYWORD2);
            pyKeywords.add("global", Token.KEYWORD2);
            pyKeywords.add("import", Token.KEYWORD2);
            pyKeywords.add("in", Token.KEYWORD2);
            pyKeywords.add("is", Token.KEYWORD2);
            pyKeywords.add("lambda", Token.KEYWORD2);
            pyKeywords.add("pass", Token.KEYWORD2);
            pyKeywords.add("print", Token.KEYWORD2);
        }
        return pyKeywords;
    }

    // private members
    private static KeywordMap pyKeywords;

    private KeywordMap keywords;
    private int lastOffset;
    private int lastKeyword;

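    // If the text between lastKeyword and i is a keyword, emits any pending
    // plain text and then the keyword as its own token; in either case moves
    // lastKeyword past position i. The boolean result is currently unused.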
    private boolean doKeyword(Segment line, int i, char c) {
        int i1 = i + 1;

        int len = i - lastKeyword;
        byte id = keywords.lookup(line, lastKeyword, len);
        if (id != Token.NULL) {
            if (lastKeyword != lastOffset)
                addToken(lastKeyword - lastOffset, Token.NULL);
            addToken(len, id);
            lastOffset = i;
        }
        lastKeyword = i1;
        return false;
    }
}