/*
 * PythonTokenMarker.java - Python token marker
 * Copyright (C) 1999 Jonathan Revusky
 * Copyright (C) 1998, 1999 Slava Pestov
 *
 * You may use and modify this package for any purpose. Redistribution is
 * permitted, in both source and binary form, provided that this notice
 * remains intact in all source distributions of this package.
 */

package org.syntax.jedit.tokenmarker;

import javax.swing.text.Segment;

import org.syntax.jedit.KeywordMap;
import org.syntax.jedit.SyntaxUtilities;

/**
 * Python token marker.
 *
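 * <p>
 * A minimal usage sketch; it assumes the {@code JEditTextArea} component
 * that ships with this syntax package, whose {@code setTokenMarker} method
 * installs a marker:
 * </p>
 *
 * <pre>
 * JEditTextArea textArea = new JEditTextArea();
 * textArea.setTokenMarker(new PythonTokenMarker());
 * </pre>
 *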
 * @author Jonathan Revusky
 * @version $Id: PythonTokenMarker.java,v 1.3 1999/12/14 04:20:35 sp Exp $
 */
public class PythonTokenMarker extends TokenMarker {
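    // Extra lexer states for the bodies of """...""" and '''...''' strings,
    // taken from the range Token reserves for marker-internal use.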
    private static final byte TRIPLEQUOTE1 = Token.INTERNAL_FIRST;
    private static final byte TRIPLEQUOTE2 = Token.INTERNAL_LAST;

    public PythonTokenMarker() {
        this.keywords = getKeywords();
    }

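    /**
     * Tokenizes one line of Python source for syntax highlighting.
     *
     * @param token     the lexer state left over from the previous line,
     *                  for example an unterminated triple-quoted string
     * @param line      the text of the line to tokenize
     * @param lineIndex the index of the line within the document
     * @return the lexer state to carry into the following line
     */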
    public byte markTokensImpl(byte token, Segment line, int lineIndex) {
        char[] array = line.array;
        int offset = line.offset;
        lastOffset = offset;
        lastKeyword = offset;
        int length = line.count + offset;
        boolean backslash = false;

        loop: for (int i = offset; i < length; i++) {
            int i1 = (i + 1);

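            // A backslash toggles the escape flag; the escaped character is
            // examined on the next iteration, so an escaped quote never
            // opens or closes a string.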
            char c = array[i];
            if (c == '\\') {
                backslash = !backslash;
                continue;
            }

            switch (token) {
            case Token.NULL:
                switch (c) {
                case '#':
                    if (backslash)
                        backslash = false;
                    else {
                        doKeyword(line, i, c);
                        addToken(i - lastOffset, token);
                        addToken(length - i, Token.COMMENT1);
                        lastOffset = lastKeyword = length;
                        break loop;
                    }
                    break;
                case '"':
                    doKeyword(line, i, c);
                    if (backslash)
                        backslash = false;
                    else {
                        addToken(i - lastOffset, token);
                        if (SyntaxUtilities.regionMatches(false, line, i1, "\"\"")) {
                            token = TRIPLEQUOTE1;
                        } else {
                            token = Token.LITERAL1;
                        }
                        lastOffset = lastKeyword = i;
                    }
                    break;
                case '\'':
                    doKeyword(line, i, c);
                    if (backslash)
                        backslash = false;
                    else {
                        addToken(i - lastOffset, token);
                        if (SyntaxUtilities.regionMatches(false, line, i1, "''")) {
                            token = TRIPLEQUOTE2;
                        } else {
                            token = Token.LITERAL2;
                        }
                        lastOffset = lastKeyword = i;
                    }
                    break;
                default:
                    backslash = false;
                    if (!Character.isLetterOrDigit(c) && c != '_')
                        doKeyword(line, i, c);
                    break;
                }
                break;
            case Token.LITERAL1:
                if (backslash)
                    backslash = false;
                else if (c == '"') {
                    addToken(i1 - lastOffset, token);
                    token = Token.NULL;
                    lastOffset = lastKeyword = i1;
                }
                break;
            case Token.LITERAL2:
                if (backslash)
                    backslash = false;
                else if (c == '\'') {
                    // Close the single-quoted string with its own style.
                    addToken(i1 - lastOffset, Token.LITERAL2);
                    token = Token.NULL;
                    lastOffset = lastKeyword = i1;
                }
                break;
            case TRIPLEQUOTE1:
                if (backslash)
                    backslash = false;
                else if (SyntaxUtilities.regionMatches(false, line, i, "\"\"\"")) {
                    // Include the three closing quotes in the literal, then
                    // resume scanning right after them (the loop's i++ is
                    // compensated below).
                    addToken((i += 3) - lastOffset, Token.LITERAL1);
                    token = Token.NULL;
                    lastOffset = lastKeyword = i;
                    i--;
                }
                break;
            case TRIPLEQUOTE2:
                if (backslash)
                    backslash = false;
                else if (SyntaxUtilities.regionMatches(false, line, i, "'''")) {
                    // Same handling as TRIPLEQUOTE1 for '''-delimited strings.
                    addToken((i += 3) - lastOffset, Token.LITERAL1);
                    token = Token.NULL;
                    lastOffset = lastKeyword = i;
                    i--;
                }
                break;
            default:
                throw new InternalError("Invalid state: " + token);
            }
        }

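        // Flush whatever is left of the line. An open triple-quoted string is
        // painted as a string literal and its state is returned so the next
        // line continues it; in the plain-text state the trailing word is
        // first checked against the keyword map, and the rest of the line is
        // emitted with the current state.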
        switch (token) {
        case TRIPLEQUOTE1:
        case TRIPLEQUOTE2:
            addToken(length - lastOffset, Token.LITERAL1);
            break;
        case Token.NULL:
            doKeyword(line, length, '\0');
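            // fall through: emit the rest of the line as plain text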
        default:
            addToken(length - lastOffset, token);
            break;
        }

        return token;
    }

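    /**
     * Returns the keyword map shared by all Python token markers, building it
     * lazily on first use. The keywords are spread across the KEYWORD1,
     * KEYWORD2 and KEYWORD3 highlight styles.
     */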
    public static KeywordMap getKeywords() {
        if (pyKeywords == null) {
            pyKeywords = new KeywordMap(false);
            pyKeywords.add("and", Token.KEYWORD3);
            pyKeywords.add("not", Token.KEYWORD3);
            pyKeywords.add("or", Token.KEYWORD3);
            pyKeywords.add("if", Token.KEYWORD1);
            pyKeywords.add("for", Token.KEYWORD1);
            pyKeywords.add("assert", Token.KEYWORD1);
            pyKeywords.add("break", Token.KEYWORD1);
            pyKeywords.add("continue", Token.KEYWORD1);
            pyKeywords.add("elif", Token.KEYWORD1);
            pyKeywords.add("else", Token.KEYWORD1);
            pyKeywords.add("except", Token.KEYWORD1);
            pyKeywords.add("exec", Token.KEYWORD1);
            pyKeywords.add("finally", Token.KEYWORD1);
            pyKeywords.add("raise", Token.KEYWORD1);
            pyKeywords.add("return", Token.KEYWORD1);
            pyKeywords.add("try", Token.KEYWORD1);
            pyKeywords.add("while", Token.KEYWORD1);
            pyKeywords.add("def", Token.KEYWORD2);
            pyKeywords.add("class", Token.KEYWORD2);
            pyKeywords.add("del", Token.KEYWORD2);
            pyKeywords.add("from", Token.KEYWORD2);
            pyKeywords.add("global", Token.KEYWORD2);
            pyKeywords.add("import", Token.KEYWORD2);
            pyKeywords.add("in", Token.KEYWORD2);
            pyKeywords.add("is", Token.KEYWORD2);
            pyKeywords.add("lambda", Token.KEYWORD2);
            pyKeywords.add("pass", Token.KEYWORD2);
            pyKeywords.add("print", Token.KEYWORD2);
        }
        return pyKeywords;
    }

    // private members
    private static KeywordMap pyKeywords;

    private KeywordMap keywords;
    private int lastOffset;
    private int lastKeyword;

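    /**
     * Checks whether the word that ends just before offset <code>i</code> is
     * a Python keyword. If it is, any pending plain text is flushed and the
     * word is emitted with its keyword style; in either case the keyword
     * scan restarts after the delimiter at offset <code>i</code>.
     */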
    private boolean doKeyword(Segment line, int i, char c) {
        int i1 = i + 1;

        int len = i - lastKeyword;
        byte id = keywords.lookup(line, lastKeyword, len);
        if (id != Token.NULL) {
            if (lastKeyword != lastOffset)
                addToken(lastKeyword - lastOffset, Token.NULL);
            addToken(len, id);
            lastOffset = i;
        }
        lastKeyword = i1;
        return false;
    }
}