001: /*
002: * PythonFormatter.java
003: *
004: * Copyright (C) 2002 Peter Graves
005: * $Id: PythonFormatter.java,v 1.1.1.1 2002/09/24 16:08:58 piso Exp $
006: *
007: * This program is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU General Public License
009: * as published by the Free Software Foundation; either version 2
010: * of the License, or (at your option) any later version.
011: *
012: * This program is distributed in the hope that it will be useful,
013: * but WITHOUT ANY WARRANTY; without even the implied warranty of
014: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
015: * GNU General Public License for more details.
016: *
017: * You should have received a copy of the GNU General Public License
018: * along with this program; if not, write to the Free Software
019: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
020: */
021:
022: package org.armedbear.j;
023:
024: import java.util.ArrayList;
025: import java.util.List;
026:
027: public final class PythonFormatter extends Formatter {
028: private static final int PYTHON_STATE_NEUTRAL = 0;
029: private static final int PYTHON_STATE_SINGLE_QUOTE = 1;
030: private static final int PYTHON_STATE_DOUBLE_QUOTE = 2;
031: private static final int PYTHON_STATE_IDENTIFIER = 3;
032: private static final int PYTHON_STATE_COMMENT = 4;
033: private static final int PYTHON_STATE_BRACE = 5;
034: private static final int PYTHON_STATE_NUMBER = 6;
035: private static final int PYTHON_STATE_HEXNUMBER = 7;
036: private static final int PYTHON_STATE_OPERATOR = 8;
037: private static final int PYTHON_STATE_TRIPLE_SINGLE = 9;
038: private static final int PYTHON_STATE_TRIPLE_DOUBLE = 10;
039:
040: private static final int PYTHON_FORMAT_TEXT = 0;
041: private static final int PYTHON_FORMAT_COMMENT = 1;
042: private static final int PYTHON_FORMAT_STRING = 2;
043: private static final int PYTHON_FORMAT_IDENTIFIER = 3;
044: private static final int PYTHON_FORMAT_KEYWORD = 4;
045: private static final int PYTHON_FORMAT_FUNCTION = 5;
046: private static final int PYTHON_FORMAT_OPERATOR = 6;
047: private static final int PYTHON_FORMAT_BRACE = 7;
048: private static final int PYTHON_FORMAT_NUMBER = 8;
049:
050: private static final PythonMode mode = PythonMode.getMode();
051:
052: public PythonFormatter(Buffer buffer) {
053: this .buffer = buffer;
054: }
055:
056: private int begin = 0;
057:
058: private void endSegment(String text, int offset, int state) {
059: if (offset - begin > 0) {
060: int format;
061: switch (state) {
062: case PYTHON_STATE_NEUTRAL:
063: format = PYTHON_FORMAT_TEXT;
064: break;
065: case PYTHON_STATE_SINGLE_QUOTE:
066: case PYTHON_STATE_DOUBLE_QUOTE:
067: case PYTHON_STATE_TRIPLE_SINGLE:
068: case PYTHON_STATE_TRIPLE_DOUBLE:
069: format = PYTHON_FORMAT_STRING;
070: break;
071: case PYTHON_STATE_IDENTIFIER:
072: format = PYTHON_FORMAT_IDENTIFIER;
073: break;
074: case PYTHON_STATE_COMMENT:
075: format = PYTHON_FORMAT_COMMENT;
076: break;
077: case PYTHON_STATE_OPERATOR:
078: format = PYTHON_FORMAT_OPERATOR;
079: break;
080: case PYTHON_STATE_BRACE:
081: format = PYTHON_FORMAT_BRACE;
082: break;
083: case PYTHON_STATE_NUMBER:
084: case PYTHON_STATE_HEXNUMBER:
085: format = PYTHON_FORMAT_NUMBER;
086: break;
087: default:
088: format = PYTHON_FORMAT_TEXT;
089: break;
090: }
091: addSegment(text, begin, offset, format);
092: begin = offset;
093: }
094: }
095:
096: private void parseLine(Line line) {
097: String text;
098: if (Editor.tabsAreVisible())
099: text = Utilities.makeTabsVisible(line.getText(), buffer
100: .getTabWidth());
101: else
102: text = Utilities
103: .detab(line.getText(), buffer.getTabWidth());
104: begin = 0;
105: int state = line.flags();
106: int i = 0;
107: final int limit = text.length();
108:
109: // Skip whitespace at start of line.
110: while (i < limit) {
111: if (Character.isWhitespace(text.charAt(i))) {
112: ++i;
113: } else {
114: endSegment(text, i, state);
115: break;
116: }
117: }
118:
119: while (i < limit) {
120: char c = text.charAt(i);
121: if (c == '\\' && i < limit - 1) {
122: // Escape char.
123: i += 2;
124: continue;
125: }
126:
127: if (state == PYTHON_STATE_SINGLE_QUOTE) {
128: if (c == '\'') {
129: endSegment(text, i + 1, state);
130: state = PYTHON_STATE_NEUTRAL;
131: }
132: ++i;
133: continue;
134: }
135:
136: if (state == PYTHON_STATE_DOUBLE_QUOTE) {
137: if (c == '"') {
138: endSegment(text, i + 1, state);
139: state = PYTHON_STATE_NEUTRAL;
140: }
141: ++i;
142: continue;
143: }
144:
145: if (state == PYTHON_STATE_TRIPLE_SINGLE) {
146: if (c == '\'' && text.regionMatches(i, "'''", 0, 3)) {
147: i += 3;
148: endSegment(text, i, state);
149: state = PYTHON_STATE_NEUTRAL;
150: } else
151: ++i;
152: continue;
153: }
154:
155: if (state == PYTHON_STATE_TRIPLE_DOUBLE) {
156: if (c == '"' && text.regionMatches(i, "\"\"\"", 0, 3)) {
157: i += 3;
158: endSegment(text, i, state);
159: state = PYTHON_STATE_NEUTRAL;
160: } else
161: ++i;
162: continue;
163: }
164:
165: // Reaching here, we're not in a quoted string.
166: if (c == '\'') {
167: endSegment(text, i, state);
168: if (text.regionMatches(i, "'''", 0, 3)) {
169: state = PYTHON_STATE_TRIPLE_SINGLE;
170: i += 3;
171: } else {
172: state = PYTHON_STATE_SINGLE_QUOTE;
173: ++i;
174: }
175: continue;
176: }
177:
178: if (c == '"') {
179: endSegment(text, i, state);
180: if (text.regionMatches(i, "\"\"\"", 0, 3)) {
181: state = PYTHON_STATE_TRIPLE_DOUBLE;
182: i += 3;
183: } else {
184: state = PYTHON_STATE_DOUBLE_QUOTE;
185: ++i;
186: }
187: continue;
188: }
189:
190: if (c == '#') {
191: endSegment(text, i, state);
192: endSegment(text, limit, PYTHON_STATE_COMMENT);
193: return;
194: }
195:
196: if (isOperatorChar(c)) {
197: if (state != PYTHON_STATE_OPERATOR) {
198: endSegment(text, i, state);
199: state = PYTHON_STATE_OPERATOR;
200: }
201: ++i;
202: continue;
203: }
204:
205: if (c == '{' || c == '}') {
206: if (state != PYTHON_STATE_BRACE) {
207: endSegment(text, i, state);
208: // Check for keyword.
209: LineSegment segment = getLastSegment();
210: if (segment != null && isKeyword(segment.getText()))
211: segment.setFormat(PYTHON_FORMAT_KEYWORD);
212: state = PYTHON_STATE_BRACE;
213: }
214: ++i;
215: continue;
216: }
217:
218: if (state == PYTHON_STATE_OPERATOR
219: || state == PYTHON_STATE_BRACE) {
220: if (mode.isIdentifierStart(c)) {
221: endSegment(text, i, state);
222: state = PYTHON_STATE_IDENTIFIER;
223: } else if (Character.isDigit(c)) {
224: endSegment(text, i, state);
225: state = PYTHON_STATE_NUMBER;
226: } else {
227: endSegment(text, i, state);
228: state = PYTHON_STATE_NEUTRAL;
229: }
230: ++i;
231: continue;
232: }
233:
234: if (state == PYTHON_STATE_IDENTIFIER) {
235: if (!mode.isIdentifierPart(c)) {
236: endSegment(text, i, state);
237: // Check for keyword or function.
238: LineSegment segment = getLastSegment();
239: if (segment != null) {
240: String segmentText = segment.getText();
241: if (isKeyword(segment.getText())) {
242: segment.setFormat(PYTHON_FORMAT_KEYWORD);
243: } else if (c == '(') {
244: segment.setFormat(PYTHON_FORMAT_FUNCTION);
245: } else if (Character.isWhitespace(c)) {
246: // Look ahead to see if next non-whitespace char is '('.
247: int j = i + 1;
248: while (j < limit
249: && Character.isWhitespace(c = text
250: .charAt(j)))
251: ++j;
252: if (c == '(')
253: segment
254: .setFormat(PYTHON_FORMAT_FUNCTION);
255: }
256: }
257: state = PYTHON_STATE_NEUTRAL;
258: }
259: ++i;
260: continue;
261: }
262:
263: if (state == PYTHON_STATE_NUMBER) {
264: if (Character.isDigit(c))
265: ;
266: else if (c == 'l' || c == 'L')
267: ;
268: else if (i - begin == 1 && c == 'x' || c == 'X')
269: state = PYTHON_STATE_HEXNUMBER;
270: else {
271: endSegment(text, i, state);
272: if (mode.isIdentifierStart(c))
273: state = PYTHON_STATE_IDENTIFIER;
274: else
275: state = PYTHON_STATE_NEUTRAL;
276: }
277: ++i;
278: continue;
279: }
280:
281: if (state == PYTHON_STATE_HEXNUMBER) {
282: if (Character.isDigit(c))
283: ;
284: else if ((c >= 'a' && c <= 'f')
285: || (c >= 'A' && c <= 'F'))
286: ;
287: else if (c == 'l' || c == 'L')
288: ;
289: else {
290: endSegment(text, i, state);
291: if (mode.isIdentifierStart(c))
292: state = PYTHON_STATE_IDENTIFIER;
293: else
294: state = PYTHON_STATE_NEUTRAL;
295: }
296: ++i;
297: continue;
298: }
299:
300: if (state == PYTHON_STATE_NEUTRAL) {
301: if (mode.isIdentifierStart(c)) {
302: endSegment(text, i, state);
303: state = PYTHON_STATE_IDENTIFIER;
304: } else if (Character.isDigit(c)) {
305: endSegment(text, i, state);
306: state = PYTHON_STATE_NUMBER;
307: }
308: }
309: ++i;
310: }
311:
312: // Reached end of line.
313: endSegment(text, i, state);
314:
315: if (state == PYTHON_STATE_IDENTIFIER) {
316: // Last token might be a keyword.
317: LineSegment segment = getLastSegment();
318: if (segment != null && isKeyword(segment.getText()))
319: segment.setFormat(PYTHON_FORMAT_KEYWORD);
320: }
321: }
322:
323: public LineSegmentList formatLine(Line line) {
324: clearSegmentList();
325: parseLine(line);
326: return segmentList;
327: }
328:
329: public boolean parseBuffer() {
330: int state = PYTHON_STATE_NEUTRAL;
331: Line line = buffer.getFirstLine();
332: boolean changed = false;
333: while (line != null) {
334: if (state != line.flags()) {
335: line.setFlags(state);
336: changed = true;
337: }
338: final String text = line.getText();
339: final int limit = line.length();
340: int i = 0;
341: while (i < limit) {
342: char c = text.charAt(i);
343: if (c == '\\') {
344: // Escape.
345: i += 2;
346: continue;
347: }
348: if (state == PYTHON_STATE_SINGLE_QUOTE) {
349: if (c == '\'')
350: state = PYTHON_STATE_NEUTRAL;
351: ++i;
352: continue;
353: }
354: if (state == PYTHON_STATE_DOUBLE_QUOTE) {
355: if (c == '"')
356: state = PYTHON_STATE_NEUTRAL;
357: ++i;
358: continue;
359: }
360: if (state == PYTHON_STATE_TRIPLE_SINGLE) {
361: if (c == '\'' && text.regionMatches(i, "'''", 0, 3)) {
362: state = PYTHON_STATE_NEUTRAL;
363: i += 3;
364: } else
365: ++i;
366: continue;
367: }
368: if (state == PYTHON_STATE_TRIPLE_DOUBLE) {
369: if (c == '"'
370: && text.regionMatches(i, "\"\"\"", 0, 3)) {
371: state = PYTHON_STATE_NEUTRAL;
372: i += 3;
373: } else
374: ++i;
375: continue;
376: }
377: // Not in quoted string.
378: if (c == '\'') {
379: if (text.regionMatches(i, "'''", 0, 3)) {
380: state = PYTHON_STATE_TRIPLE_SINGLE;
381: i += 3;
382: } else {
383: state = PYTHON_STATE_SINGLE_QUOTE;
384: ++i;
385: }
386: continue;
387: }
388: if (c == '"') {
389: if (text.regionMatches(i, "\"\"\"", 0, 3)) {
390: state = PYTHON_STATE_TRIPLE_DOUBLE;
391: i += 3;
392: } else {
393: state = PYTHON_STATE_DOUBLE_QUOTE;
394: ++i;
395: }
396: continue;
397: }
398: if (c == '#')
399: break;
400: ++i;
401: }
402: line = line.next();
403: }
404: buffer.setNeedsParsing(false);
405: return changed;
406: }
407:
408: private static final boolean isOperatorChar(char c) {
409: return "!&|<>=+/*-".indexOf(c) >= 0;
410: }
411:
412: public FormatTable getFormatTable() {
413: if (formatTable == null) {
414: formatTable = new FormatTable(null);
415: formatTable.addEntryFromPrefs(PYTHON_FORMAT_TEXT, "text");
416: formatTable.addEntryFromPrefs(PYTHON_FORMAT_COMMENT,
417: "comment");
418: formatTable.addEntryFromPrefs(PYTHON_FORMAT_STRING,
419: "string");
420: formatTable.addEntryFromPrefs(PYTHON_FORMAT_IDENTIFIER,
421: "identifier", "text");
422: formatTable.addEntryFromPrefs(PYTHON_FORMAT_KEYWORD,
423: "keyword");
424: formatTable.addEntryFromPrefs(PYTHON_FORMAT_FUNCTION,
425: "function");
426: formatTable.addEntryFromPrefs(PYTHON_FORMAT_OPERATOR,
427: "operator");
428: formatTable.addEntryFromPrefs(PYTHON_FORMAT_BRACE, "brace");
429: formatTable.addEntryFromPrefs(PYTHON_FORMAT_NUMBER,
430: "number");
431: }
432: return formatTable;
433: }
434: }
|