001: /*
002: * CFormatter.java
003: *
004: * Copyright (C) 1998-2003 Peter Graves
005: * $Id: CFormatter.java,v 1.3 2003/12/29 19:25:24 piso Exp $
006: *
007: * This program is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU General Public License
009: * as published by the Free Software Foundation; either version 2
010: * of the License, or (at your option) any later version.
011: *
012: * This program is distributed in the hope that it will be useful,
013: * but WITHOUT ANY WARRANTY; without even the implied warranty of
014: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
015: * GNU General Public License for more details.
016: *
017: * You should have received a copy of the GNU General Public License
018: * along with this program; if not, write to the Free Software
019: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
020: */
021:
022: package org.armedbear.j;
023:
024: import gnu.regexp.RE;
025: import gnu.regexp.UncheckedRE;
026: import java.util.HashSet;
027:
028: public final class CFormatter extends Formatter implements Constants {
029: private static final int C_FORMAT_TEXT = 0;
030: private static final int C_FORMAT_COMMENT = 1;
031: private static final int C_FORMAT_STRING = 2;
032: private static final int C_FORMAT_IDENTIFIER = 3;
033: private static final int C_FORMAT_KEYWORD = 4;
034: private static final int C_FORMAT_FUNCTION = 5;
035: private static final int C_FORMAT_OPERATOR = 6;
036: private static final int C_FORMAT_BRACE = 7;
037: private static final int C_FORMAT_NUMBER = 8;
038: private static final int C_FORMAT_PREPROCESSOR = 9;
039: private static final int C_FORMAT_DISABLED = 10;
040:
041: private static final RE lynxArgsRE = new UncheckedRE(
042: "ARGS[0-9][0-9]?");
043:
044: private final Mode mode;
045:
046: public CFormatter(Buffer buffer, int language) {
047: this .buffer = buffer;
048: switch (language) {
049: case LANGUAGE_C:
050: mode = CMode.getMode();
051: break;
052: case LANGUAGE_CPP:
053: mode = CppMode.getMode();
054: break;
055: case LANGUAGE_OBJC:
056: mode = ObjCMode.getMode();
057: break;
058: default:
059: Debug.assertTrue(false);
060: mode = null;
061: break;
062: }
063: }
064:
065: private int tokenBegin = 0;
066:
067: private void endToken(String text, int tokenEnd, int state) {
068: if (tokenEnd - tokenBegin > 0) {
069: int format = C_FORMAT_TEXT;
070: switch (state) {
071: case STATE_NEUTRAL:
072: format = C_FORMAT_TEXT;
073: break;
074: case STATE_QUOTE:
075: format = C_FORMAT_STRING;
076: break;
077: case STATE_IDENTIFIER:
078: format = C_FORMAT_IDENTIFIER;
079: break;
080: case STATE_COMMENT:
081: format = C_FORMAT_COMMENT;
082: break;
083: case STATE_OPERATOR:
084: format = C_FORMAT_OPERATOR;
085: break;
086: case STATE_BRACE:
087: format = C_FORMAT_BRACE;
088: break;
089: case STATE_NUMBER:
090: case STATE_HEXNUMBER:
091: format = C_FORMAT_NUMBER;
092: break;
093: case STATE_PREPROCESSOR:
094: format = C_FORMAT_PREPROCESSOR;
095: break;
096: }
097: addSegment(text, tokenBegin, tokenEnd, format);
098: tokenBegin = tokenEnd;
099: }
100: }
101:
102: private void parseLine(Line line) {
103: if (line == null) {
104: addSegment("", C_FORMAT_TEXT);
105: return;
106: }
107: String text;
108: if (Editor.tabsAreVisible())
109: text = Utilities.makeTabsVisible(line.getText(), buffer
110: .getTabWidth());
111: else
112: text = Utilities
113: .detab(line.getText(), buffer.getTabWidth());
114: if (line.flags() == STATE_DISABLED) {
115: addSegment(text, C_FORMAT_DISABLED);
116: return;
117: }
118: tokenBegin = 0;
119: boolean isPreprocessorLine = false;
120: char quoteChar = '\0';
121: int state = line.flags();
122: if (state == STATE_QUOTE)
123: quoteChar = '"';
124: int i = 0;
125: final int limit = text.length();
126:
127: // Skip whitespace at start of line.
128: while (i < limit) {
129: if (Character.isWhitespace(text.charAt(i))) {
130: ++i;
131: } else {
132: endToken(text, i, state);
133: break;
134: }
135: }
136:
137: char c;
138:
139: // Test for preprocessor directive. Must be first non-whitespace character.
140: if (i < limit && state == STATE_NEUTRAL) {
141: c = text.charAt(i);
142: if (c == '#') {
143: state = STATE_PREPROCESSOR;
144: isPreprocessorLine = true;
145: ++i;
146: while (i < limit
147: && (Character.isWhitespace(c = text.charAt(i)) || c == '#'))
148: ++i;
149: while (i < limit && (c = text.charAt(i)) >= 'a'
150: && c <= 'z')
151: ++i;
152: endToken(text, i, state);
153: state = STATE_NEUTRAL;
154: }
155: }
156:
157: while (i < limit) {
158: c = text.charAt(i);
159: if (state == STATE_COMMENT) {
160: if (i < limit - 1 && c == '*'
161: && text.charAt(i + 1) == '/') {
162: endToken(text, i + 2, state);
163: state = STATE_NEUTRAL;
164: i += 2;
165: } else
166: ++i;
167: continue;
168: }
169: if (state == STATE_QUOTE) {
170: if (c == quoteChar) {
171: endToken(text, i + 1, state);
172: state = STATE_NEUTRAL;
173: } else if (c == '\\' && i < limit - 1) {
174: // Escape char.
175: ++i;
176: }
177: ++i;
178: continue;
179: }
180:
181: // Reaching here, we're not in a comment or a quoted string.
182: if (c == '"' || c == '\'') {
183: endToken(text, i, state);
184: state = STATE_QUOTE;
185: quoteChar = c;
186: ++i;
187: continue;
188: }
189: if (c == '/') {
190: if (i < limit - 1) {
191: if (text.charAt(i + 1) == '*') {
192: endToken(text, i, state);
193: state = STATE_COMMENT;
194: i += 2;
195: } else if (text.charAt(i + 1) == '/') {
196: endToken(text, i, state);
197: endToken(text, limit, STATE_COMMENT);
198: return;
199: } else
200: ++i;
201: } else
202: ++i;
203: continue;
204: }
205: if (!isPreprocessorLine && isOperatorChar(c)) {
206: if (state != STATE_OPERATOR) {
207: endToken(text, i, state);
208: // Check for keyword (as in e.g. "char*").
209: LineSegment segment = getLastSegment();
210: if (segment != null && isKeyword(segment.getText()))
211: segment.setFormat(C_FORMAT_KEYWORD);
212: state = STATE_OPERATOR;
213: }
214: ++i;
215: continue;
216: }
217: if (c == '{' || c == '}') {
218: if (state != STATE_BRACE) {
219: endToken(text, i, state);
220: if (!isPreprocessorLine) {
221: // Check for keyword.
222: LineSegment segment = getLastSegment();
223: if (segment != null
224: && isKeyword(segment.getText()))
225: segment.setFormat(C_FORMAT_KEYWORD);
226: }
227: state = STATE_BRACE;
228: }
229: ++i;
230: continue;
231: }
232: if (state == STATE_OPERATOR || state == STATE_BRACE) {
233: if (mode.isIdentifierStart(c)) {
234: endToken(text, i, state);
235: state = STATE_IDENTIFIER;
236: } else if (Character.isDigit(c)) {
237: endToken(text, i, state);
238: state = STATE_NUMBER;
239: } else {
240: endToken(text, i, state);
241: state = STATE_NEUTRAL;
242: }
243: ++i;
244: continue;
245: }
246: if (state == STATE_IDENTIFIER) {
247: if (!mode.isIdentifierPart(c)) {
248: endToken(text, i, state);
249: // Check for keyword or function.
250: LineSegment segment = getLastSegment();
251: if (segment != null) {
252: final String segmentText = segment.getText();
253: if (segmentText.startsWith("ARGS")
254: && lynxArgsRE.isMatch(segmentText)) {
255: // Lynx source "ARGSnn" macro.
256: ;
257: } else if (!isPreprocessorLine
258: && isKeyword(segmentText)) {
259: segment.setFormat(C_FORMAT_KEYWORD);
260: } else if (c == '(') {
261: segment.setFormat(C_FORMAT_FUNCTION);
262: } else if (Character.isWhitespace(c)) {
263: // Look ahead to see if next non-whitespace char is '('.
264: int j = i + 1;
265: while (j < limit
266: && Character.isWhitespace(c = text
267: .charAt(j)))
268: ++j;
269: if (c == '(') {
270: segment.setFormat(C_FORMAT_FUNCTION);
271: } else if (c == 'A'
272: && text.regionMatches(j, "ARGS", 0,
273: 4)) {
274: // Lynx "ARGSnn" macro.
275: if (lynxArgsRE.getMatch(text
276: .substring(j)) != null)
277: segment
278: .setFormat(C_FORMAT_FUNCTION);
279: } else if (c == 'N'
280: && text.regionMatches(j, "NOARGS",
281: 0, 6)) {
282: // Lynx macro.
283: segment.setFormat(C_FORMAT_FUNCTION);
284: }
285: }
286: }
287: state = STATE_NEUTRAL;
288: }
289: ++i;
290: continue;
291: }
292: if (state == STATE_NUMBER) {
293: if (Character.isDigit(c))
294: ;
295: else if (c == 'u' || c == 'U' || c == 'l' || c == 'L')
296: ;
297: else if (i - tokenBegin == 1 && c == 'x' || c == 'X')
298: state = STATE_HEXNUMBER;
299: else {
300: endToken(text, i, state);
301: if (mode.isIdentifierStart(c))
302: state = STATE_IDENTIFIER;
303: else
304: state = STATE_NEUTRAL;
305: }
306: ++i;
307: continue;
308: }
309: if (state == STATE_HEXNUMBER) {
310: if (Character.isDigit(c))
311: ;
312: else if ((c >= 'a' && c <= 'f')
313: || (c >= 'A' && c <= 'F'))
314: ;
315: else if (c == 'u' || c == 'U' || c == 'l' || c == 'L')
316: ;
317: else {
318: endToken(text, i, state);
319: if (mode.isIdentifierStart(c))
320: state = STATE_IDENTIFIER;
321: else
322: state = STATE_NEUTRAL;
323: }
324: ++i;
325: continue;
326: }
327: if (state == STATE_NEUTRAL) {
328: if (mode.isIdentifierStart(c)) {
329: endToken(text, i, state);
330: state = STATE_IDENTIFIER;
331: } else if (Character.isDigit(c)) {
332: endToken(text, i, state);
333: state = STATE_NUMBER;
334: }
335: }
336: ++i;
337: }
338:
339: // Reached end of line.
340: endToken(text, i, state);
341: if (state == STATE_IDENTIFIER && !isPreprocessorLine) {
342: // Last token might be a keyword.
343: LineSegment segment = getLastSegment();
344: if (segment != null && isKeyword(segment.getText()))
345: segment.setFormat(C_FORMAT_KEYWORD);
346: }
347: }
348:
349: public LineSegmentList formatLine(Line line) {
350: clearSegmentList();
351: parseLine(line);
352: return segmentList;
353: }
354:
355: public boolean parseBuffer() {
356: int state = STATE_NEUTRAL;
357: boolean continued = false;
358: Line line = buffer.getFirstLine();
359: boolean changed = false;
360: while (line != null) {
361: int oldflags = line.flags();
362: // Quoted strings can't span lines in C.
363: if (state == STATE_QUOTE && !continued)
364: state = STATE_NEUTRAL;
365: if (state != oldflags) {
366: line.setFlags(state);
367: changed = true;
368: }
369: if (state == STATE_NEUTRAL) {
370: if (line.getText().startsWith("#if 0")) {
371: // Might be null.
372: Line match = CMode.findMatchPreprocessor(line);
373: while (line != null && line != match) {
374: oldflags = line.flags();
375: if (oldflags != STATE_DISABLED) {
376: line.setFlags(STATE_DISABLED);
377: changed = true;
378: }
379: line = line.next();
380: }
381: if (line != null
382: && line.getText().startsWith("#en")) {
383: oldflags = line.flags();
384: if (oldflags != STATE_DISABLED) {
385: line.setFlags(STATE_DISABLED);
386: changed = true;
387: }
388: line = line.next();
389: }
390: continue;
391: }
392: }
393: char quoteChar = '\0';
394: final int limit = line.length();
395: char c = '\0';
396: continued = false;
397: for (int i = 0; i < limit; i++) {
398: c = line.charAt(i);
399: if (c == '\\' && i < limit - 1) {
400: // Escape.
401: ++i;
402: continue;
403: }
404: if (state == STATE_COMMENT) {
405: if (c == '*' && i < limit - 1) {
406: c = line.charAt(i + 1);
407: if (c == '/') {
408: ++i;
409: state = STATE_NEUTRAL;
410: }
411: }
412: continue;
413: }
414: if (state == STATE_QUOTE) {
415: if (c == quoteChar) {
416: state = STATE_NEUTRAL;
417: quoteChar = '\0';
418: }
419: continue;
420: }
421: // Not in comment or quoted string.
422: if (c == '/' && i < limit - 1) {
423: c = line.charAt(++i);
424: if (c == '/') {
425: // Single-line comment beginning.
426: // Ignore rest of line.
427: break;
428: } else if (c == '*')
429: state = STATE_COMMENT;
430: } else if (c == '"' || c == '\'') {
431: state = STATE_QUOTE;
432: quoteChar = c;
433: }
434: }
435: if (c == '\\')
436: continued = true;
437: line = line.next();
438: }
439: buffer.setNeedsParsing(false);
440: return changed;
441: }
442:
443: private static final boolean isOperatorChar(char c) {
444: return "!&|<>=+/*-".indexOf(c) >= 0;
445: }
446:
447: public FormatTable getFormatTable() {
448: if (formatTable == null) {
449: formatTable = new FormatTable("CMode");
450: formatTable.addEntryFromPrefs(C_FORMAT_TEXT, "text");
451: formatTable.addEntryFromPrefs(C_FORMAT_COMMENT, "comment");
452: formatTable.addEntryFromPrefs(C_FORMAT_STRING, "string");
453: formatTable.addEntryFromPrefs(C_FORMAT_IDENTIFIER,
454: "identifier", "text");
455: formatTable.addEntryFromPrefs(C_FORMAT_KEYWORD, "keyword");
456: formatTable
457: .addEntryFromPrefs(C_FORMAT_FUNCTION, "function");
458: formatTable
459: .addEntryFromPrefs(C_FORMAT_OPERATOR, "operator");
460: formatTable.addEntryFromPrefs(C_FORMAT_BRACE, "brace");
461: formatTable.addEntryFromPrefs(C_FORMAT_NUMBER, "number");
462: formatTable.addEntryFromPrefs(C_FORMAT_PREPROCESSOR,
463: "preprocessor");
464: formatTable
465: .addEntryFromPrefs(C_FORMAT_DISABLED, "disabled");
466: }
467: return formatTable;
468: }
469: }
|