001: /*
002: * LispFormatter.java
003: *
004: * Copyright (C) 1998-2004 Peter Graves
005: * $Id: LispFormatter.java,v 1.35 2004/04/11 19:10:40 piso Exp $
006: *
007: * This program is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU General Public License
009: * as published by the Free Software Foundation; either version 2
010: * of the License, or (at your option) any later version.
011: *
012: * This program is distributed in the hope that it will be useful,
013: * but WITHOUT ANY WARRANTY; without even the implied warranty of
014: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
015: * GNU General Public License for more details.
016: *
017: * You should have received a copy of the GNU General Public License
018: * along with this program; if not, write to the Free Software
019: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
020: */
021:
022: package org.armedbear.j;
023:
024: import gnu.regexp.RE;
025: import gnu.regexp.REMatch;
026: import gnu.regexp.UncheckedRE;
027:
028: public final class LispFormatter extends Formatter {
// States.
// Parser states specific to Lisp, numbered upward from STATE_LAST
// (STATE_LAST is not defined in this file; presumably it is inherited from
// Formatter -- confirm there). parseBuffer() stores some of these in line
// flags and parseLine() uses them as its starting state.

// Just after '(': the next token is the first element of the list.
private static final int STATE_OPEN_PAREN = STATE_LAST + 1;
// Just after ')'.
private static final int STATE_CLOSE_PAREN = STATE_LAST + 2;
// First element of a list that is NOT in functional position (plain text).
private static final int STATE_CAR = STATE_LAST + 3;
// First element of a list in functional position; endToken() checks it
// against the keyword list and the definers.
private static final int STATE_DEFUN = STATE_LAST + 4;
// After a definer: waiting for the name being defined.
private static final int STATE_DEFINITION = STATE_LAST + 5;
// Inside the name that follows a definer.
private static final int STATE_NAME = STATE_LAST + 6;
// Token following a comma marker (",", ",@" or ",.").
private static final int STATE_SUBSTITUTION = STATE_LAST + 7;
// Token introduced by ':' or '&'.
private static final int STATE_SECONDARY_KEYWORD = STATE_LAST + 8;
// Backquote and comma markers themselves.
private static final int STATE_PUNCTUATION = STATE_LAST + 9;
// Entered once the name following a definer has ended.
private static final int STATE_ARGLIST = STATE_LAST + 10;
// Line flag written by skipQuotedObject(Position) for lines reached while
// a quoted object is still being skipped.
private static final int STATE_QUOTED_LIST = STATE_LAST + 11;
041:
// Formats.
// Format identifiers passed to addSegment(); each one is registered with
// the format table in getFormatTable().
private static final int LISP_FORMAT_TEXT = 0;
private static final int LISP_FORMAT_COMMENT = 1;
private static final int LISP_FORMAT_STRING = 2;
// First element of a form that isKeyword() accepts but isDefiner() rejects.
private static final int LISP_FORMAT_KEYWORD = 3;
// First element of a form that is both a keyword and a definer.
private static final int LISP_FORMAT_DEFUN = 4;
// The name that follows a definer.
private static final int LISP_FORMAT_NAME = 5;
private static final int LISP_FORMAT_PARENTHESIS = 6;
// Backquote and comma markers.
private static final int LISP_FORMAT_PUNCTUATION = 7;
// Token following a comma marker.
private static final int LISP_FORMAT_SUBSTITUTION = 8;
// Tokens introduced by ':' or '&'.
private static final int LISP_FORMAT_SECONDARY_KEYWORD = 9;
053:
054: private static final RE condRE = new UncheckedRE(
055: "\\([ \t]*cond[ \t]*\\(\\(");
056:
057: private static final RE dolistRE = new UncheckedRE(
058: "\\([ \t]*dolist[ \t]*\\(");
059:
060: // Matches e.g. "(do () ((endp list1))".
061: private static final RE doRE = new UncheckedRE(
062: "\\([ \t]*do\\*?[ \t]*\\(.*\\)[ \t]\\(\\(");
063:
064: private static final RE letOrDoRE = new UncheckedRE(
065: "\\([ \t]*(let|do)\\*?[ \t]*\\(\\(");
066:
// Mode of the buffer being formatted; consulted for identifier syntax.
private final Mode mode;

/**
 * Creates a formatter for the given buffer and remembers the buffer's
 * mode.
 */
public LispFormatter(Buffer buffer) {
    this.buffer = buffer;
    mode = buffer.getMode();
}
073:
074: private Line currentLine;
075: private int tokenBegin = 0;
076:
077: private void endToken(String text, int tokenEnd, int state) {
078: if (tokenEnd - tokenBegin > 0) {
079: int format = LISP_FORMAT_TEXT;
080: switch (state) {
081: case STATE_NEUTRAL:
082: case STATE_ARGLIST:
083: case STATE_QUOTED_LIST:
084: break;
085: case STATE_QUOTE:
086: format = LISP_FORMAT_STRING;
087: break;
088: case STATE_OPEN_PAREN:
089: case STATE_CLOSE_PAREN:
090: format = LISP_FORMAT_PARENTHESIS;
091: break;
092: case STATE_CAR:
093: break;
094: case STATE_DEFUN: {
095: String token = text.substring(tokenBegin, tokenEnd)
096: .trim();
097: if (isKeyword(token)) {
098: if (isDefiner(token))
099: format = LISP_FORMAT_DEFUN;
100: else
101: format = LISP_FORMAT_KEYWORD;
102: }
103: break;
104: }
105: case STATE_NAME:
106: format = LISP_FORMAT_NAME;
107: break;
108: case STATE_DEFINITION:
109: case STATE_IDENTIFIER:
110: break;
111: case STATE_SECONDARY_KEYWORD:
112: format = LISP_FORMAT_SECONDARY_KEYWORD;
113: break;
114: case STATE_SUBSTITUTION:
115: format = LISP_FORMAT_SUBSTITUTION;
116: break;
117: case STATE_COMMENT:
118: format = LISP_FORMAT_COMMENT;
119: break;
120: case STATE_PUNCTUATION:
121: format = LISP_FORMAT_PUNCTUATION;
122: }
123: addSegment(text, tokenBegin, tokenEnd, format);
124: tokenBegin = tokenEnd;
125: }
126: }
127:
128: private static final boolean isDefiner(String s) {
129: if (s.length() >= 5 && s.startsWith("def")) {
130: String translated = LispMode.translateDefiner(s);
131: if (translated != null) {
132: // Exclude DEFCONSTANT, DEFPARAMETER, DEFVAR.
133: if (translated.equals("defconstant"))
134: return false;
135: if (translated.equals("defparameter"))
136: return false;
137: if (translated.equals("defvar"))
138: return false;
139: return true;
140: }
141: }
142: return false;
143: }
144:
145: // Returns true if token at specified offset in detabbed text from line is
146: // in functional position, based on context.
147: private static final boolean isPositionFunctional(
148: final String text, // Detabbed text.
149: final int offset, // Offset of token in detabbed text.
150: final Line line) // Line (which may contain tab characters).
151: {
152: if (offset >= 1 && text.charAt(offset - 1) == '(') {
153: if (offset >= 2 && text.charAt(offset - 2) == '(') {
154: // Token is preceded by "((".
155: if (countLeadingSpaces(text) == offset - 2) {
156: // First non-whitespace text on line.
157: Position pos = LispMode
158: .findContainingSexp(new Position(line, 0));
159: if (pos != null) {
160: // Skip '('.
161: pos.skip();
162: String s = parseToken(pos).toLowerCase();
163: if (s.equals("cond"))
164: return true;
165: // Check for end-test form after DO/DO*.
166: if (s.equals("do") || s.equals("do*"))
167: return true;
168: }
169: }
170: REMatch m = condRE.getMatch(text);
171: if (m != null && m.getEndIndex() == offset) {
172: return true;
173: }
174: m = doRE.getMatch(text);
175: if (m != null && m.getEndIndex() == offset)
176: return true;
177: return false;
178: }
179: // Text is preceded by single '('.
180: if (countLeadingSpaces(text) == offset - 1) {
181: // First non-whitespace on line.
182: Position pos = LispMode
183: .findContainingSexp(new Position(line, 0));
184: if (pos != null) {
185: if (pos.lookingAt("((")) {
186: REMatch m = letOrDoRE.getMatch(pos.getLine()
187: .getText());
188: if (m != null
189: && m.getEndIndex() == pos.getOffset() + 2)
190: return false;
191: } else {
192: // Skip '('.
193: pos.skip();
194: String s = parseToken(pos).toLowerCase();
195: if (s.equals("case"))
196: return false;
197: if (s.equals("ccase"))
198: return false;
199: if (s.equals("ecase"))
200: return false;
201: if (s.equals("typecase"))
202: return false;
203: if (s.equals("ctypecase"))
204: return false;
205: if (s.equals("etypecase"))
206: return false;
207: }
208: }
209: } else {
210: // Not first whitespace on line.
211: REMatch m = dolistRE.getMatch(text);
212: if (m != null && m.getEndIndex() == offset)
213: return false;
214: }
215: }
216: return true;
217: }
218:
219: // Returns next whitespace-delimited token starting at (or after) pos.
220: // Same line only. Never returns null.
221: private static final String parseToken(Position pos) {
222: final Line line = pos.getLine();
223: final int limit = line.length();
224: int begin = pos.getOffset();
225: while (begin < limit
226: && Character.isWhitespace(line.charAt(begin)))
227: ++begin;
228: if (begin == limit)
229: return "";
230: int end = begin + 1;
231: while (end < limit && !Character.isWhitespace(line.charAt(end)))
232: ++end;
233: return line.getText().substring(begin, end);
234: }
235:
236: private static final int countLeadingSpaces(String s) {
237: final int limit = s.length();
238: for (int i = 0; i < limit; i++) {
239: if (s.charAt(i) != ' ')
240: return i;
241: }
242: return limit;
243: }
244:
/**
 * Splits one line into formatted segments.
 *
 * Scans the detabbed text of the line one character at a time, starting in
 * the state stored in the line's flags, and calls endToken() at every token
 * boundary so that the segment list is rebuilt from scratch. Character
 * based rules (escapes, strings, comments, '#', quote, backquote, comma,
 * parentheses) are tried first; the remaining rules depend on the current
 * state. The order of the tests is significant.
 */
private void parseLine(Line line) {
    currentLine = line;
    tokenBegin = 0;
    final String text = getDetabbedText(line);
    // Initial state comes from the line flags (parseBuffer() and its
    // helpers write them through setLineFlags()).
    int state = line.flags();
    clearSegmentList();
    final int limit = text.length();
    int i = 0;
    while (i < limit) {
        char c = text.charAt(i);
        // Backslash escape: skip the backslash together with the escaped
        // character, whatever the current state is.
        if (c == '\\' && i < limit - 1) {
            i += 2;
            continue;
        }
        if (state == STATE_COMMENT) {
            // Inside "#| ... |#": only the closing "|#" matters. Nesting
            // depth is not tracked here.
            if (c == '|' && i < limit - 1) {
                c = text.charAt(i + 1);
                if (c == '#') {
                    i += 2;
                    endToken(text, i, state);
                    state = STATE_NEUTRAL;
                    continue;
                }
            }
            ++i;
            continue;
        }
        if (state == STATE_QUOTE) {
            // Inside a string: the closing quote belongs to the string
            // segment.
            if (c == '"') {
                endToken(text, i + 1, state);
                state = STATE_NEUTRAL;
            }
            ++i;
            continue;
        }
        // Reaching here, we're not in a comment or quoted string.
        if (c == '"') {
            endToken(text, i, state);
            state = STATE_QUOTE;
            ++i;
            continue;
        }
        if (c == ';') {
            // Line comment: the rest of the line is one comment segment.
            endToken(text, i, state);
            endToken(text, limit, STATE_COMMENT);
            return;
        }
        if (c == '#' && i < limit - 1) {
            endToken(text, i, state);
            c = text.charAt(i + 1);
            if (c == '|') {
                // Start of a "#|" block comment.
                state = STATE_COMMENT;
                i += 2;
                continue;
            }
            if (c == '\'') {
                // "#'": skip both characters, state unchanged.
                i += 2;
                continue;
            }
            if (c == ':') {
                // Uninterned symbol.
                i += 2;
                continue;
            }
            state = STATE_NEUTRAL;
            ++i;
            continue;
        }
        if (c == '\'') {
            // Quoted object: skipQuotedObject() emits the parentheses it
            // finds and returns the index at which scanning resumes.
            endToken(text, i, state);
            state = STATE_NEUTRAL;
            i = skipQuotedObject(text, ++i, state);
            continue;
        }
        if (c == '`') {
            // Backquote.
            endToken(text, i, state);
            state = STATE_PUNCTUATION;
            ++i;
            endToken(text, i, state);
            state = STATE_NEUTRAL;
            continue;
        }
        if (c == ',') {
            // Comma, optionally followed by '@' or '.', is emitted as
            // punctuation; what follows is a substitution.
            endToken(text, i, state);
            state = STATE_PUNCTUATION;
            ++i;
            if (i < limit) {
                c = text.charAt(i);
                if (c == '@' || c == '.')
                    ++i;
            }
            endToken(text, i, state);
            state = STATE_SUBSTITUTION;
            continue;
        }
        if (state == STATE_ARGLIST) {
            // In an argument list '(' is emitted as a parenthesis but the
            // state stays STATE_ARGLIST, so the token after it is not
            // treated as the first element of a form.
            if (c == '(') {
                endToken(text, i, state);
                ++i;
                endToken(text, i, STATE_OPEN_PAREN);
                continue;
            }
        }
        if (c == '(') {
            endToken(text, i, state);
            state = STATE_OPEN_PAREN;
            ++i;
            continue;
        }
        if (c == ')') {
            endToken(text, i, state);
            state = STATE_CLOSE_PAREN;
            ++i;
            continue;
        }
        if (state == STATE_OPEN_PAREN) {
            // Classify the first non-whitespace character after '('.
            if (c == ':' || c == '&') {
                endToken(text, i, state);
                state = STATE_SECONDARY_KEYWORD;
            } else if (!Character.isWhitespace(c)) {
                endToken(text, i, state);
                if (isPositionFunctional(text, i, currentLine))
                    state = STATE_DEFUN;
                else
                    state = STATE_CAR;
            }
            ++i;
            continue;
        }
        if (state == STATE_CLOSE_PAREN) {
            // ')' itself never reaches this point (handled above), so
            // this emits the pending parenthesis run and consumes c.
            if (c != ')') {
                endToken(text, i, state);
                state = STATE_NEUTRAL;
            }
            ++i;
            continue;
        }
        if (state == STATE_CAR) {
            if (Character.isWhitespace(c)) {
                endToken(text, i, state);
                state = STATE_NEUTRAL;
            }
            ++i;
            continue;
        }
        if (state == STATE_DEFUN) {
            if (Character.isWhitespace(c)) {
                endToken(text, i, state);
                // If the token just emitted is a definer, the next
                // identifier is the name being defined.
                LineSegment s = segmentList.getLastSegment();
                if (s != null) {
                    String translated = LispMode.translateDefiner(s
                        .getText());
                    if (translated != null && isDefiner(translated)) {
                        state = STATE_DEFINITION;
                        ++i;
                        continue;
                    }
                }
                state = STATE_NEUTRAL;
            }
            ++i;
            continue;
        }
        if (state == STATE_NAME) {
            // The defined name may contain ':'; once it ends we are in
            // STATE_ARGLIST.
            if (!mode.isIdentifierPart(c) && c != ':') {
                endToken(text, i, state);
                state = STATE_ARGLIST;
            }
            ++i;
            continue;
        }
        if (state == STATE_IDENTIFIER) {
            if (!mode.isIdentifierPart(c) && c != ':') {
                endToken(text, i, state);
                state = STATE_NEUTRAL;
            }
            ++i;
            continue;
        }
        if (state == STATE_SECONDARY_KEYWORD
            || state == STATE_SUBSTITUTION) {
            if (!mode.isIdentifierPart(c)) {
                endToken(text, i, state);
                state = STATE_NEUTRAL;
            }
            ++i;
            continue;
        }
        if (state == STATE_DEFINITION) {
            // Waiting for the name: it starts at the first identifier
            // start character. No token is ended here.
            if (mode.isIdentifierStart(c))
                state = STATE_NAME;
            ++i;
            continue;
        }
        if (state == STATE_NEUTRAL || state == STATE_ARGLIST
            || state == STATE_QUOTED_LIST) {
            if (c == ':' || c == '&') {
                endToken(text, i, state);
                state = STATE_SECONDARY_KEYWORD;
            } else if (mode.isIdentifierStart(c)) {
                endToken(text, i, state);
                state = STATE_IDENTIFIER;
            } else
                // Still neutral...
                ;
        }
        ++i;
    }
    // Emit whatever is still pending at the end of the line.
    endToken(text, i, state);
}
456:
457: public LineSegmentList formatLine(Line line) {
458: if (line == null) {
459: clearSegmentList();
460: addSegment("", LISP_FORMAT_TEXT);
461: return segmentList;
462: }
463: parseLine(line);
464: return segmentList;
465: }
466:
467: public boolean parseBuffer() {
468: int state = STATE_NEUTRAL;
469: boolean changed = false;
470: Position pos = new Position(buffer.getFirstLine(), 0);
471: while (!pos.atEnd()) {
472: char c = pos.getChar();
473: if (c == EOL) {
474: if (pos.nextLine()) {
475: changed = setLineFlags(pos.getLine(), state)
476: || changed;
477: continue;
478: } else
479: break; // Reached end of buffer.
480: }
481: if (c == '\\') {
482: // Escape.
483: pos.skip();
484: pos.next();
485: continue;
486: }
487: // Not in comment or quoted string.
488: if (c == ';') {
489: // Single-line comment beginning. Ignore rest of line.
490: if (pos.nextLine()) {
491: changed = setLineFlags(pos.getLine(), state)
492: || changed;
493: continue;
494: } else {
495: pos.moveTo(pos.getLine(), pos.getLine().length());
496: break; // Reached end of buffer.
497: }
498: }
499: if (c == '#') {
500: if (pos.lookingAt("#|")) {
501: pos.skip(2);
502: changed = skipBalancedComment(pos) || changed;
503: } else if (pos.lookingAt("#'"))
504: pos.skip(2);
505: else
506: pos.skip();
507: continue;
508: }
509: if (c == '"') {
510: pos.skip();
511: changed = skipString(pos) || changed;
512: continue;
513: }
514: if (c == '\'') {
515: pos.skip();
516: changed = skipQuotedObject(pos) || changed;
517: continue;
518: }
519: if (c == '(') {
520: state = STATE_OPEN_PAREN;
521: pos.skip();
522: continue;
523: }
524: if (state == STATE_OPEN_PAREN) {
525: if (!Character.isWhitespace(c))
526: state = STATE_CAR;
527: pos.next();
528: continue;
529: }
530: if (state == STATE_CAR) {
531: if (c == ')' || Character.isWhitespace(c))
532: state = STATE_NEUTRAL;
533: pos.next();
534: continue;
535: }
536: // Default.
537: pos.skip();
538: continue;
539: }
540: buffer.setNeedsParsing(false);
541: return changed;
542: }
543:
544: private static boolean skipString(Position pos) {
545: boolean changed = false;
546: while (!pos.atEnd()) {
547: char c = pos.getChar();
548: if (c == EOL) {
549: if (pos.nextLine()) {
550: changed = setLineFlags(pos.getLine(), STATE_QUOTE)
551: || changed;
552: continue;
553: } else
554: break; // Reached end of buffer.
555: }
556: if (c == '\\') {
557: // Escape.
558: pos.skip();
559: if (pos.getChar() == EOL) {
560: if (pos.nextLine()) {
561: changed = setLineFlags(pos.getLine(),
562: STATE_QUOTE)
563: || changed;
564: continue;
565: } else
566: break; // End of buffer.
567: } else {
568: // Not end of line.
569: pos.next();
570: continue;
571: }
572: }
573: if (c == '"') {
574: pos.next();
575: break;
576: }
577: // Default.
578: pos.skip();
579: }
580: return changed;
581: }
582:
583: private static boolean skipBalancedComment(Position pos) {
584: boolean changed = false;
585: int count = 1;
586: while (!pos.atEnd()) {
587: char c = pos.getChar();
588: if (c == EOL) {
589: if (pos.nextLine()) {
590: changed = setLineFlags(pos.getLine(), STATE_COMMENT)
591: || changed;
592: continue;
593: } else
594: break; // End of buffer.
595: }
596: if (c == '\\') {
597: // Escape.
598: pos.skip();
599: pos.next();
600: continue;
601: }
602: if (c == '#' && pos.lookingAt("#|")) {
603: pos.skip(2);
604: ++count;
605: continue;
606: }
607: if (c == '|' && pos.lookingAt("|#")) {
608: pos.skip(2);
609: if (--count == 0)
610: break; // End of comment.
611: else
612: continue;
613: }
614: // Default.
615: pos.skip();
616: }
617: return changed;
618: }
619:
620: private int skipQuotedObject(String text, int i, int state) {
621: int count = 0;
622: final int limit = text.length();
623: // Skip whitespace after quote character.
624: while (i < limit && Character.isWhitespace(text.charAt(i)))
625: ++i;
626: while (i < limit) {
627: switch (text.charAt(i)) {
628: case ' ':
629: case '\t':
630: return i;
631: case '(':
632: endToken(text, i, state);
633: ++count;
634: ++i;
635: endToken(text, i, STATE_OPEN_PAREN);
636: break;
637: case ')':
638: endToken(text, i, state);
639: ++i;
640: endToken(text, i, STATE_CLOSE_PAREN);
641: if (--count <= 0)
642: return i;
643: break;
644: case '\\':
645: ++i;
646: if (i < limit)
647: ++i;
648: break;
649: case ';':
650: case ',':
651: case '"':
652: return i;
653: case ':':
654: if (i > 0) {
655: char c = text.charAt(i - 1);
656: if (!mode.isIdentifierPart(c) && c != ':')
657: return i;
658: }
659: ++i;
660: break;
661: default:
662: ++i;
663: break;
664: }
665: }
666: return i;
667: }
668:
669: private static boolean skipQuotedObject(Position pos) {
670: boolean changed = false;
671: int count = 0;
672: while (!pos.atEnd()) {
673: char c = pos.getChar();
674: if (c == EOL) {
675: if (pos.nextLine()) {
676: changed = setLineFlags(pos.getLine(),
677: STATE_QUOTED_LIST)
678: || changed;
679: continue;
680: } else
681: break; // End of buffer.
682: }
683: if (Character.isWhitespace(c)) {
684: pos.skip();
685: continue;
686: }
687: if (c == '\\') {
688: pos.skip();
689: pos.next();
690: continue;
691: }
692: if (c == '"') {
693: pos.skip();
694: changed = skipString(pos) || changed;
695: continue;
696: }
697: if (c == '#' && pos.lookingAt("#(")) {
698: ++count;
699: pos.skip(2);
700: continue;
701: }
702: if (c == '(') {
703: ++count;
704: pos.skip();
705: continue;
706: }
707: if (c == ')') {
708: pos.skip();
709: if (count > 0) {
710: --count;
711: if (count == 0)
712: break;
713: }
714: continue;
715: }
716: // Not EOL, whitespace or paren.
717: if (count == 0) {
718: skipToken(pos);
719: break;
720: }
721: // Default.
722: pos.skip();
723: }
724: return changed;
725: }
726:
727: private static boolean setLineFlags(Line line, int newFlags) {
728: if (line.flags() == newFlags)
729: return false; // No change.
730: line.setFlags(newFlags);
731: return true;
732: }
733:
734: private static void skipToken(Position pos) {
735: while (!Character.isWhitespace(pos.getChar()) && pos.next())
736: ;
737: }
738:
739: public FormatTable getFormatTable() {
740: if (formatTable == null) {
741: formatTable = new FormatTable("LispMode");
742: formatTable.addEntryFromPrefs(LISP_FORMAT_TEXT, "text");
743: formatTable.addEntryFromPrefs(LISP_FORMAT_COMMENT,
744: "comment");
745: formatTable.addEntryFromPrefs(LISP_FORMAT_STRING, "string");
746: formatTable.addEntryFromPrefs(LISP_FORMAT_KEYWORD,
747: "keyword");
748: formatTable.addEntryFromPrefs(LISP_FORMAT_DEFUN, "keyword");
749: formatTable.addEntryFromPrefs(LISP_FORMAT_NAME, "function");
750: formatTable.addEntryFromPrefs(LISP_FORMAT_PARENTHESIS,
751: "parenthesis", "text");
752: formatTable.addEntryFromPrefs(LISP_FORMAT_PUNCTUATION,
753: "punctuation", "text");
754: formatTable.addEntryFromPrefs(LISP_FORMAT_SUBSTITUTION,
755: "substitution", "text");
756: formatTable.addEntryFromPrefs(
757: LISP_FORMAT_SECONDARY_KEYWORD, "secondaryKeyword",
758: "text");
759: }
760: return formatTable;
761: }
762: }
|