001: /*
002: * RubyFormatter.java
003: *
004: * Copyright (C) 2002 Jens Luedicke <jens@irs-net.com>
005: * based on PythonFormatter.java
006: * $Id: RubyFormatter.java,v 1.1.1.1 2002/09/24 16:09:23 piso Exp $
007: *
008: * This program is free software; you can redistribute it and/or
009: * modify it under the terms of the GNU General Public License
010: * as published by the Free Software Foundation; either version 2
011: * of the License, or (at your option) any later version.
012: *
013: * This program is distributed in the hope that it will be useful,
014: * but WITHOUT ANY WARRANTY; without even the implied warranty of
015: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
016: * GNU General Public License for more details.
017: *
018: * You should have received a copy of the GNU General Public License
019: * along with this program; if not, write to the Free Software
020: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
021: */
022:
023: package org.armedbear.j;
024:
025: public final class RubyFormatter extends Formatter {
// Scanner states. parseBuffer() stores one of these in each line's flags and
// parseLine() reads it back as the state in effect at the start of that line.
private static final int RUBY_STATE_NEUTRAL          = 0;
private static final int RUBY_STATE_SINGLE_QUOTE     = 1;
private static final int RUBY_STATE_DOUBLE_QUOTE     = 2;
private static final int RUBY_STATE_IDENTIFIER       = 3;
private static final int RUBY_STATE_COMMENT          = 4;
private static final int RUBY_STATE_BRACE            = 5;
private static final int RUBY_STATE_NUMBER           = 6;
private static final int RUBY_STATE_HEXNUMBER        = 7;
private static final int RUBY_STATE_OPERATOR         = 8;
private static final int RUBY_STATE_HERE_DOCUMENT    = 9;
private static final int RUBY_STATE_POD              = 10;
private static final int RUBY_STATE_REGEXP           = 11;
private static final int RUBY_STATE_REGEXP_DELIMITER = 12;

// Format ids handed to addSegment() and registered in getFormatTable().
private static final int RUBY_FORMAT_TEXT       = 0;
private static final int RUBY_FORMAT_COMMENT    = 1;
private static final int RUBY_FORMAT_STRING     = 2;
private static final int RUBY_FORMAT_IDENTIFIER = 3;
private static final int RUBY_FORMAT_KEYWORD    = 4;
private static final int RUBY_FORMAT_FUNCTION   = 5;
private static final int RUBY_FORMAT_OPERATOR   = 6;
private static final int RUBY_FORMAT_BRACE      = 7;
private static final int RUBY_FORMAT_NUMBER     = 8;

// Consulted for identifier-start / identifier-part classification.
private static final RubyMode mode = RubyMode.getMode();

// Here-document terminator text, assigned by parseBuffer() and compared
// against line text by parseBuffer() and parseLine().
private String endOfText;

// Offset at which the segment currently being collected starts; reset at
// the top of parseLine() and advanced by endSegment().
private int begin = 0;

/**
 * Creates a formatter bound to the given buffer.
 *
 * @param buffer the buffer whose lines this formatter parses and formats
 */
public RubyFormatter(Buffer buffer) {
    this.buffer = buffer;
}
059:
060: private void endSegment(String text, int offset, int state) {
061: if (offset - begin > 0) {
062: int format;
063: switch (state) {
064: case RUBY_STATE_NEUTRAL:
065: format = RUBY_FORMAT_TEXT;
066: break;
067: case RUBY_STATE_SINGLE_QUOTE:
068: case RUBY_STATE_DOUBLE_QUOTE:
069: case RUBY_STATE_HERE_DOCUMENT:
070: case RUBY_STATE_REGEXP:
071: format = RUBY_FORMAT_STRING;
072: break;
073: case RUBY_STATE_REGEXP_DELIMITER:
074: format = RUBY_FORMAT_FUNCTION;
075: break;
076: case RUBY_STATE_IDENTIFIER:
077: format = RUBY_FORMAT_IDENTIFIER;
078: break;
079: case RUBY_STATE_COMMENT:
080: case RUBY_STATE_POD:
081: format = RUBY_FORMAT_COMMENT;
082: break;
083: case RUBY_STATE_OPERATOR:
084: format = RUBY_FORMAT_OPERATOR;
085: break;
086: case RUBY_STATE_BRACE:
087: format = RUBY_FORMAT_BRACE;
088: break;
089: case RUBY_STATE_NUMBER:
090: case RUBY_STATE_HEXNUMBER:
091: format = RUBY_FORMAT_NUMBER;
092: break;
093: default:
094: format = RUBY_FORMAT_TEXT;
095: break;
096: }
097: addSegment(text, begin, offset, format);
098: begin = offset;
099: }
100: }
101:
102: private void parseLine(Line line) {
103: String text;
104: if (Editor.tabsAreVisible())
105: text = Utilities.makeTabsVisible(line.getText(), buffer
106: .getTabWidth());
107: else
108: text = Utilities
109: .detab(line.getText(), buffer.getTabWidth());
110: begin = 0;
111: int state = line.flags();
112: int i = 0;
113: final int limit = text.length();
114: if (state == RUBY_STATE_HERE_DOCUMENT) {
115: if (text.trim().startsWith(endOfText))
116: state = RUBY_STATE_NEUTRAL;
117: else {
118: endSegment(text, limit, state);
119: return;
120: }
121: }
122: if (state == RUBY_STATE_POD) {
123: endSegment(text, limit, state);
124: return;
125: }
126: // Skip whitespace at start of line.
127: while (i < limit) {
128: if (Character.isWhitespace(text.charAt(i))) {
129: ++i;
130: } else {
131: endSegment(text, i, state);
132: break;
133: }
134: }
135: while (i < limit) {
136: char c = text.charAt(i);
137: if (c == '\\' && i < limit - 1) {
138: // Escape char.
139: i += 2;
140: continue;
141: }
142: if (state == RUBY_STATE_SINGLE_QUOTE) {
143: if (c == '\'') {
144: endSegment(text, i + 1, state);
145: state = RUBY_STATE_NEUTRAL;
146: }
147: ++i;
148: continue;
149: }
150: if (state == RUBY_STATE_DOUBLE_QUOTE) {
151: if (c == '"') {
152: endSegment(text, i + 1, state);
153: state = RUBY_STATE_NEUTRAL;
154: }
155: ++i;
156: continue;
157: }
158: if (state == RUBY_STATE_REGEXP) {
159: if (c == '/') {
160: endSegment(text, i, state);
161: endSegment(text, i + 1, RUBY_STATE_REGEXP_DELIMITER);
162: state = RUBY_STATE_NEUTRAL;
163: }
164: ++i;
165: continue;
166: }
167: // Reaching here, we're not in a quoted string or regexp.
168: if (c == '\'') {
169: if (i == 0 || text.charAt(i - 1) != '$') {
170: endSegment(text, i, state);
171: state = RUBY_STATE_SINGLE_QUOTE;
172: }
173: ++i;
174: continue;
175: }
176: if (c == '"') {
177: if (i == 0 || text.charAt(i - 1) != '$') {
178: endSegment(text, i, state);
179: state = RUBY_STATE_DOUBLE_QUOTE;
180: }
181: ++i;
182: continue;
183: }
184: if (c == '/') {
185: if (isRegExp(text, i)) {
186: endSegment(text, i, state);
187: endSegment(text, i + 1, RUBY_STATE_REGEXP_DELIMITER);
188: state = RUBY_STATE_REGEXP;
189: }
190: ++i;
191: continue;
192: }
193: if (c == '#') {
194: endSegment(text, i, state);
195: endSegment(text, limit, RUBY_STATE_COMMENT);
196: return;
197: }
198: if (isOperatorChar(c)) {
199: if (state != RUBY_STATE_OPERATOR) {
200: endSegment(text, i, state);
201: state = RUBY_STATE_OPERATOR;
202: }
203: ++i;
204: continue;
205: }
206: if (c == '{' || c == '}') {
207: if (state != RUBY_STATE_BRACE) {
208: endSegment(text, i, state);
209: // Check for keyword.
210: LineSegment segment = getLastSegment();
211: if (segment != null && isKeyword(segment.getText()))
212: segment.setFormat(RUBY_FORMAT_KEYWORD);
213: state = RUBY_STATE_BRACE;
214: }
215: ++i;
216: continue;
217: }
218: if (state == RUBY_STATE_OPERATOR
219: || state == RUBY_STATE_BRACE) {
220: if (mode.isIdentifierStart(c)) {
221: endSegment(text, i, state);
222: state = RUBY_STATE_IDENTIFIER;
223: } else if (Character.isDigit(c)) {
224: endSegment(text, i, state);
225: state = RUBY_STATE_NUMBER;
226: } else {
227: endSegment(text, i, state);
228: state = RUBY_STATE_NEUTRAL;
229: }
230: ++i;
231: continue;
232: }
233: if (state == RUBY_STATE_IDENTIFIER) {
234: if (!mode.isIdentifierPart(c)) {
235: endSegment(text, i, state);
236: // Check for keyword or function.
237: LineSegment segment = getLastSegment();
238: if (segment != null) {
239: String segmentText = segment.getText();
240: if (isKeyword(segment.getText())) {
241: segment.setFormat(RUBY_FORMAT_KEYWORD);
242: } else if (c == '(') {
243: segment.setFormat(RUBY_FORMAT_FUNCTION);
244: } else if (Character.isWhitespace(c)) {
245: // Look ahead to see if next non-whitespace char is '('.
246: int j = i + 1;
247: while (j < limit
248: && Character.isWhitespace(c = text
249: .charAt(j)))
250: ++j;
251: if (c == '(')
252: segment.setFormat(RUBY_FORMAT_FUNCTION);
253: }
254: }
255: state = RUBY_STATE_NEUTRAL;
256: }
257: ++i;
258: continue;
259: }
260: if (state == RUBY_STATE_NUMBER) {
261: if (Character.isDigit(c))
262: ;
263: else if (c == 'l' || c == 'L')
264: ;
265: else if (i - begin == 1 && c == 'x' || c == 'X')
266: state = RUBY_STATE_HEXNUMBER;
267: else {
268: endSegment(text, i, state);
269: if (mode.isIdentifierStart(c))
270: state = RUBY_STATE_IDENTIFIER;
271: else
272: state = RUBY_STATE_NEUTRAL;
273: }
274: ++i;
275: continue;
276: }
277: if (state == RUBY_STATE_HEXNUMBER) {
278: if (Character.isDigit(c))
279: ;
280: else if ((c >= 'a' && c <= 'f')
281: || (c >= 'A' && c <= 'F'))
282: ;
283: else if (c == 'l' || c == 'L')
284: ;
285: else {
286: endSegment(text, i, state);
287: if (mode.isIdentifierStart(c))
288: state = RUBY_STATE_IDENTIFIER;
289: else
290: state = RUBY_STATE_NEUTRAL;
291: }
292: ++i;
293: continue;
294: }
295: if (state == RUBY_STATE_NEUTRAL) {
296: if (mode.isIdentifierStart(c)) {
297: endSegment(text, i, state);
298: state = RUBY_STATE_IDENTIFIER;
299: } else if (Character.isDigit(c)) {
300: if (i == 0 || text.charAt(i - 1) != '$') {
301: endSegment(text, i, state);
302: state = RUBY_STATE_NUMBER;
303: }
304: }
305: }
306: ++i;
307: }
308: // Reached end of line.
309: endSegment(text, i, state);
310: if (state == RUBY_STATE_IDENTIFIER) {
311: // Last token might be a keyword.
312: LineSegment segment = getLastSegment();
313: if (segment != null && isKeyword(segment.getText()))
314: segment.setFormat(RUBY_FORMAT_KEYWORD);
315: }
316: }
317:
318: // Make sure the '/' at i is not the division operator.
319: public static boolean isRegExp(String text, int i) {
320: Debug.assertTrue(text.charAt(i) == '/');
321: if (i == 0) {
322: // It's the first character on the line.
323: return true;
324: }
325: // Consider the previous character.
326: char c = text.charAt(i - 1);
327: if (c == '(')
328: return true;
329:
330: if (mode.isIdentifierPart(c))
331: return false;
332:
333: if (!Character.isWhitespace(c))
334: return false;
335:
336: // The immediately previous character is whitespace.
337: final String s = text.substring(0, i - 1).trim();
338: final int length = s.length();
339: if (length == 0) {
340: // The '/' is the first non-whitespace character on the line.
341: return true;
342: }
343: c = s.charAt(length - 1);
344: if (c == ')')
345: return false; // "(a + b) / c"
346: if (c == '}')
347: return false;
348: if (!mode.isIdentifierPart(c))
349: return true;
350:
351: // Last non-whitespace character is a valid identifier character.
352: FastStringBuffer sb = new FastStringBuffer(c);
353: for (int j = s.length() - 2; j >= 0; j--) {
354: c = s.charAt(j);
355: if (mode.isIdentifierPart(c))
356: sb.append(c);
357: else
358: break;
359: }
360: String token = sb.reverse().toString();
361: String[] ok = { "and", "or", "not", "if", "unless", "when" };
362: if (Utilities.isOneOf(token, ok))
363: return true;
364:
365: return false;
366: }
367:
368: public LineSegmentList formatLine(Line line) {
369: clearSegmentList();
370: parseLine(line);
371: return segmentList;
372: }
373:
374: public boolean parseBuffer() {
375: int state = RUBY_STATE_NEUTRAL;
376: Line line = buffer.getFirstLine();
377: boolean changed = false;
378: while (line != null) {
379: int oldflags = line.flags();
380: if (state == RUBY_STATE_HERE_DOCUMENT) {
381: if (line.getText().equals(endOfText))
382: state = RUBY_STATE_NEUTRAL;
383: }
384: if (state == RUBY_STATE_POD) {
385: if (line.getText().startsWith("=end")) {
386: if (state != oldflags) {
387: line.setFlags(state);
388: changed = true;
389: }
390: state = RUBY_STATE_NEUTRAL;
391: line = line.next();
392: continue;
393: }
394: }
395: if (state == RUBY_STATE_NEUTRAL)
396: if (line.getText().startsWith("=begin"))
397: state = RUBY_STATE_POD;
398: if (state != oldflags) {
399: line.setFlags(state);
400: changed = true;
401: }
402: if (state == RUBY_STATE_HERE_DOCUMENT
403: || state == RUBY_STATE_POD) {
404: line = line.next();
405: continue;
406: }
407: final String text = line.getText();
408: final int limit = line.length();
409: int i = 0;
410: while (i < limit) {
411: char c = text.charAt(i);
412: if (c == '\\') {
413: // Escape.
414: i += 2;
415: continue;
416: }
417: if (state == RUBY_STATE_SINGLE_QUOTE) {
418: if (c == '\'')
419: state = RUBY_STATE_NEUTRAL;
420: ++i;
421: continue;
422: }
423: if (state == RUBY_STATE_DOUBLE_QUOTE) {
424: if (c == '"')
425: state = RUBY_STATE_NEUTRAL;
426: ++i;
427: continue;
428: }
429: if (state == RUBY_STATE_REGEXP) {
430: if (c == '/')
431: state = RUBY_STATE_NEUTRAL;
432: ++i;
433: continue;
434: }
435: // Not in quoted string or regexp.
436: if (c == '/') {
437: if (isRegExp(text, i))
438: state = RUBY_STATE_REGEXP;
439: ++i;
440: continue;
441: }
442: if (c == '<' && i < limit - 3
443: && line.charAt(i + 1) == '<') {
444: // There must be no space between "<<" and the terminator.
445: if (!Character.isWhitespace(line.charAt(i + 2))) {
446: endOfText = line.substring(i + 2).trim();
447: int length = endOfText.length();
448: // Remove ';' at end of line.
449: if (length > 0
450: && endOfText.charAt(length - 1) == ';')
451: endOfText = endOfText
452: .substring(0, --length);
453: // Remove leading '-'.
454: if (length > 0 && endOfText.charAt(0) == '-') {
455: endOfText = endOfText.substring(1);
456: --length;
457: }
458: // Remove enclosing quotes.
459: if (length > 2) {
460: char firstChar = endOfText.charAt(0);
461: if ("\"'`".indexOf(firstChar) >= 0)
462: if (endOfText.charAt(length - 1) == firstChar)
463: endOfText = endOfText.substring(1,
464: length - 1);
465: }
466: if (endOfText.length() > 0) {
467: // Make sure "<<" is not shift operator.
468: if (Character.isLetter(endOfText.charAt(0))) {
469: state = RUBY_STATE_HERE_DOCUMENT;
470: break;
471: }
472: }
473: }
474: ++i;
475: continue;
476: }
477: if (c == '\'') {
478: if (i == 0 || line.charAt(i - 1) != '$')
479: state = RUBY_STATE_SINGLE_QUOTE;
480: ++i;
481: continue;
482: }
483: if (c == '"') {
484: if (i == 0 || line.charAt(i - 1) != '$')
485: state = RUBY_STATE_DOUBLE_QUOTE;
486: ++i;
487: continue;
488: }
489: if (c == '#')
490: break;
491: ++i;
492: }
493: line = line.next();
494: }
495: buffer.setNeedsParsing(false);
496: return changed;
497: }
498:
499: private static final boolean isOperatorChar(char c) {
500: return "!&|<>=+/*-".indexOf(c) >= 0;
501: }
502:
503: public FormatTable getFormatTable() {
504: if (formatTable == null) {
505: formatTable = new FormatTable(null);
506: formatTable.addEntryFromPrefs(RUBY_FORMAT_TEXT, "text");
507: formatTable.addEntryFromPrefs(RUBY_FORMAT_COMMENT,
508: "comment");
509: formatTable.addEntryFromPrefs(RUBY_FORMAT_STRING, "string");
510: formatTable.addEntryFromPrefs(RUBY_FORMAT_IDENTIFIER,
511: "identifier", "text");
512: formatTable.addEntryFromPrefs(RUBY_FORMAT_KEYWORD,
513: "keyword");
514: formatTable.addEntryFromPrefs(RUBY_FORMAT_FUNCTION,
515: "function");
516: formatTable.addEntryFromPrefs(RUBY_FORMAT_OPERATOR,
517: "operator");
518: formatTable.addEntryFromPrefs(RUBY_FORMAT_BRACE, "brace");
519: formatTable.addEntryFromPrefs(RUBY_FORMAT_NUMBER, "number");
520: }
521: return formatTable;
522: }
523: }
|