001: /*
002: * PerlSyntaxIterator.java
003: *
004: * Copyright (C) 1998-2003 Peter Graves
005: * $Id: PerlSyntaxIterator.java,v 1.2 2003/04/25 14:20:50 piso Exp $
006: *
007: * This program is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU General Public License
009: * as published by the Free Software Foundation; either version 2
010: * of the License, or (at your option) any later version.
011: *
012: * This program is distributed in the hope that it will be useful,
013: * but WITHOUT ANY WARRANTY; without even the implied warranty of
014: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
015: * GNU General Public License for more details.
016: *
017: * You should have received a copy of the GNU General Public License
018: * along with this program; if not, write to the Free Software
019: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
020: */
021:
022: package org.armedbear.j;
023:
024: import gnu.regexp.RE;
025: import gnu.regexp.REMatch;
026: import gnu.regexp.UncheckedRE;
027:
028: // Supports movement through the syntactically important text of a buffer,
029: // i.e. skipping whitespace and comments.
030: public class PerlSyntaxIterator extends DefaultSyntaxIterator {
031: private static final int STATE_NEUTRAL = 0;
032: private static final int STATE_QUOTE = 1;
033: private static final int STATE_REGEXP = 2;
034: private static final int STATE_SUBST = 3;
035:
036: private static RE matchRE = new UncheckedRE(
037: "(=~|!~)[ \t]+m[^a-zA-Z0-9]");
038:
039: public PerlSyntaxIterator(Position pos) {
040: super (pos);
041: }
042:
043: // Returns char array with syntactic whitespace (quotes and comments)
044: // replaced with actual space characters.
045: public char[] hideSyntacticWhitespace(String s) {
046: char[] chars = s.toCharArray();
047: char quoteChar = 0;
048: char delimiter = 0;
049: int state = STATE_NEUTRAL;
050: final int length = chars.length;
051: for (int i = 0; i < length; i++) {
052: char c = chars[i];
053: if (c == '\\' && i < length - 1) {
054: // Escape!
055: chars[i++] = ' ';
056: chars[i] = ' ';
057: } else if (state == STATE_QUOTE) {
058: chars[i] = ' ';
059: if (c == quoteChar)
060: state = STATE_NEUTRAL;
061: } else if (state == STATE_REGEXP) {
062: if (c == delimiter)
063: state = STATE_NEUTRAL;
064: else
065: chars[i] = ' ';
066: } else if (state == STATE_SUBST) {
067: if (c == delimiter)
068: state = STATE_REGEXP;
069: else
070: chars[i] = ' ';
071: } else if (c == '"' || c == '\'' || c == '`') {
072: quoteChar = c;
073: state = STATE_QUOTE;
074: chars[i] = ' ';
075: } else if (c == '/') {
076: if (PerlFormatter.isSubst(s, i)) {
077: state = STATE_SUBST;
078: delimiter = '/';
079: } else if (PerlFormatter.isRegExp(s, i)) {
080: state = STATE_REGEXP;
081: delimiter = '/';
082: }
083: } else if (c == '=' || c == '!') {
084: REMatch match = matchRE.getMatch(s.substring(i));
085: if (match != null) {
086: final String m = match.toString();
087: final int len = m.length();
088: delimiter = m.charAt(len - 1);
089: if (delimiter == '{')
090: delimiter = '}';
091: state = STATE_REGEXP;
092: i += len - 1;
093: }
094: }
095: }
096: // Handle comment part if any.
097: int index = -1;
098: for (int i = 0; i < length; i++) {
099: if (chars[i] == '#') {
100: if (i > 0) {
101: // Ignore '#' if escaped or if preceding char is '$'.
102: char c = chars[i - 1];
103: if (c == '\\' || c == '$')
104: continue;
105: }
106: // Otherwise the rest of the line is a comment.
107: index = i;
108: break;
109: }
110: }
111: if (index >= 0) {
112: for (int i = index; i < length; i++)
113: chars[i] = ' ';
114: }
115: return chars;
116: }
117: }
|