0001: /*
0002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
0003: *
0004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
0005: *
0006: * The contents of this file are subject to the terms of either the GNU
0007: * General Public License Version 2 only ("GPL") or the Common
0008: * Development and Distribution License("CDDL") (collectively, the
0009: * "License"). You may not use this file except in compliance with the
0010: * License. You can obtain a copy of the License at
0011: * http://www.netbeans.org/cddl-gplv2.html
0012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
0013: * specific language governing permissions and limitations under the
0014: * License. When distributing the software, include this License Header
0015: * Notice in each file and include the License file at
0016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
0017: * particular file as subject to the "Classpath" exception as provided
0018: * by Sun in the GPL Version 2 section of the License file that
0019: * accompanied this code. If applicable, add the following below the
0020: * License Header, with the fields enclosed by brackets [] replaced by
0021: * your own identifying information:
0022: * "Portions Copyrighted [year] [name of copyright owner]"
0023: *
0024: * Contributor(s):
0025: *
0026: * The Original Software is NetBeans. The Initial Developer of the Original
0027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
0028: * Microsystems, Inc. All Rights Reserved.
0029: *
0030: * If you wish your version of this file to be governed by only the CDDL
0031: * or only the GPL Version 2, indicate your decision by adding
0032: * "[Contributor] elects to include this software in this distribution
0033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
0034: * single choice of license, a recipient has the option to distribute
0035: * your version of this file under either the CDDL, the GPL Version 2 or
0036: * to extend the choice of license to its licensees as provided above.
0037: * However, if you add GPL Version 2 code and therefore, elected the GPL
0038: * Version 2 license, then the option applies only if the new code is
0039: * made subject to such option by the copyright holder.
0040: */
0041: package org.netbeans.modules.ruby.lexer;
0042:
0043: import java.util.HashSet;
0044: import java.util.List;
0045: import java.util.Set;
0046:
0047: import javax.swing.text.BadLocationException;
0048: import javax.swing.text.Document;
0049: import org.netbeans.modules.gsf.api.CompilationInfo;
0050:
0051: import org.netbeans.modules.gsf.api.OffsetRange;
0052: import org.netbeans.modules.gsf.api.ParserResult;
0053: import org.netbeans.modules.gsf.api.TranslatedSource;
0054: import org.netbeans.api.lexer.Token;
0055: import org.netbeans.api.lexer.TokenHierarchy;
0056: import org.netbeans.api.lexer.TokenId;
0057: import org.netbeans.api.lexer.TokenSequence;
0058: import org.netbeans.api.ruby.platform.RubyInstallation;
0059: import org.netbeans.editor.BaseDocument;
0060: import org.netbeans.editor.Utilities;
0061: import org.netbeans.modules.ruby.RubyMimeResolver;
0062: import org.openide.filesystems.FileUtil;
0063: import org.openide.loaders.DataObject;
0064: import org.openide.util.Exceptions;
0065:
0066: /**
0067: * Utilities associated with lexing or analyzing the document at the
* lexical level, unlike AstUtilities, which contains utilities
0069: * to analyze parsed information about a document.
0070: *
0071: * @author Tor Norbye
0072: */
0073: public class LexUtilities {
/** Tokens that match a corresponding END statement. Even though while, unless etc.
 * can be statement modifiers, those luckily have different token ids so are not a problem
 * here.
 */
private static final Set<TokenId> END_PAIRS = new HashSet<TokenId>();

/**
 * Tokens that should cause indentation of the next line. This is true for all {@link #END_PAIRS},
 * but also includes tokens like "else" that are not themselves matched with end but also contribute
 * structure for indentation.
 */
private static final Set<TokenId> INDENT_WORDS = new HashSet<TokenId>();

static {
    END_PAIRS.add(RubyTokenId.BEGIN);
    END_PAIRS.add(RubyTokenId.FOR);
    END_PAIRS.add(RubyTokenId.CLASS);
    END_PAIRS.add(RubyTokenId.DEF);
    END_PAIRS.add(RubyTokenId.DO);
    END_PAIRS.add(RubyTokenId.WHILE);
    END_PAIRS.add(RubyTokenId.IF);
    // (the original added CLASS a second time here; the duplicate was removed)
    END_PAIRS.add(RubyTokenId.MODULE);
    END_PAIRS.add(RubyTokenId.CASE);
    END_PAIRS.add(RubyTokenId.LOOP);
    END_PAIRS.add(RubyTokenId.UNTIL);
    END_PAIRS.add(RubyTokenId.UNLESS);

    INDENT_WORDS.addAll(END_PAIRS);
    // Add words that are not matched themselves with an "end",
    // but which also provide block structure to indented content
    // (usually part of a multi-keyword structure such as if-then-elsif-else-end
    // where only the "if" is considered an end-pair.)
    INDENT_WORDS.add(RubyTokenId.ELSE);
    INDENT_WORDS.add(RubyTokenId.ELSIF);
    INDENT_WORDS.add(RubyTokenId.ENSURE);
    INDENT_WORDS.add(RubyTokenId.WHEN);
    INDENT_WORDS.add(RubyTokenId.RESCUE);

    // XXX What about BEGIN{} and END{} ?
}
0116:
0117: private LexUtilities() {
0118: }
0119:
0120: /** For a possibly generated offset in an AST, return the corresponding lexing/true document offset */
0121: public static int getLexerOffset(CompilationInfo info, int astOffset) {
0122: ParserResult result = info.getEmbeddedResult(
0123: RubyMimeResolver.RUBY_MIME_TYPE, 0);
0124: if (result != null) {
0125: TranslatedSource ts = result.getTranslatedSource();
0126: if (ts != null) {
0127: return ts.getLexicalOffset(astOffset);
0128: }
0129: }
0130:
0131: return astOffset;
0132: }
0133:
0134: public static OffsetRange getLexerOffsets(CompilationInfo info,
0135: OffsetRange astRange) {
0136: ParserResult result = info.getEmbeddedResult(
0137: RubyMimeResolver.RUBY_MIME_TYPE, 0);
0138: if (result != null) {
0139: TranslatedSource ts = result.getTranslatedSource();
0140: if (ts != null) {
0141: int rangeStart = astRange.getStart();
0142: int start = ts.getLexicalOffset(rangeStart);
0143: if (start == rangeStart) {
0144: return astRange;
0145: } else if (start == -1) {
0146: return OffsetRange.NONE;
0147: } else {
0148: // Assumes the translated range maintains size
0149: return new OffsetRange(start, start
0150: + astRange.getLength());
0151: }
0152: }
0153: }
0154:
0155: return astRange;
0156: }
0157:
0158: /** Find the ruby token sequence (in case it's embedded in something else at the top level */
0159: @SuppressWarnings("unchecked")
0160: public static TokenSequence<? extends RubyTokenId> getRubyTokenSequence(
0161: BaseDocument doc, int offset) {
0162: TokenHierarchy<Document> th = TokenHierarchy
0163: .get((Document) doc);
0164: return getRubyTokenSequence(th, offset);
0165: }
0166:
/**
 * For an RHTML token sequence, look at the given offset for a
 * "ruby-delimiter" token immediately followed by a "ruby" token, and
 * return the embedded Ruby token sequence inside the latter.
 *
 * @param t a top-level (possibly RHTML) token sequence
 * @param offset the document offset to inspect
 * @return the embedded Ruby token sequence, or null if the sequence is not
 *         RHTML or no delimited Ruby section starts here
 */
@SuppressWarnings("unchecked")
private static TokenSequence<? extends RubyTokenId> findRhtmlDelimited(
    TokenSequence t, int offset) {
    if (t.language().mimeType().equals(
        RubyInstallation.RHTML_MIME_TYPE)) {
        t.move(offset);
        // The token at the offset should be the RHTML delimiter (category "ruby-delimiter")
        if (t.moveNext()
            && t.token() != null
            && "ruby-delimiter".equals(t.token().id()
                .primaryCategory())) { // NOI18N
            // It's a delimiter - move ahead and see if we find it
            if (t.moveNext()
                && t.token() != null
                && "ruby".equals(t.token().id()
                    .primaryCategory())) { // NOI18N
                TokenSequence<?> ets = t.embedded();
                if (ets != null) {
                    return (TokenSequence<? extends RubyTokenId>) ets;
                }
            }
        }
    }

    return null;
}
0192:
0193: @SuppressWarnings("unchecked")
0194: public static TokenSequence<? extends RubyTokenId> getRubyTokenSequence(
0195: TokenHierarchy<Document> th, int offset) {
0196: TokenSequence<? extends RubyTokenId> ts = th
0197: .tokenSequence(RubyTokenId.language());
0198:
0199: if (ts == null) {
0200: // Possibly an embedding scenario such as an RHTML file
0201: // First try with backward bias true
0202: List<TokenSequence<?>> list = th.embeddedTokenSequences(
0203: offset, true);
0204:
0205: for (TokenSequence t : list) {
0206: if (t.language() == RubyTokenId.language()) {
0207: ts = t;
0208:
0209: break;
0210: } else {
0211: TokenSequence<? extends RubyTokenId> ets = findRhtmlDelimited(
0212: t, offset);
0213: if (ets != null) {
0214: return ets;
0215: }
0216: }
0217: }
0218:
0219: if (ts == null) {
0220: list = th.embeddedTokenSequences(offset, false);
0221:
0222: for (TokenSequence t : list) {
0223: if (t.language() == RubyTokenId.language()) {
0224: ts = t;
0225:
0226: break;
0227: } else {
0228: TokenSequence<? extends RubyTokenId> ets = findRhtmlDelimited(
0229: t, offset);
0230: if (ets != null) {
0231: return ets;
0232: }
0233: }
0234: }
0235: }
0236: }
0237:
0238: return ts;
0239: }
0240:
/**
 * Return the Ruby token sequence for the document, positioned on the token
 * at (or adjacent to) the given offset.
 *
 * @return the positioned sequence, or null when there is no Ruby sequence
 *         at the offset or the sequence contains no tokens
 */
public static TokenSequence<? extends RubyTokenId> getPositionedSequence(
    BaseDocument doc, int offset) {
    TokenSequence<? extends RubyTokenId> ts = getRubyTokenSequence(
        doc, offset);

    if (ts != null) {
        try {
            ts.move(offset);
        } catch (AssertionError e) {
            // Attach the file name to the assertion error so lexer bugs
            // can be traced back to the offending file from bug reports
            DataObject dobj = (DataObject) doc
                .getProperty(Document.StreamDescriptionProperty);

            if (dobj != null) {
                Exceptions.attachMessage(e, FileUtil
                    .getFileDisplayName(dobj.getPrimaryFile()));
            }

            throw e;
        }

        // move() only positions between tokens; settle on a concrete token.
        // If we can move neither forward nor backward the sequence is empty.
        if (!ts.moveNext() && !ts.movePrevious()) {
            return null;
        }

        return ts;
    }

    return null;
}
0270:
0271: public static Token<? extends RubyTokenId> getToken(
0272: BaseDocument doc, int offset) {
0273: TokenSequence<? extends RubyTokenId> ts = getPositionedSequence(
0274: doc, offset);
0275:
0276: if (ts != null) {
0277: return ts.token();
0278: }
0279:
0280: return null;
0281: }
0282:
0283: public static char getTokenChar(BaseDocument doc, int offset) {
0284: Token<? extends RubyTokenId> token = getToken(doc, offset);
0285:
0286: if (token != null) {
0287: String text = token.text().toString();
0288:
0289: if (text.length() > 0) { // Usually true, but I could have gotten EOF right?
0290:
0291: return text.charAt(0);
0292: }
0293: }
0294:
0295: return 0;
0296: }
0297:
0298: /** Search forwards in the token sequence until a token of type <code>down</code> is found */
0299: public static OffsetRange findHeredocEnd(
0300: TokenSequence<? extends RubyTokenId> ts,
0301: Token<? extends RubyTokenId> startToken) {
0302: // Look for the end of the given heredoc
0303: String text = startToken.text().toString();
0304: assert text.startsWith("<<");
0305: text = text.substring(2);
0306: if (text.startsWith("-")) {
0307: text = text.substring(1);
0308: }
0309: if ((text.startsWith("\"") && text.endsWith("\""))
0310: || (text.startsWith("'") && text.endsWith("'"))) {
0311: text = text.substring(0, text.length() - 2);
0312: }
0313: String textn = text + "\n";
0314:
0315: while (ts.moveNext()) {
0316: Token<? extends RubyTokenId> token = ts.token();
0317: TokenId id = token.id();
0318:
0319: if (id == RubyTokenId.STRING_END
0320: || id == RubyTokenId.QUOTED_STRING_END) {
0321: String t = token.text().toString();
0322: if (text.equals(t) || textn.equals(t)) {
0323: return new OffsetRange(ts.offset(), ts.offset()
0324: + token.length());
0325: }
0326: }
0327: }
0328:
0329: return OffsetRange.NONE;
0330: }
0331:
/**
 * Search backwards in the token sequence for the heredoc begin token
 * matching the given heredoc end token (the reverse of findHeredocEnd).
 *
 * @param ts the token sequence, positioned at the heredoc end token
 * @param endToken the heredoc terminator token
 * @return the offset range of the begin token, or OffsetRange.NONE
 */
public static OffsetRange findHeredocBegin(
    TokenSequence<? extends RubyTokenId> ts,
    Token<? extends RubyTokenId> endToken) {
    // The terminator text may include a trailing newline; strip it for comparison
    String text = endToken.text().toString();
    if (text.endsWith("\n")) {
        text = text.substring(0, text.length() - 1);
    }
    // The begin marker may carry quotes: <<"EOF" or <<'EOF'
    String textQuotes = "\"" + text + "\"";
    String textSQuotes = "'" + text + "'";

    while (ts.movePrevious()) {
        Token<? extends RubyTokenId> token = ts.token();
        TokenId id = token.id();

        if (id == RubyTokenId.STRING_BEGIN
            || id == RubyTokenId.QUOTED_STRING_BEGIN) {
            String t = token.text().toString();
            String marker = null;
            // Peel off the "<<-" or "<<" prefix to get the bare marker
            if (t.startsWith("<<-")) {
                marker = t.substring(3);
            } else if (t.startsWith("<<")) {
                marker = t.substring(2);
            }
            if (marker != null
                && (text.equals(marker)
                || textQuotes.equals(marker) || textSQuotes
                    .equals(marker))) {
                return new OffsetRange(ts.offset(), ts.offset()
                    + token.length());
            }
        }
    }

    return OffsetRange.NONE;
}
0369:
0370: /** Search forwards in the token sequence until a token of type <code>down</code> is found */
0371: public static OffsetRange findFwd(BaseDocument doc,
0372: TokenSequence<? extends RubyTokenId> ts, TokenId up,
0373: TokenId down) {
0374: int balance = 0;
0375:
0376: while (ts.moveNext()) {
0377: Token<? extends RubyTokenId> token = ts.token();
0378: TokenId id = token.id();
0379:
0380: if (id == up) {
0381: balance++;
0382: } else if (id == down) {
0383: if (balance == 0) {
0384: return new OffsetRange(ts.offset(), ts.offset()
0385: + token.length());
0386: }
0387:
0388: balance--;
0389: }
0390: }
0391:
0392: return OffsetRange.NONE;
0393: }
0394:
0395: /** Search backwards in the token sequence until a token of type <code>up</code> is found */
0396: public static OffsetRange findBwd(BaseDocument doc,
0397: TokenSequence<? extends RubyTokenId> ts, TokenId up,
0398: TokenId down) {
0399: int balance = 0;
0400:
0401: while (ts.movePrevious()) {
0402: Token<? extends RubyTokenId> token = ts.token();
0403: TokenId id = token.id();
0404:
0405: if (id == up) {
0406: if (balance == 0) {
0407: return new OffsetRange(ts.offset(), ts.offset()
0408: + token.length());
0409: }
0410:
0411: balance++;
0412: } else if (id == down) {
0413: balance--;
0414: }
0415: }
0416:
0417: return OffsetRange.NONE;
0418: }
0419:
/** Find the token that begins a block terminated by "end". This is a token
 * in the END_PAIRS array. Walk backwards and find the corresponding token.
 * It does not use indentation for clues since this could be wrong and be
 * precisely the reason why the user is using pair matching to see what's wrong.
 */
public static OffsetRange findBegin(BaseDocument doc,
    TokenSequence<? extends RubyTokenId> ts) {
    // Counts intervening "end" tokens belonging to nested blocks
    int balance = 0;

    while (ts.movePrevious()) {
        Token<? extends RubyTokenId> token = ts.token();
        TokenId id = token.id();

        if (isBeginToken(id, doc, ts)) {
            // No matching dot for "do" used in conditionals etc.)) {
            if (balance == 0) {
                // This begin token is ours, not one of a nested block
                return new OffsetRange(ts.offset(), ts.offset()
                    + token.length());
            }

            balance--;
        } else if (id == RubyTokenId.END) {
            balance++;
        }
    }

    return OffsetRange.NONE;
}
0448:
0449: public static OffsetRange findEnd(BaseDocument doc,
0450: TokenSequence<? extends RubyTokenId> ts) {
0451: int balance = 0;
0452:
0453: while (ts.moveNext()) {
0454: Token<? extends RubyTokenId> token = ts.token();
0455: TokenId id = token.id();
0456:
0457: if (isBeginToken(id, doc, ts)) {
0458: balance--;
0459: } else if (id == RubyTokenId.END) {
0460: if (balance == 0) {
0461: return new OffsetRange(ts.offset(), ts.offset()
0462: + token.length());
0463: }
0464:
0465: balance++;
0466: }
0467: }
0468:
0469: return OffsetRange.NONE;
0470: }
0471:
0472: /** Determine whether "do" is an indent-token (e.g. matches an end) or if
0473: * it's simply a separator in while,until,for expressions)
0474: */
0475: public static boolean isEndmatchingDo(BaseDocument doc, int offset) {
0476: // In the following case, do is dominant:
0477: // expression.do
0478: // whatever
0479: // end
0480: //
0481: // However, not here:
0482: // while true do
0483: // whatever
0484: // end
0485: //
0486: // In the second case, the end matches the while, but in the first case
0487: // the end matches the do
0488:
0489: // Look at the first token of the current line
0490: try {
0491: int first = Utilities.getRowFirstNonWhite(doc, offset);
0492: if (first != -1) {
0493: Token<? extends RubyTokenId> token = getToken(doc,
0494: first);
0495: if (token != null) {
0496: TokenId id = token.id();
0497: if (id == RubyTokenId.WHILE
0498: || id == RubyTokenId.UNTIL
0499: || id == RubyTokenId.FOR) {
0500: return false;
0501: }
0502: }
0503: }
0504: } catch (BadLocationException ble) {
0505: Exceptions.printStackTrace(ble);
0506: }
0507:
0508: return true;
0509: }
0510:
0511: /**
0512: * Return true iff the given token is a token that should be matched
0513: * with a corresponding "end" token, such as "begin", "def", "module",
0514: * etc.
0515: */
0516: public static boolean isBeginToken(TokenId id, BaseDocument doc,
0517: int offset) {
0518: if (id == RubyTokenId.DO) {
0519: return isEndmatchingDo(doc, offset);
0520: }
0521: return END_PAIRS.contains(id);
0522: }
0523:
0524: /**
0525: * Return true iff the given token is a token that should be matched
0526: * with a corresponding "end" token, such as "begin", "def", "module",
0527: * etc.
0528: */
0529: public static boolean isBeginToken(TokenId id, BaseDocument doc,
0530: TokenSequence<? extends RubyTokenId> ts) {
0531: if (id == RubyTokenId.DO) {
0532: return isEndmatchingDo(doc, ts.offset());
0533: }
0534: return END_PAIRS.contains(id);
0535: }
0536:
/**
 * Return true iff the given token is a token that indents its content,
 * such as the various begin tokens as well as "else", "when", etc.
 *
 * @param id the token id to test against the INDENT_WORDS set
 */
public static boolean isIndentToken(TokenId id) {
    return INDENT_WORDS.contains(id);
}
0544:
/** Compute the balance of begin/end tokens on the line.
 * @param doc the document
 * @param offset The offset somewhere on the line
 * @param upToOffset If true, only compute the line balance up to the given offset (inclusive),
 * and if false compute the balance for the whole line
 */
public static int getBeginEndLineBalance(BaseDocument doc,
    int offset, boolean upToOffset) {
    try {
        int begin = Utilities.getRowStart(doc, offset);
        int end = upToOffset ? offset : Utilities.getRowEnd(doc,
            offset);

        TokenSequence<? extends RubyTokenId> ts = LexUtilities
            .getRubyTokenSequence(doc, begin);
        if (ts == null) {
            return 0;
        }

        ts.move(begin);

        if (!ts.moveNext()) {
            return 0;
        }

        // Positive balance: more block-opening tokens than "end"s on the line
        int balance = 0;

        do {
            Token<? extends RubyTokenId> token = ts.token();
            TokenId id = token.id();

            if (isBeginToken(id, doc, ts)) {
                balance++;
            } else if (id == RubyTokenId.END) {
                balance--;
            }
        } while (ts.moveNext() && (ts.offset() <= end));

        return balance;
    } catch (BadLocationException ble) {
        Exceptions.printStackTrace(ble);

        return 0;
    }
}
0590:
0591: /** Compute the balance of begin/end tokens on the line */
0592: public static int getLineBalance(BaseDocument doc, int offset,
0593: TokenId up, TokenId down) {
0594: try {
0595: int begin = Utilities.getRowStart(doc, offset);
0596: int end = Utilities.getRowEnd(doc, offset);
0597:
0598: TokenSequence<? extends RubyTokenId> ts = LexUtilities
0599: .getRubyTokenSequence(doc, begin);
0600: if (ts == null) {
0601: return 0;
0602: }
0603:
0604: ts.move(begin);
0605:
0606: if (!ts.moveNext()) {
0607: return 0;
0608: }
0609:
0610: int balance = 0;
0611:
0612: do {
0613: Token<? extends RubyTokenId> token = ts.token();
0614: TokenId id = token.id();
0615:
0616: if (id == up) {
0617: balance++;
0618: } else if (id == down) {
0619: balance--;
0620: }
0621: } while (ts.moveNext() && (ts.offset() <= end));
0622:
0623: return balance;
0624: } catch (BadLocationException ble) {
0625: Exceptions.printStackTrace(ble);
0626:
0627: return 0;
0628: }
0629: }
0630:
/**
 * The same as braceBalance but generalized to any pair of matching
 * tokens.
 * <p>
 * NOTE(review): the balance is computed over the entire document starting
 * at index 0 — the {@code offset} parameter is currently unused (see the
 * XXX below); confirm whether callers expect a whole-document balance.
 * @param open the token that increases the count
 * @param close the token that decreases the count
 */
public static int getTokenBalance(BaseDocument doc, TokenId open,
    TokenId close, int offset) throws BadLocationException {
    TokenSequence<? extends RubyTokenId> ts = LexUtilities
        .getRubyTokenSequence(doc, 0);
    if (ts == null) {
        return 0;
    }

    // XXX Why 0? Why not offset?
    ts.moveIndex(0);

    if (!ts.moveNext()) {
        return 0;
    }

    int balance = 0;

    do {
        Token t = ts.token();

        if (t.id() == open) {
            balance++;
        } else if (t.id() == close) {
            balance--;
        }
    } while (ts.moveNext());

    return balance;
}
0666:
0667: public static int getLineIndent(BaseDocument doc, int offset) {
0668: try {
0669: int start = Utilities.getRowStart(doc, offset);
0670: int end;
0671:
0672: if (Utilities.isRowWhite(doc, start)) {
0673: end = Utilities.getRowEnd(doc, offset);
0674: } else {
0675: end = Utilities.getRowFirstNonWhite(doc, start);
0676: }
0677:
0678: int indent = Utilities.getVisualColumn(doc, end);
0679:
0680: return indent;
0681: } catch (BadLocationException ble) {
0682: Exceptions.printStackTrace(ble);
0683:
0684: return 0;
0685: }
0686: }
0687:
0688: public static void indent(StringBuilder sb, int indent) {
0689: for (int i = 0; i < indent; i++) {
0690: sb.append(' ');
0691: }
0692: }
0693:
0694: public static String getIndentString(int indent) {
0695: StringBuilder sb = new StringBuilder(indent);
0696: indent(sb, indent);
0697:
0698: return sb.toString();
0699: }
0700:
0701: /**
0702: * Return true iff the line for the given offset is a Ruby comment line.
0703: * This will return false for lines that contain comments (even when the
0704: * offset is within the comment portion) but also contain code.
0705: */
0706: public static boolean isCommentOnlyLine(BaseDocument doc, int offset)
0707: throws BadLocationException {
0708: int begin = Utilities.getRowFirstNonWhite(doc, offset);
0709:
0710: if (begin == -1) {
0711: return false; // whitespace only
0712: }
0713:
0714: if (begin == doc.getLength()) {
0715: return false;
0716: }
0717:
0718: return doc.getText(begin, 1).equals("#");
0719: }
0720:
0721: public static void adjustLineIndentation(BaseDocument doc,
0722: int offset, int adjustment) {
0723: try {
0724: int lineBegin = Utilities.getRowStart(doc, offset);
0725:
0726: if (adjustment > 0) {
0727: doc.remove(lineBegin, adjustment);
0728: } else if (adjustment < 0) {
0729: doc.insertString(adjustment, LexUtilities
0730: .getIndentString(adjustment), null);
0731: }
0732: } catch (BadLocationException ble) {
0733: Exceptions.printStackTrace(ble);
0734: }
0735: }
0736:
/** Adjust the indentation of the line containing the given offset to the provided
 * indentation, and return the new indent.
 */
public static int setLineIndentation(BaseDocument doc, int offset,
    int indent) {
    int currentIndent = getLineIndent(doc, offset);

    try {
        int lineBegin = Utilities.getRowStart(doc, offset);

        if (lineBegin == -1) {
            return currentIndent;
        }

        // Positive adjust: shrink indentation; negative: grow it
        int adjust = currentIndent - indent;

        if (adjust > 0) {
            // Make sure that we are only removing spaces here
            String text = doc.getText(lineBegin, adjust);

            for (int i = 0; i < text.length(); i++) {
                if (!Character.isWhitespace(text.charAt(i))) {
                    throw new RuntimeException(
                        "Illegal indentation adjustment: Deleting non-whitespace chars: "
                        + text);
                }
            }

            doc.remove(lineBegin, adjust);
        } else if (adjust < 0) {
            adjust = -adjust;
            doc.insertString(lineBegin, getIndentString(adjust),
                null);
        }

        return indent;
    } catch (BadLocationException ble) {
        Exceptions.printStackTrace(ble);

        return currentIndent;
    }
}
0779:
/**
 * Return the string at the given position, or null if none.
 * <p>
 * Walks backwards from the caret over string-content tokens to find the
 * string begin token; when the string was lexed as a single segment its
 * text is returned directly, otherwise the segments are concatenated by
 * walking forward again.
 * <p>
 * NOTE(review): the backward skip loop does not check the return value of
 * movePrevious(); it appears to rely on the sequence always containing a
 * non-string token before the string — confirm this holds at document start.
 */
@SuppressWarnings("unchecked")
public static String getStringAt(int caretOffset,
    TokenHierarchy<Document> th) {
    TokenSequence<? extends RubyTokenId> ts = getRubyTokenSequence(
        th, caretOffset);

    if (ts == null) {
        return null;
    }

    ts.move(caretOffset);

    if (!ts.moveNext() && !ts.movePrevious()) {
        return null;
    }

    if (ts.offset() == caretOffset) {
        // We're looking at the offset to the RIGHT of the caret
        // and here I care about what's on the left
        ts.movePrevious();
    }

    Token<? extends RubyTokenId> token = ts.token();

    if (token != null) {
        TokenId id = token.id();

        // We're within a String that has embedded Ruby. Drop into the
        // embedded language and see if we're within a literal string there.
        if (id == RubyTokenId.EMBEDDED_RUBY) {
            ts = (TokenSequence) ts.embedded();
            assert ts != null;
            ts.move(caretOffset);

            if (!ts.moveNext() && !ts.movePrevious()) {
                return null;
            }

            token = ts.token();
            id = token.id();
        }

        // Text of the last string segment seen while scanning backwards
        String string = null;

        // Skip over embedded Ruby segments and literal strings until you find the beginning
        int segments = 0;

        while ((id == RubyTokenId.ERROR)
            || (id == RubyTokenId.STRING_LITERAL)
            || (id == RubyTokenId.QUOTED_STRING_LITERAL)
            || (id == RubyTokenId.EMBEDDED_RUBY)) {
            string = token.text().toString();
            segments++;
            ts.movePrevious();
            token = ts.token();
            id = token.id();
        }

        if ((id == RubyTokenId.STRING_BEGIN)
            || (id == RubyTokenId.QUOTED_STRING_BEGIN)) {
            if (segments == 1) {
                // Single segment: its text is the whole string
                return string;
            } else {
                // Build up the String from the sequence
                StringBuilder sb = new StringBuilder();

                while (ts.moveNext()) {
                    token = ts.token();
                    id = token.id();

                    if ((id == RubyTokenId.ERROR)
                        || (id == RubyTokenId.STRING_LITERAL)
                        || (id == RubyTokenId.QUOTED_STRING_LITERAL)
                        || (id == RubyTokenId.EMBEDDED_RUBY)) {
                        sb.append(token.text());
                    } else {
                        break;
                    }
                }

                return sb.toString();
            }
        }
    }

    return null;
}
0870:
0871: /**
0872: * Check if the caret is inside a literal string that is associated with
0873: * a require statement.
0874: *
0875: * @return The offset of the beginning of the require string, or -1
0876: * if the offset is not inside a require string.
0877: */
0878: public static int getRequireStringOffset(int caretOffset,
0879: TokenHierarchy<Document> th) {
0880: TokenSequence<? extends RubyTokenId> ts = getRubyTokenSequence(
0881: th, caretOffset);
0882:
0883: if (ts == null) {
0884: return -1;
0885: }
0886:
0887: ts.move(caretOffset);
0888:
0889: if (!ts.moveNext() && !ts.movePrevious()) {
0890: return -1;
0891: }
0892:
0893: if (ts.offset() == caretOffset) {
0894: // We're looking at the offset to the RIGHT of the caret
0895: // and here I care about what's on the left
0896: ts.movePrevious();
0897: }
0898:
0899: Token<? extends RubyTokenId> token = ts.token();
0900:
0901: if (token != null) {
0902: TokenId id = token.id();
0903:
0904: // Skip over embedded Ruby segments and literal strings until you find the beginning
0905: while ((id == RubyTokenId.ERROR)
0906: || (id == RubyTokenId.STRING_LITERAL)
0907: || (id == RubyTokenId.QUOTED_STRING_LITERAL)
0908: || (id == RubyTokenId.EMBEDDED_RUBY)) {
0909: ts.movePrevious();
0910: token = ts.token();
0911: id = token.id();
0912: }
0913:
0914: int stringStart = ts.offset() + token.length();
0915:
0916: if ((id == RubyTokenId.STRING_BEGIN)
0917: || (id == RubyTokenId.QUOTED_STRING_BEGIN)) {
0918: // Completion of literal strings within require calls
0919: while (ts.movePrevious()) {
0920: token = ts.token();
0921:
0922: id = token.id();
0923:
0924: if ((id == RubyTokenId.WHITESPACE)
0925: || (id == RubyTokenId.LPAREN)
0926: || (id == RubyTokenId.STRING_LITERAL)
0927: || (id == RubyTokenId.QUOTED_STRING_LITERAL)) {
0928: continue;
0929: }
0930:
0931: if (id == RubyTokenId.IDENTIFIER) {
0932: String text = token.text().toString();
0933:
0934: if (text.equals("require")
0935: || text.equals("load")) {
0936: return stringStart;
0937: } else {
0938: return -1;
0939: }
0940: } else {
0941: return -1;
0942: }
0943: }
0944: }
0945: }
0946:
0947: return -1;
0948: }
0949:
/**
 * Determine if the caret is inside a single-quoted (non-interpolated)
 * string; return the starting offset of its content, or -1 otherwise.
 */
public static int getSingleQuotedStringOffset(int caretOffset,
    TokenHierarchy<Document> th) {
    return getLiteralStringOffset(caretOffset, th,
        RubyTokenId.STRING_BEGIN);
}
0955:
/**
 * Determine if the caret is inside a double-quoted string; return the
 * starting offset of its content, or -1 otherwise.
 */
public static int getDoubleQuotedStringOffset(int caretOffset,
    TokenHierarchy<Document> th) {
    return getLiteralStringOffset(caretOffset, th,
        RubyTokenId.QUOTED_STRING_BEGIN);
}
0961:
/**
 * Determine if the caret is inside a regexp literal; return the starting
 * offset of its content, or -1 otherwise.
 */
public static int getRegexpOffset(int caretOffset,
    TokenHierarchy<Document> th) {
    return getLiteralStringOffset(caretOffset, th,
        RubyTokenId.REGEXP_BEGIN);
}
0967:
0968: /**
0969: * Determine if the caret is inside a literal string, and if so, return its starting
0970: * offset. Return -1 otherwise.
0971: */
0972: @SuppressWarnings("unchecked")
0973: private static int getLiteralStringOffset(int caretOffset,
0974: TokenHierarchy<Document> th, RubyTokenId begin) {
0975: TokenSequence<? extends RubyTokenId> ts = getRubyTokenSequence(
0976: th, caretOffset);
0977:
0978: if (ts == null) {
0979: return -1;
0980: }
0981:
0982: ts.move(caretOffset);
0983:
0984: if (!ts.moveNext() && !ts.movePrevious()) {
0985: return -1;
0986: }
0987:
0988: if (ts.offset() == caretOffset) {
0989: // We're looking at the offset to the RIGHT of the caret
0990: // and here I care about what's on the left
0991: ts.movePrevious();
0992: }
0993:
0994: Token<? extends RubyTokenId> token = ts.token();
0995:
0996: if (token != null) {
0997: TokenId id = token.id();
0998:
0999: // We're within a String that has embedded Ruby. Drop into the
1000: // embedded language and see if we're within a literal string there.
1001: if (id == RubyTokenId.EMBEDDED_RUBY) {
1002: ts = (TokenSequence) ts.embedded();
1003: assert ts != null;
1004: ts.move(caretOffset);
1005:
1006: if (!ts.moveNext() && !ts.movePrevious()) {
1007: return -1;
1008: }
1009:
1010: token = ts.token();
1011: id = token.id();
1012: }
1013:
1014: // Skip over embedded Ruby segments and literal strings until you find the beginning
1015: while ((id == RubyTokenId.ERROR)
1016: || (id == RubyTokenId.STRING_LITERAL)
1017: || (id == RubyTokenId.QUOTED_STRING_LITERAL)
1018: || (id == RubyTokenId.REGEXP_LITERAL)
1019: || (id == RubyTokenId.EMBEDDED_RUBY)) {
1020: ts.movePrevious();
1021: token = ts.token();
1022: id = token.id();
1023: }
1024:
1025: if (id == begin) {
1026: if (!ts.moveNext()) {
1027: return -1;
1028: }
1029:
1030: return ts.offset();
1031: }
1032: }
1033:
1034: return -1;
1035: }
1036:
1037: public static boolean isInsideQuotedString(BaseDocument doc,
1038: int offset) {
1039: TokenSequence<? extends RubyTokenId> ts = LexUtilities
1040: .getRubyTokenSequence(doc, offset);
1041:
1042: if (ts == null) {
1043: return false;
1044: }
1045:
1046: ts.move(offset);
1047:
1048: if (ts.moveNext()) {
1049: Token<? extends RubyTokenId> token = ts.token();
1050: TokenId id = token.id();
1051: if (id == RubyTokenId.QUOTED_STRING_LITERAL
1052: || id == RubyTokenId.QUOTED_STRING_END) {
1053: return true;
1054: }
1055: }
1056: if (ts.movePrevious()) {
1057: Token<? extends RubyTokenId> token = ts.token();
1058: TokenId id = token.id();
1059: if (id == RubyTokenId.QUOTED_STRING_LITERAL
1060: || id == RubyTokenId.QUOTED_STRING_BEGIN) {
1061: return true;
1062: }
1063: }
1064:
1065: return false;
1066: }
1067:
1068: public static boolean isInsideRegexp(BaseDocument doc, int offset) {
1069: TokenSequence<? extends RubyTokenId> ts = LexUtilities
1070: .getRubyTokenSequence(doc, offset);
1071:
1072: if (ts == null) {
1073: return false;
1074: }
1075:
1076: ts.move(offset);
1077:
1078: if (ts.moveNext()) {
1079: Token<? extends RubyTokenId> token = ts.token();
1080: TokenId id = token.id();
1081: if (id == RubyTokenId.REGEXP_LITERAL
1082: || id == RubyTokenId.REGEXP_END) {
1083: return true;
1084: }
1085: }
1086: if (ts.movePrevious()) {
1087: Token<? extends RubyTokenId> token = ts.token();
1088: TokenId id = token.id();
1089: if (id == RubyTokenId.REGEXP_LITERAL
1090: || id == RubyTokenId.REGEXP_BEGIN) {
1091: return true;
1092: }
1093: }
1094:
1095: return false;
1096: }
1097:
/**
 * Compute the document range of the comment "block" containing the given
 * caret offset.
 *
 * <p>Behavior, as implemented below:
 * <ul>
 * <li>caret in a line comment on a comment-only line: the range spans the
 *     maximal run of adjacent comment-only lines (trimmed to first/last
 *     non-white characters);</li>
 * <li>caret in a line comment trailing some code: just that comment token;</li>
 * <li>caret in a =begin/=end DOCUMENTATION token: the whole token;</li>
 * <li>otherwise: {@code OffsetRange.NONE}.</li>
 * </ul>
 *
 * @param doc the document to examine
 * @param caretOffset caret position within {@code doc}
 * @return the surrounding comment block range, or {@code OffsetRange.NONE}
 */
public static OffsetRange getCommentBlock(BaseDocument doc,
        int caretOffset) {
    // Check if the caret is within a comment, and if so insert a new
    // leaf "node" which contains the comment line and then comment block
    try {
        Token<? extends RubyTokenId> token = LexUtilities.getToken(
                doc, caretOffset);

        if ((token != null)
                && (token.id() == RubyTokenId.LINE_COMMENT)) {
            // First add a range for the current line
            int begin = Utilities.getRowStart(doc, caretOffset);
            int end = Utilities.getRowEnd(doc, caretOffset);

            if (LexUtilities.isCommentOnlyLine(doc, caretOffset)) {

                // Walk upward while the previous row is also a
                // comment-only line; once it is not, snap begin to the
                // first non-white character of the topmost comment row.
                while (begin > 0) {
                    int newBegin = Utilities.getRowStart(doc,
                            begin - 1);

                    if ((newBegin < 0)
                            || !LexUtilities.isCommentOnlyLine(doc,
                                    newBegin)) {
                        begin = Utilities.getRowFirstNonWhite(doc,
                                begin);
                        break;
                    }

                    begin = newBegin;
                }

                int length = doc.getLength();

                // Walk downward while the next row is also a
                // comment-only line; once it is not (or we hit EOF),
                // snap end just past the last non-white character.
                while (true) {
                    int newEnd = Utilities.getRowEnd(doc, end + 1);

                    if ((newEnd >= length)
                            || !LexUtilities.isCommentOnlyLine(doc,
                                    newEnd)) {
                        // +1 so the range includes the last character
                        end = Utilities
                                .getRowLastNonWhite(doc, end) + 1;
                        break;
                    }

                    end = newEnd;
                }

                if (begin < end) {
                    return new OffsetRange(begin, end);
                }
            } else {
                // It's just a line comment next to some code
                TokenHierarchy<Document> th = TokenHierarchy
                        .get((Document) doc);
                int offset = token.offset(th);
                return new OffsetRange(offset, offset
                        + token.length());
            }
        } else if (token != null
                && token.id() == RubyTokenId.DOCUMENTATION) {
            // Select the whole token block
            TokenHierarchy<BaseDocument> th = TokenHierarchy
                    .get(doc);
            int begin = token.offset(th);
            int end = begin + token.length();
            return new OffsetRange(begin, end);
        }
    } catch (BadLocationException ble) {
        // Row-boundary lookups past EOF can throw; treat as "no block"
        Exceptions.printStackTrace(ble);
    }

    return OffsetRange.NONE;
}
1171:
1172: /**
1173: * Back up to the first space character prior to the given offset - as long as
1174: * it's on the same line! If there's only leading whitespace on the line up
1175: * to the lex offset, return the offset itself
1176: */
1177: public static int findSpaceBegin(BaseDocument doc, int lexOffset) {
1178: TokenSequence ts = LexUtilities.getRubyTokenSequence(doc,
1179: lexOffset);
1180: if (ts == null) {
1181: return lexOffset;
1182: }
1183: boolean allowPrevLine = false;
1184: int lineStart;
1185: try {
1186: lineStart = Utilities.getRowStart(doc, Math.min(lexOffset,
1187: doc.getLength()));
1188: int prevLast = lineStart - 1;
1189: if (lineStart > 0) {
1190: prevLast = Utilities.getRowLastNonWhite(doc,
1191: lineStart - 1);
1192: if (prevLast != -1) {
1193: char c = doc.getText(prevLast, 1).charAt(0);
1194: if (c == ',') {
1195: // Arglist continuation? // TODO : check lexing
1196: allowPrevLine = true;
1197: }
1198: }
1199: }
1200: if (!allowPrevLine) {
1201: int firstNonWhite = Utilities.getRowFirstNonWhite(doc,
1202: lineStart);
1203: if (lexOffset <= firstNonWhite || firstNonWhite == -1) {
1204: return lexOffset;
1205: }
1206: } else {
1207: // Make lineStart so small that Math.max won't cause any problems
1208: int firstNonWhite = Utilities.getRowFirstNonWhite(doc,
1209: lineStart);
1210: if (prevLast >= 0
1211: && (lexOffset <= firstNonWhite || firstNonWhite == -1)) {
1212: return prevLast + 1;
1213: }
1214: lineStart = 0;
1215: }
1216: } catch (BadLocationException ble) {
1217: Exceptions.printStackTrace(ble);
1218: return lexOffset;
1219: }
1220: ts.move(lexOffset);
1221: if (ts.moveNext()) {
1222: if (lexOffset > ts.offset()) {
1223: // We're in the middle of a token
1224: return Math
1225: .max(
1226: (ts.token().id() == RubyTokenId.WHITESPACE) ? ts
1227: .offset()
1228: : lexOffset, lineStart);
1229: }
1230: while (ts.movePrevious()) {
1231: Token token = ts.token();
1232: if (token.id() != RubyTokenId.WHITESPACE) {
1233: return Math.max(ts.offset() + token.length(),
1234: lineStart);
1235: }
1236: }
1237: }
1238:
1239: return lexOffset;
1240: }
1241:
1242: /**
1243: * Get the rdoc documentation associated with the given node in the given document.
1244: * The node must have position information that matches the source in the document.
1245: */
1246: public static OffsetRange findRDocRange(BaseDocument baseDoc,
1247: int methodBegin) {
1248: int begin = methodBegin;
1249: try {
1250: if (methodBegin >= baseDoc.getLength()) {
1251: return OffsetRange.NONE;
1252: }
1253:
1254: // Search to previous lines, locate comments. Once we have a non-whitespace line that isn't
1255: // a comment, we're done
1256:
1257: int offset = Utilities.getRowStart(baseDoc, methodBegin);
1258: offset--;
1259:
1260: // Skip empty and whitespace lines
1261: while (offset >= 0) {
1262: // Find beginning of line
1263: offset = Utilities.getRowStart(baseDoc, offset);
1264:
1265: if (!Utilities.isRowEmpty(baseDoc, offset)
1266: && !Utilities.isRowWhite(baseDoc, offset)) {
1267: break;
1268: }
1269:
1270: offset--;
1271: }
1272:
1273: if (offset < 0) {
1274: return OffsetRange.NONE;
1275: }
1276:
1277: while (offset >= 0) {
1278: // Find beginning of line
1279: offset = Utilities.getRowStart(baseDoc, offset);
1280:
1281: if (Utilities.isRowEmpty(baseDoc, offset)
1282: || Utilities.isRowWhite(baseDoc, offset)) {
1283: // Empty lines not allowed within an rdoc
1284: break;
1285: }
1286:
1287: // This is a comment line we should include
1288: int lineBegin = Utilities.getRowFirstNonWhite(baseDoc,
1289: offset);
1290: int lineEnd = Utilities.getRowLastNonWhite(baseDoc,
1291: offset) + 1;
1292: String line = baseDoc.getText(lineBegin, lineEnd
1293: - lineBegin);
1294:
1295: // Tolerate "public", "private" and "protected" here --
1296: // Test::Unit::Assertions likes to put these in front of each
1297: // method.
1298: if (line.startsWith("#")) {
1299: begin = lineBegin;
1300: } else if (line.startsWith("=end")
1301: && (lineBegin == Utilities.getRowStart(baseDoc,
1302: offset))) {
1303: // It could be a =begin,=end document - see scanf.rb in Ruby lib for example. Treat this differently.
1304: int docBegin = findInlineDocStart(baseDoc, offset);
1305: if (docBegin != -1) {
1306: begin = docBegin;
1307: } else {
1308: return OffsetRange.NONE;
1309: }
1310: } else if (line.equals("public")
1311: || line.equals("private")
1312: || line.equals("protected")) { // NOI18N
1313: // Skip newlines back up to the comment
1314: offset--;
1315:
1316: while (offset >= 0) {
1317: // Find beginning of line
1318: offset = Utilities.getRowStart(baseDoc, offset);
1319:
1320: if (!Utilities.isRowEmpty(baseDoc, offset)
1321: && !Utilities.isRowWhite(baseDoc,
1322: offset)) {
1323: break;
1324: }
1325:
1326: offset--;
1327: }
1328:
1329: continue;
1330: } else {
1331: // No longer in a comment
1332: break;
1333: }
1334:
1335: // Previous line
1336: offset--;
1337: }
1338: } catch (BadLocationException ble) {
1339: Exceptions.printStackTrace(ble);
1340: }
1341:
1342: if (methodBegin > begin) {
1343: return new OffsetRange(begin, methodBegin);
1344: } else {
1345: return OffsetRange.NONE;
1346: }
1347: }
1348:
1349: private static int findInlineDocStart(BaseDocument baseDoc,
1350: int offset) throws BadLocationException {
1351: // offset points to a line containing =end
1352: // Skip the =end list
1353: offset = Utilities.getRowStart(baseDoc, offset);
1354: offset--;
1355:
1356: // Search backwards in the document for the =begin (if any) and add all lines in reverse
1357: // order in between.
1358: while (offset >= 0) {
1359: // Find beginning of line
1360: offset = Utilities.getRowStart(baseDoc, offset);
1361:
1362: // This is a comment line we should include
1363: int lineBegin = offset;
1364: int lineEnd = Utilities.getRowEnd(baseDoc, offset);
1365: String line = baseDoc.getText(lineBegin, lineEnd
1366: - lineBegin);
1367:
1368: if (line.startsWith("=begin")) {
1369: // We're done!
1370: return lineBegin;
1371: }
1372:
1373: // Previous line
1374: offset--;
1375: }
1376:
1377: return -1;
1378: }
1379: }
|