0001: /*
0002: ***** BEGIN LICENSE BLOCK *****
0003: * Version: CPL 1.0/GPL 2.0/LGPL 2.1
0004: *
0005: * The contents of this file are subject to the Common Public
0006: * License Version 1.0 (the "License"); you may not use this file
0007: * except in compliance with the License. You may obtain a copy of
0008: * the License at http://www.eclipse.org/legal/cpl-v10.html
0009: *
0010: * Software distributed under the License is distributed on an "AS
0011: * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
0012: * implied. See the License for the specific language governing
0013: * rights and limitations under the License.
0014: *
0015: * Copyright (C) 2002 Benoit Cerrina <b.cerrina@wanadoo.fr>
0016: * Copyright (C) 2002-2004 Anders Bengtsson <ndrsbngtssn@yahoo.se>
0017: * Copyright (C) 2002-2004 Jan Arne Petersen <jpetersen@uni-bonn.de>
0018: * Copyright (C) 2004-2006 Thomas E Enebo <enebo@acm.org>
0019: * Copyright (C) 2004 Stefan Matthias Aust <sma@3plus4.de>
0020: * Copyright (C) 2004-2005 David Corbin <dcorbin@users.sourceforge.net>
0021: * Copyright (C) 2005 Zach Dennis <zdennis@mktec.com>
0022: * Copyright (C) 2006 Thomas Corbat <tcorbat@hsr.ch>
0023: *
0024: * Alternatively, the contents of this file may be used under the terms of
0025: * either of the GNU General Public License Version 2 or later (the "GPL"),
0026: * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
0027: * in which case the provisions of the GPL or the LGPL are applicable instead
0028: * of those above. If you wish to allow use of your version of this file only
0029: * under the terms of either the GPL or the LGPL, and not to allow others to
0030: * use your version of this file under the terms of the CPL, indicate your
0031: * decision by deleting the provisions above and replace them with the notice
0032: * and other provisions required by the GPL or the LGPL. If you do not delete
0033: * the provisions above, a recipient may use your version of this file under
0034: * the terms of any one of the CPL, the GPL or the LGPL.
0035: ***** END LICENSE BLOCK *****/
0036: package org.jruby.lexer.yacc;
0037:
0038: import java.io.IOException;
0039:
0040: import java.math.BigInteger;
0041:
0042: import org.jruby.ast.BackRefNode;
0043: import org.jruby.ast.BignumNode;
0044: import org.jruby.ast.CommentNode;
0045: import org.jruby.ast.FixnumNode;
0046: import org.jruby.ast.FloatNode;
0047: import org.jruby.ast.NthRefNode;
0048: import org.jruby.common.IRubyWarnings;
0049: import org.jruby.parser.BlockStaticScope;
0050: import org.jruby.parser.ParserSupport;
0051: import org.jruby.parser.StaticScope;
0052: import org.jruby.parser.Tokens;
0053: import org.jruby.util.IdUtil;
0054:
0055: /** This is a port of the MRI lexer to Java it is compatible to Ruby 1.8.1.
0056: */
0057: public class RubyYaccLexer {
// Last token read via yylex().
private int token;

// Value of last token which had a value associated with it.
Object yaccValue;

// Stream of data that yylex() examines.
private LexerSource src;

// Used for a tiny smidgen of grammar in the lexer (see setParserSupport()).
private ParserSupport parserSupport = null;

// Sink that handles warnings emitted while lexing (see arg_ambiguous()).
private IRubyWarnings warnings;

// Additional context surrounding tokens that both the lexer and
// grammar use.
private LexState lex_state;

// BEGIN NETBEANS MODIFICATIONS
// Whether or not the lexer should be "space preserving" - see setPreserveSpaces/getPreserveSpaces.
// When true, the lexer considers whitespace sequences and code comments to be separate
// tokens to return to the client. Parsers typically do not want to see any
// whitespace or comment tokens - but an IDE trying to tokenize a chunk of source code
// does want to identify these separately. The default, false, means the parser mode.
private boolean preserveSpaces;

// List of HeredocTerms to be applied when we see a new line.
// This is done to be able to handle heredocs in input source order (instead of
// the normal JRuby operation of handling them out of order by stashing the rest of
// the line on the side while searching for the end of the heredoc, and then pushing
// the line back on the input before proceeding). Out-of-order handling of tokens
// is difficult for the IDE to handle, so in syntax-highlighting mode we process the
// output differently. When we see a heredoc token, we return a normal string-begin
// token, but we also push the heredoc term (without line-state) into the "newline-list"
// and continue processing normally (with no string strterm in effect).
// Whenever we get to a new line, we look at the newline list, and if we find something
// there, we pull it off and set it as the current string term and use it to process
// the string literal and end token.
// NOTE: This list should not be modified but rather duplicated, in order to ensure
// that incremental lexing (which relies on pulling out these lists at token boundaries)
// will not interfere with each other.
0100:
0101: public static class HeredocContext {
0102: private HeredocTerm[] heredocTerms;
0103: private boolean[] lookingForEnds;
0104:
0105: public HeredocContext(HeredocTerm term) {
0106: this .heredocTerms = new HeredocTerm[] { term, term };
0107: this .lookingForEnds = new boolean[] { false, true };
0108: }
0109:
0110: private HeredocContext(HeredocTerm[] terms,
0111: boolean[] lookingForEnds) {
0112: this .heredocTerms = terms;
0113: this .lookingForEnds = lookingForEnds;
0114: }
0115:
0116: private HeredocContext add(HeredocTerm h) {
0117: // Add 2 entries: one for starting lexing of the string, one for the end token
0118: HeredocTerm[] copy = new HeredocTerm[heredocTerms.length + 2];
0119: System.arraycopy(heredocTerms, 0, copy, 0,
0120: heredocTerms.length);
0121: copy[heredocTerms.length] = h;
0122: copy[heredocTerms.length + 1] = h;
0123:
0124: boolean[] copy2 = new boolean[lookingForEnds.length + 2];
0125: System.arraycopy(lookingForEnds, 0, copy2, 0,
0126: lookingForEnds.length);
0127: copy2[lookingForEnds.length] = false;
0128: copy2[lookingForEnds.length + 1] = true;
0129:
0130: HeredocContext hc = new HeredocContext(copy, copy2);
0131:
0132: return hc;
0133: }
0134:
0135: private HeredocTerm getTerm() {
0136: return heredocTerms[0];
0137: }
0138:
0139: private HeredocContext pop() {
0140: if (heredocTerms.length > 1) {
0141: HeredocTerm[] copy = new HeredocTerm[heredocTerms.length - 1];
0142: System.arraycopy(heredocTerms, 1, copy, 0, copy.length);
0143:
0144: boolean[] copy2 = new boolean[lookingForEnds.length - 1];
0145: System.arraycopy(lookingForEnds, 1, copy2, 0,
0146: copy2.length);
0147:
0148: HeredocContext hc = new HeredocContext(copy, copy2);
0149: return hc;
0150: } else {
0151: return null;
0152: }
0153: }
0154:
0155: public boolean isLookingForEnd() {
0156: return lookingForEnds[0];
0157: }
0158:
0159: //@Override
0160: public String toString() {
0161: StringBuilder sb = new StringBuilder(
0162: "HeredocContext(count=");
0163: sb.append(Integer.toString(heredocTerms.length));
0164: sb.append("):");
0165: for (int i = 0; i < heredocTerms.length; i++) {
0166: if (i > 0) {
0167: sb.append(",");
0168: }
0169: sb.append("end:");
0170: sb.append(lookingForEnds[i]);
0171: sb.append(",term:");
0172: sb.append(heredocTerms[i]);
0173: }
0174: return sb.toString();
0175: }
0176:
0177: //@Override
0178: public int hashCode() {
0179: return heredocTerms[0].getMutableState().hashCode();
0180: }
0181:
0182: //@Override
0183: public boolean equals(Object other) {
0184: if (other instanceof HeredocContext) {
0185: HeredocContext o = (HeredocContext) other;
0186: if (o.heredocTerms.length != heredocTerms.length) {
0187: return false;
0188: }
0189: return heredocTerms[0].getMutableState().equals(
0190: o.heredocTerms[0].getMutableState());
0191: } else {
0192: return false;
0193: }
0194: }
0195: }
0196:
// Pending-heredoc state for IDE mode; null when no heredoc is outstanding.
public HeredocContext heredocContext;

// END NETBEANS MODIFICATIONS

// Temporary buffer to build up a potential token. Consumer takes responsibility to reset
// this before use.
private StringBuffer tokenBuffer = new StringBuffer(60);

// Stack states shared with the grammar (see getConditionState/getCmdArgumentState).
private StackState conditionState = new StackState();
private StackState cmdArgumentState = new StackState();
// Active string/regexp/heredoc terminator; null when not inside a literal.
private StrTerm lex_strterm;
// True when the next token may begin a command (set after newline, reset each yylex()).
private boolean commandStart;

// Give a name to a value. Enebo: This should be used more.
static final int EOF = 0;

// ruby constants for strings (should this be moved somewhere else?)
static final int STR_FUNC_ESCAPE = 0x01;
static final int STR_FUNC_EXPAND = 0x02;
static final int STR_FUNC_REGEXP = 0x04;
static final int STR_FUNC_QWORDS = 0x08;
static final int STR_FUNC_SYMBOL = 0x10;
// When the heredoc identifier specifies <<-EOF, indentation before the identifier is ok (the '-').
static final int STR_FUNC_INDENT = 0x20;

// Pre-combined flag sets for each quote flavor.
private final int str_squote = 0;
private final int str_dquote = STR_FUNC_EXPAND;
private final int str_xquote = STR_FUNC_EXPAND;
private final int str_regexp = STR_FUNC_REGEXP | STR_FUNC_ESCAPE
        | STR_FUNC_EXPAND;
private final int str_ssym = STR_FUNC_SYMBOL;
private final int str_dsym = STR_FUNC_SYMBOL | STR_FUNC_EXPAND;
0229:
/** Creates a lexer in its initial (reset) state. */
public RubyYaccLexer() {
    reset();
}

/**
 * Restore this lexer to its initial state so the instance can be reused.
 * Clears the last token, value, source, string terminator, and stacks.
 */
public void reset() {
    token = 0;
    yaccValue = null;
    src = null;
    // Immediately overridden below; kept so the upstream-JRuby diff inside
    // the NETBEANS markers stays additive.
    lex_state = null;
    // BEGIN NETBEANS MODIFICATIONS
    // The null state causes problems in some scenarios for me. Besides, using null to
    // represent an initial state doesn't seem like a good idea.
    lex_state = LexState.EXPR_BEG;
    // END NETBEANS MODIFICATIONS
    resetStacks();
    lex_strterm = null;
    commandStart = true;
}
0248:
/**
 * How the parser advances to the next token.
 *
 * @return true if not at end of file (EOF).
 */
public boolean advance() throws IOException {
    return (token = yylex()) != EOF;
}

/**
 * Last token read from the lexer at the end of a call to yylex().
 *
 * @return last token read
 */
public int token() {
    return token;
}

// Exposes the shared scratch buffer; callers must reset it before use.
public StringBuffer getTokenBuffer() {
    return tokenBuffer;
}

/**
 * Value of last token (if it is a token which has a value).
 *
 * @return value of last value-laden token
 */
public Object value() {
    return yaccValue;
}

// Factory the current source uses to construct positions.
public ISourcePositionFactory getPositionFactory() {
    return src.getPositionFactory();
}

/**
 * Get position information for Token/Node that follows node represented by startPosition
 * and current lexer location.
 *
 * @param startPosition previous node/token
 * @param inclusive include previous node into position information of current node
 * @return a new position
 */
public ISourcePosition getPosition(ISourcePosition startPosition,
        boolean inclusive) {
    return src.getPosition(startPosition, inclusive);
}

// Position at the current lexer location only (no preceding node).
public ISourcePosition getPosition() {
    return src.getPosition(null, false);
}
0300:
/**
 * Parse must pass its support object for some check at bottom of
 * yylex(). Ruby does it this way as well (i.e. a little parsing
 * logic in the lexer).
 *
 * @param parserSupport
 */
public void setParserSupport(ParserSupport parserSupport) {
    this.parserSupport = parserSupport;
}

/**
 * Allow the parser to set the source for its lexer.
 *
 * @param source where the lexer gets raw data
 */
public void setSource(LexerSource source) {
    this.src = source;
}

// Current string/heredoc terminator, or null when outside a literal.
public StrTerm getStrTerm() {
    return lex_strterm;
}

public void setStrTerm(StrTerm strterm) {
    this.lex_strterm = strterm;
}

// Clears both grammar-shared stacks (condition and command-argument).
public void resetStacks() {
    conditionState.reset();
    cmdArgumentState.reset();
}

public void setWarnings(IRubyWarnings warnings) {
    this.warnings = warnings;
}

// Grammar actions drive the lexer state through this setter.
public void setState(LexState state) {
    this.lex_state = state;
}

public StackState getCmdArgumentState() {
    return cmdArgumentState;
}

public StackState getConditionState() {
    return conditionState;
}

// Grammar actions may overwrite the token value directly.
public void setValue(Object yaccValue) {
    this.yaccValue = yaccValue;
}
0353:
0354: private boolean isNext_identchar() throws IOException {
0355: char c = src.read();
0356: src.unread(c);
0357:
0358: return c != EOF && (Character.isLetterOrDigit(c) || c == '_');
0359: }
0360:
0361: private Object getInteger(String value, int radix) {
0362: try {
0363: return new FixnumNode(getPosition(), Long.parseLong(value,
0364: radix));
0365: } catch (NumberFormatException e) {
0366: return new BignumNode(getPosition(), new BigInteger(value,
0367: radix));
0368: }
0369: }
0370:
0371: /**
0372: * Do the next characters from the source match provided String in a case insensitive manner.
0373: * If so, then consume those characters and that string. Otherwise, consume none of them and
0374: * return null.
0375: *
0376: * @param s to be matched against
0377: * @return string if string matches, null otherwise
0378: */
0379: private String isNextNoCase(String s) throws IOException {
0380: StringBuffer buf = new StringBuffer();
0381:
0382: for (int i = 0; i < s.length(); i++) {
0383: char c = s.charAt(i);
0384: char r = src.read();
0385: buf.append(r);
0386:
0387: if (Character.toLowerCase(c) != r
0388: && Character.toUpperCase(c) != r) {
0389: src.unreadMany(buf);
0390: return null;
0391: }
0392: }
0393:
0394: return buf.toString();
0395: }
0396:
0397: /**
0398: * @param c the character to test
0399: * @return true if character is a hex value (0-9a-f)
0400: */
0401: static final boolean isHexChar(char c) {
0402: return Character.isDigit(c) || ('a' <= c && c <= 'f')
0403: || ('A' <= c && c <= 'F');
0404: }
0405:
0406: /**
0407: * @param c the character to test
0408: * @return true if character is an octal value (0-7)
0409: */
0410: static final boolean isOctChar(char c) {
0411: return '0' <= c && c <= '7';
0412: }
0413:
0414: /**
0415: * @param c is character to be compared
0416: * @return whether c is an identifier or not
0417: */
0418: private static final boolean isIdentifierChar(char c) {
0419: return Character.isLetterOrDigit(c) || c == '_';
0420: }
0421:
0422: /**
0423: * What type/kind of quote are we dealing with?
0424: *
0425: * @param c first character the the quote construct
0426: * @return a token that specifies the quote type
0427: */
0428: private int parseQuote(char c) throws IOException {
0429: char begin, end;
0430: boolean shortHand;
0431:
0432: // Short-hand (e.g. %{,%.,%!,... versus %Q{).
0433: if (!Character.isLetterOrDigit(c)) {
0434: begin = c;
0435: c = 'Q';
0436: shortHand = true;
0437: // Long-hand (e.g. %Q{}).
0438: } else {
0439: shortHand = false;
0440: begin = src.read();
0441: if (Character.isLetterOrDigit(begin) /* no mb || ismbchar(term)*/) {
0442: throw new SyntaxException(getPosition(),
0443: "unknown type of %string");
0444: }
0445: }
0446: if (c == EOF || begin == EOF) {
0447: throw new SyntaxException(getPosition(),
0448: "unterminated quoted string meets end of file");
0449: }
0450:
0451: // Figure end-char. '\0' is special to indicate begin=end and that no nesting?
0452: if (begin == '(')
0453: end = ')';
0454: else if (begin == '[')
0455: end = ']';
0456: else if (begin == '{')
0457: end = '}';
0458: else if (begin == '<')
0459: end = '>';
0460: else {
0461: end = begin;
0462: begin = '\0';
0463: }
0464: ;
0465:
0466: switch (c) {
0467: case 'Q':
0468: lex_strterm = new StringTerm(str_dquote, end, begin);
0469: yaccValue = new Token("%"
0470: + (shortHand ? ("" + end) : ("" + c + begin)),
0471: getPosition());
0472: return Tokens.tSTRING_BEG;
0473:
0474: case 'q':
0475: lex_strterm = new StringTerm(str_squote, end, begin);
0476: yaccValue = new Token("%" + c + begin, getPosition());
0477: return Tokens.tSTRING_BEG;
0478:
0479: case 'W':
0480: lex_strterm = new StringTerm(str_dquote | STR_FUNC_QWORDS,
0481: end, begin);
0482: do {
0483: c = src.read();
0484: } while (Character.isWhitespace(c));
0485: src.unread(c);
0486: yaccValue = new Token("%" + c + begin, getPosition());
0487: return Tokens.tWORDS_BEG;
0488:
0489: case 'w':
0490: lex_strterm = new StringTerm(str_squote | STR_FUNC_QWORDS,
0491: end, begin);
0492: do {
0493: c = src.read();
0494: } while (Character.isWhitespace(c));
0495: src.unread(c);
0496: yaccValue = new Token("%" + c + begin, getPosition());
0497: return Tokens.tQWORDS_BEG;
0498:
0499: case 'x':
0500: lex_strterm = new StringTerm(str_xquote, end, begin);
0501: yaccValue = new Token("%" + c + begin, getPosition());
0502: return Tokens.tXSTRING_BEG;
0503:
0504: case 'r':
0505: lex_strterm = new StringTerm(str_regexp, end, begin);
0506: yaccValue = new Token("%" + c + begin, getPosition());
0507: return Tokens.tREGEXP_BEG;
0508:
0509: case 's':
0510: lex_strterm = new StringTerm(str_ssym, end, begin);
0511: lex_state = LexState.EXPR_FNAME;
0512: yaccValue = new Token("%" + c + begin, getPosition());
0513: return Tokens.tSYMBEG;
0514:
0515: default:
0516: throw new SyntaxException(
0517: getPosition(),
0518: "Unknown type of %string. Expected 'Q', 'q', 'w', 'x', 'r' or any non letter character, but found '"
0519: + c + "'.");
0520: }
0521: }
0522:
/**
 * Lex a heredoc opener (called after "<<" has been seen). Reads an optional
 * '-' (indent flag), then either a quoted identifier ('x', "x", or `x`) or a
 * bare identifier, into tokenBuffer. On success installs a HeredocTerm as
 * lex_strterm (or, in IDE mode, queues it on heredocContext) and returns a
 * string-begin token; returns 0 when this is not actually a heredoc so the
 * caller can treat "<<" as an operator.
 *
 * @return Tokens.tSTRING_BEG / Tokens.tXSTRING_BEG, or 0 if not a heredoc
 * @throws SyntaxException when a quoted identifier is unterminated
 */
private int hereDocumentIdentifier() throws IOException {
    char c = src.read();
    int term;

    int func = 0;
    if (c == '-') {
        c = src.read();
        func = STR_FUNC_INDENT;
    }

    if (c == '\'' || c == '"' || c == '`') {
        // Quoted identifier: the quote decides the string semantics
        // ('' = no expansion, "" = expansion, `` = command string).
        if (c == '\'') {
            func |= str_squote;
        } else if (c == '"') {
            func |= str_dquote;
        } else {
            func |= str_xquote;
        }

        tokenBuffer.setLength(0);
        term = c;
        while ((c = src.read()) != EOF && c != term) {
            tokenBuffer.append(c);
        }
        if (c == EOF) {
            throw new SyntaxException(getPosition(),
                    "unterminated here document identifier");
        }
    } else {
        if (!isIdentifierChar(c)) {
            // Not a heredoc after all: push back what we consumed
            // (including the '-' if we ate one) and bail out.
            src.unread(c);
            if ((func & STR_FUNC_INDENT) != 0) {
                src.unread('-');
            }
            return 0;
        }
        // Bare identifier behaves like a double-quoted heredoc.
        tokenBuffer.setLength(0);
        term = '"';
        func |= str_dquote;
        do {
            tokenBuffer.append(c);
        } while ((c = src.read()) != EOF && isIdentifierChar(c));
        src.unread(c);
    }
    // BEGIN NETBEANS MODIFICATIONS
    // See issue #93990
    // It is very difficult for the IDE (especially with incremental lexing)
    // to handle heredocs with additional input on the line, where the
    // input end up getting processed out of order (JRuby will read the rest
    // of the line, process up to the end token, then stash the rest of the line
    // back on the input and continue (which could process another heredoc)
    // and then just jump over the heredocs since input is processed out of order.
    // Instead, use our own HeredocTerms which behave differently; they don't
    // mess with the output, and will be handled differently from within
    // the lexer in that it gets invited back on the next line (in order)
    if (preserveSpaces) {
        String tok = tokenBuffer.toString();
        // null line: in-order mode never stashes the rest of the line.
        HeredocTerm h = new HeredocTerm(tok, func, null);

        if (term == '`') {
            yaccValue = new Token("`", getPosition());
            return Tokens.tXSTRING_BEG;
        }

        yaccValue = new Token("\"", getPosition());

        if (heredocContext == null) {
            heredocContext = new HeredocContext(h);
        } else {
            heredocContext = heredocContext.add(h);
        }

        return Tokens.tSTRING_BEG;

    }
    // END NETBEANS MODIFICATIONS

    // Parser mode: stash the remainder of the current line inside the
    // HeredocTerm so it can be restored after the heredoc body is lexed.
    String line = src.readLine() + '\n';
    String tok = tokenBuffer.toString();
    lex_strterm = new HeredocTerm(tok, func, line);

    if (term == '`') {
        yaccValue = new Token("`", getPosition());
        return Tokens.tXSTRING_BEG;
    }

    yaccValue = new Token("\"", getPosition());
    // Hacky: Advance position to eat newline here....
    getPosition();
    return Tokens.tSTRING_BEG;
}
0614:
// Emit the standard MRI warning for an ambiguous first method argument
// (e.g. whether "foo -1" is a call with -1 or a subtraction).
private void arg_ambiguous() {
    warnings.warning(getPosition(),
            "Ambiguous first argument; make sure.");
}
0619:
/**
 * Read a comment up to end of line. When found each comment will get stored away into
 * the parser result so that any interested party can use them as they seem fit. One idea
 * is that IDE authors can do distance based heuristics to associate these comments to the
 * AST node they think they belong to.
 *
 * @param c last character read from lexer source (the '#')
 * @return newline or eof value
 */
protected int readComment(char c) throws IOException {
    ISourcePosition startPosition = src.getPosition();
    tokenBuffer.setLength(0);
    tokenBuffer.append(c);

    // FIXME: Consider making a better LexerSource.readLine
    while ((c = src.read()) != '\n') {
        if (c == EOF) {
            break;
        }
        tokenBuffer.append(c);
    }
    // Push the terminator back (newline, or the EOF marker) so the caller's
    // switch sees it and can fall through to newline handling.
    src.unread(c);

    // BEGIN NETBEANS MODIFICATIONS
    // parserSupport is null in pure-lexing (IDE) usage; only record the
    // comment when a parser result exists to attach it to.
    if (parserSupport != null) {
    // END NETBEANS MODIFICATIONS
        // Store away each comment to parser result so IDEs can do whatever they want with them.
        ISourcePosition position = startPosition
                .union(getPosition());
        parserSupport.getResult().addComment(
                new CommentNode(position, tokenBuffer.toString()));
    // BEGIN NETBEANS MODIFICATIONS
    }
    // END NETBEANS MODIFICATIONS

    return c;
}
0657:
0658: /*
0659: * Not normally used, but is left in here since it can be useful in debugging
0660: * grammar and lexing problems.
0661: private void printToken(int token) {
0662: //System.out.print("LOC: " + support.getPosition() + " ~ ");
0663:
0664: switch (token) {
0665: case Tokens.yyErrorCode: System.err.print("yyErrorCode,"); break;
0666: case Tokens.kCLASS: System.err.print("kClass,"); break;
0667: case Tokens.kMODULE: System.err.print("kModule,"); break;
0668: case Tokens.kDEF: System.err.print("kDEF,"); break;
0669: case Tokens.kUNDEF: System.err.print("kUNDEF,"); break;
0670: case Tokens.kBEGIN: System.err.print("kBEGIN,"); break;
0671: case Tokens.kRESCUE: System.err.print("kRESCUE,"); break;
0672: case Tokens.kENSURE: System.err.print("kENSURE,"); break;
0673: case Tokens.kEND: System.err.print("kEND,"); break;
0674: case Tokens.kIF: System.err.print("kIF,"); break;
0675: case Tokens.kUNLESS: System.err.print("kUNLESS,"); break;
0676: case Tokens.kTHEN: System.err.print("kTHEN,"); break;
0677: case Tokens.kELSIF: System.err.print("kELSIF,"); break;
0678: case Tokens.kELSE: System.err.print("kELSE,"); break;
0679: case Tokens.kCASE: System.err.print("kCASE,"); break;
0680: case Tokens.kWHEN: System.err.print("kWHEN,"); break;
0681: case Tokens.kWHILE: System.err.print("kWHILE,"); break;
0682: case Tokens.kUNTIL: System.err.print("kUNTIL,"); break;
0683: case Tokens.kFOR: System.err.print("kFOR,"); break;
0684: case Tokens.kBREAK: System.err.print("kBREAK,"); break;
0685: case Tokens.kNEXT: System.err.print("kNEXT,"); break;
0686: case Tokens.kREDO: System.err.print("kREDO,"); break;
0687: case Tokens.kRETRY: System.err.print("kRETRY,"); break;
0688: case Tokens.kIN: System.err.print("kIN,"); break;
0689: case Tokens.kDO: System.err.print("kDO,"); break;
0690: case Tokens.kDO_COND: System.err.print("kDO_COND,"); break;
0691: case Tokens.kDO_BLOCK: System.err.print("kDO_BLOCK,"); break;
0692: case Tokens.kRETURN: System.err.print("kRETURN,"); break;
0693: case Tokens.kYIELD: System.err.print("kYIELD,"); break;
0694: case Tokens.kSUPER: System.err.print("kSUPER,"); break;
0695: case Tokens.kSELF: System.err.print("kSELF,"); break;
0696: case Tokens.kNIL: System.err.print("kNIL,"); break;
0697: case Tokens.kTRUE: System.err.print("kTRUE,"); break;
0698: case Tokens.kFALSE: System.err.print("kFALSE,"); break;
0699: case Tokens.kAND: System.err.print("kAND,"); break;
0700: case Tokens.kOR: System.err.print("kOR,"); break;
0701: case Tokens.kNOT: System.err.print("kNOT,"); break;
0702: case Tokens.kIF_MOD: System.err.print("kIF_MOD,"); break;
0703: case Tokens.kUNLESS_MOD: System.err.print("kUNLESS_MOD,"); break;
0704: case Tokens.kWHILE_MOD: System.err.print("kWHILE_MOD,"); break;
0705: case Tokens.kUNTIL_MOD: System.err.print("kUNTIL_MOD,"); break;
0706: case Tokens.kRESCUE_MOD: System.err.print("kRESCUE_MOD,"); break;
0707: case Tokens.kALIAS: System.err.print("kALIAS,"); break;
0708: case Tokens.kDEFINED: System.err.print("kDEFINED,"); break;
0709: case Tokens.klBEGIN: System.err.print("klBEGIN,"); break;
0710: case Tokens.klEND: System.err.print("klEND,"); break;
0711: case Tokens.k__LINE__: System.err.print("k__LINE__,"); break;
0712: case Tokens.k__FILE__: System.err.print("k__FILE__,"); break;
0713: case Tokens.tIDENTIFIER: System.err.print("tIDENTIFIER["+ value() + "],"); break;
0714: case Tokens.tFID: System.err.print("tFID[" + value() + "],"); break;
0715: case Tokens.tGVAR: System.err.print("tGVAR[" + value() + "],"); break;
0716: case Tokens.tIVAR: System.err.print("tIVAR[" + value() +"],"); break;
0717: case Tokens.tCONSTANT: System.err.print("tCONSTANT["+ value() +"],"); break;
0718: case Tokens.tCVAR: System.err.print("tCVAR,"); break;
0719: case Tokens.tINTEGER: System.err.print("tINTEGER,"); break;
0720: case Tokens.tFLOAT: System.err.print("tFLOAT,"); break;
0721: case Tokens.tSTRING_CONTENT: System.err.print("tSTRING_CONTENT[" + yaccValue + "],"); break;
0722: case Tokens.tSTRING_BEG: System.err.print("tSTRING_BEG,"); break;
0723: case Tokens.tSTRING_END: System.err.print("tSTRING_END,"); break;
0724: case Tokens.tSTRING_DBEG: System.err.print("STRING_DBEG,"); break;
0725: case Tokens.tSTRING_DVAR: System.err.print("tSTRING_DVAR,"); break;
0726: case Tokens.tXSTRING_BEG: System.err.print("tXSTRING_BEG,"); break;
0727: case Tokens.tREGEXP_BEG: System.err.print("tREGEXP_BEG,"); break;
0728: case Tokens.tREGEXP_END: System.err.print("tREGEXP_END,"); break;
0729: case Tokens.tWORDS_BEG: System.err.print("tWORDS_BEG,"); break;
0730: case Tokens.tQWORDS_BEG: System.err.print("tQWORDS_BEG,"); break;
0731: case Tokens.tBACK_REF: System.err.print("tBACK_REF,"); break;
0732: case Tokens.tNTH_REF: System.err.print("tNTH_REF,"); break;
0733: case Tokens.tUPLUS: System.err.print("tUPLUS"); break;
0734: case Tokens.tUMINUS: System.err.print("tUMINUS,"); break;
0735: case Tokens.tPOW: System.err.print("tPOW,"); break;
0736: case Tokens.tCMP: System.err.print("tCMP,"); break;
0737: case Tokens.tEQ: System.err.print("tEQ,"); break;
0738: case Tokens.tEQQ: System.err.print("tEQQ,"); break;
0739: case Tokens.tNEQ: System.err.print("tNEQ,"); break;
0740: case Tokens.tGEQ: System.err.print("tGEQ,"); break;
0741: case Tokens.tLEQ: System.err.print("tLEQ,"); break;
0742: case Tokens.tANDOP: System.err.print("tANDOP,"); break;
0743: case Tokens.tOROP: System.err.print("tOROP,"); break;
0744: case Tokens.tMATCH: System.err.print("tMATCH,"); break;
0745: case Tokens.tNMATCH: System.err.print("tNMATCH,"); break;
0746: case Tokens.tDOT2: System.err.print("tDOT2,"); break;
0747: case Tokens.tDOT3: System.err.print("tDOT3,"); break;
0748: case Tokens.tAREF: System.err.print("tAREF,"); break;
0749: case Tokens.tASET: System.err.print("tASET,"); break;
0750: case Tokens.tLSHFT: System.err.print("tLSHFT,"); break;
0751: case Tokens.tRSHFT: System.err.print("tRSHFT,"); break;
0752: case Tokens.tCOLON2: System.err.print("tCOLON2,"); break;
0753: case Tokens.tCOLON3: System.err.print("tCOLON3,"); break;
0754: case Tokens.tOP_ASGN: System.err.print("tOP_ASGN,"); break;
0755: case Tokens.tASSOC: System.err.print("tASSOC,"); break;
0756: case Tokens.tLPAREN: System.err.print("tLPAREN,"); break;
0757: case Tokens.tLPAREN_ARG: System.err.print("tLPAREN_ARG,"); break;
0758: case Tokens.tLBRACK: System.err.print("tLBRACK,"); break;
0759: case Tokens.tLBRACE: System.err.print("tLBRACE,"); break;
0760: case Tokens.tSTAR: System.err.print("tSTAR,"); break;
0761: case Tokens.tAMPER: System.err.print("tAMPER,"); break;
0762: case Tokens.tSYMBEG: System.err.print("tSYMBEG,"); break;
0763: case '\n': System.err.println("NL"); break;
0764: default: System.err.print("'" + (int)token + "',"); break;
0765: }
0766: }
0767:
0768: // DEBUGGING HELP
0769: private int yylex() throws IOException {
0770: int token = yylex2();
0771:
0772: printToken(token);
0773:
0774: return token;
0775: }
0776: */
0777:
0778: /**
0779: * Returns the next token. Also sets yyVal is needed.
0780: *
0781: *@return Description of the Returned Value
0782: */
0783: private int yylex() throws IOException {
0784: char c;
0785: boolean spaceSeen = false;
0786: boolean commandState;
0787:
0788: // BEGIN NETBEANS MODIFICATIONS
0789: if (setSpaceSeen) {
0790: spaceSeen = true;
0791: setSpaceSeen = false;
0792: }
0793: // END NETBEANS MODIFICATIONS
0794:
0795: // BEGIN NETBEANS MODIFICATIONS
0796: // On new lines, possibly resume heredoc processing
0797: // See documentation for newlineTerms for an explanation of this
0798: if (heredocContext != null) {
0799: if (heredocContext.isLookingForEnd()) {
0800: HeredocTerm ht = heredocContext.getTerm();
0801: lex_strterm = ht;
0802: } else if (src.isANewLine()) {
0803: // Can be triggered, disabling for now to cause
0804: // less severe symptoms
0805: //assert lex_strterm == null;
0806:
0807: HeredocTerm ht = heredocContext.getTerm();
0808: lex_strterm = ht;
0809: heredocContext = heredocContext.pop();
0810: }
0811: }
0812: // END NETBEANS MODIFICATIONS
0813:
0814: if (lex_strterm != null) {
0815: // BEGIN NETBEANS MODIFICATIONS
0816: try {
0817: // END NETBEANS MODIFICATIONS
0818: int tok = lex_strterm.parseString(this , src);
0819: if (tok == Tokens.tSTRING_END
0820: || tok == Tokens.tREGEXP_END) {
0821: lex_strterm = null;
0822: lex_state = LexState.EXPR_END;
0823: // BEGIN NETBEANS MODIFICATIONS
0824: if (heredocContext != null
0825: && heredocContext.isLookingForEnd()) {
0826: heredocContext = heredocContext.pop();
0827: }
0828: // END NETBEANS MODIFICATIONS
0829: }
0830: return tok;
0831: // BEGIN NETBEANS MODIFICATIONS
0832: } catch (SyntaxException se) {
0833: // If we abort in string parsing, throw away the str term
0834: // such that we don't try again on restart
0835: lex_strterm = null;
0836: lex_state = LexState.EXPR_END;
0837: throw se;
0838: }
0839: // END NETBEANS MODIFICATIONS
0840: }
0841:
0842: commandState = commandStart;
0843: commandStart = false;
0844:
0845: LexState last_state = lex_state;
0846:
0847: retry: for (;;) {
0848: c = src.read();
0849: switch (c) {
0850: case '\004': /* ^D */
0851: case '\032': /* ^Z */
0852: case 0: /* end of script. */
0853: return 0;
0854:
0855: /* white spaces */
0856: case ' ':
0857: case '\t':
0858: case '\f':
0859: case '\r':
0860: case '\13': /* '\v' */
0861: // BEGIN NETBEANS MODIFICATIONS
0862: if (preserveSpaces) {
0863: // Collapse all whitespace into one token
0864: while (true) {
0865: c = src.read();
0866: if (c != ' ' && c != '\t' && c != '\f'
0867: && c != '\r' && c != '\13') {
0868: break;
0869: }
0870: }
0871: src.unread(c);
0872: yaccValue = new Token("whitespace", getPosition());
0873: setSpaceSeen = true;
0874: return Tokens.tWHITESPACE;
0875: } else {
0876: // END NETBEANS MODIFICATIONS
0877: getPosition();
0878: spaceSeen = true;
0879: continue retry;
0880: // BEGIN NETBEANS MODIFICATIONS
0881: }
0882: // END NETBEANS MODIFICATIONS
0883: case '#': /* it's a comment */
0884: // BEGIN NETBEANS MODIFICATIONS
0885: if (preserveSpaces) {
0886: // Skip to end of the comment
0887: while ((c = src.read()) != '\n') {
0888: if (c == EOF) {
0889: break;
0890: }
0891: }
0892:
0893: yaccValue = new Token("line-comment", getPosition());
0894: setSpaceSeen = spaceSeen;
0895: // Ensure that commandStart and lex_state is updated
0896: // as it otherwise would have if preserveSpaces was false
0897: if (!(lex_state == LexState.EXPR_BEG
0898: || lex_state == LexState.EXPR_FNAME
0899: || lex_state == LexState.EXPR_DOT || lex_state == LexState.EXPR_CLASS)) {
0900: commandStart = true;
0901: lex_state = LexState.EXPR_BEG;
0902: }
0903: return Tokens.tCOMMENT;
0904: } else {
0905: // END NETBEANS MODIFICATIONS
0906: if (readComment(c) == 0)
0907: return 0;
0908:
0909: /* fall through */
0910: // BEGIN NETBEANS MODIFICATIONS
0911: }
0912: // END NETBEANS MODIFICATIONS
0913: case '\n':
0914: // Replace a string of newlines with a single one
0915: while ((c = src.read()) == '\n') {
0916:
0917: }
0918: src.unread(c);
0919: getPosition();
0920: // BEGIN NETBEANS MODIFICATIONS
0921: if (preserveSpaces) {
0922: src.setIsANewLine(true);
0923: yaccValue = new Token("whitespace", getPosition());
0924: // Ensure that commandStart and lex_state is updated
0925: // as it otherwise would have if preserveSpaces was false
0926: if (!(lex_state == LexState.EXPR_BEG
0927: || lex_state == LexState.EXPR_FNAME
0928: || lex_state == LexState.EXPR_DOT || lex_state == LexState.EXPR_CLASS)) {
0929: commandStart = true;
0930: lex_state = LexState.EXPR_BEG;
0931: }
0932: return Tokens.tWHITESPACE;
0933: }
0934: // END NETBEANS MODIFICATIONS
0935:
0936: if (lex_state == LexState.EXPR_BEG
0937: || lex_state == LexState.EXPR_FNAME
0938: || lex_state == LexState.EXPR_DOT
0939: || lex_state == LexState.EXPR_CLASS) {
0940: continue retry;
0941: }
0942:
0943: commandStart = true;
0944: lex_state = LexState.EXPR_BEG;
0945: return '\n';
0946:
0947: case '*':
0948: if ((c = src.read()) == '*') {
0949: if ((c = src.read()) == '=') {
0950: lex_state = LexState.EXPR_BEG;
0951: yaccValue = new Token("**", getPosition());
0952: return Tokens.tOP_ASGN;
0953: }
0954: src.unread(c);
0955: yaccValue = new Token("**", getPosition());
0956: c = Tokens.tPOW;
0957: } else {
0958: if (c == '=') {
0959: lex_state = LexState.EXPR_BEG;
0960: yaccValue = new Token("*", getPosition());
0961: return Tokens.tOP_ASGN;
0962: }
0963: src.unread(c);
0964: if (lex_state.isArgument() && spaceSeen
0965: && !Character.isWhitespace(c)) {
0966: warnings.warning(getPosition(),
0967: "`*' interpreted as argument prefix");
0968: c = Tokens.tSTAR;
0969: } else if (lex_state == LexState.EXPR_BEG
0970: || lex_state == LexState.EXPR_MID) {
0971: c = Tokens.tSTAR;
0972: } else {
0973: c = Tokens.tSTAR2;
0974: }
0975: yaccValue = new Token("*", getPosition());
0976: }
0977: if (lex_state == LexState.EXPR_FNAME
0978: || lex_state == LexState.EXPR_DOT) {
0979: lex_state = LexState.EXPR_ARG;
0980: } else {
0981: lex_state = LexState.EXPR_BEG;
0982: }
0983: return c;
0984:
0985: case '!':
0986: lex_state = LexState.EXPR_BEG;
0987: if ((c = src.read()) == '=') {
0988: yaccValue = new Token("!=", getPosition());
0989: return Tokens.tNEQ;
0990: }
0991: if (c == '~') {
0992: yaccValue = new Token("!~", getPosition());
0993: return Tokens.tNMATCH;
0994: }
0995: src.unread(c);
0996: yaccValue = new Token("!", getPosition());
0997: return Tokens.tBANG;
0998:
0999: case '=':
1000: // documentation nodes
1001: if (src.wasBeginOfLine()) {
1002: String equalLabel;
1003: if ((equalLabel = isNextNoCase("begin")) != null) {
1004: tokenBuffer.setLength(0);
1005: tokenBuffer.append(equalLabel);
1006: c = src.read();
1007:
1008: if (Character.isWhitespace(c)) {
1009: // In case last next was the newline.
1010: src.unread(c);
1011: for (;;) {
1012: c = src.read();
1013: tokenBuffer.append(c);
1014:
1015: // If a line is followed by a blank line put
1016: // it back.
1017: while (c == '\n') {
1018: c = src.read();
1019: tokenBuffer.append(c);
1020: }
1021: if (c == EOF) {
1022: throw new SyntaxException(
1023: getPosition(),
1024: "embedded document meets end of file");
1025: }
1026: if (c != '=')
1027: continue;
1028: if (src.wasBeginOfLine()
1029: && (equalLabel = isNextNoCase("end")) != null) {
1030: tokenBuffer.append(equalLabel);
1031: tokenBuffer.append(src.readLine());
1032: src.unread('\n');
1033: // PENDING: src.setIsANewLine(true);
1034: break;
1035: }
1036: }
1037:
1038: // BEGIN NETBEANS MODIFICATIONS
1039: if (parserSupport != null)
1040: // END NETBEANS MODIFICATIONS
1041: parserSupport
1042: .getResult()
1043: .addComment(
1044: new CommentNode(
1045: getPosition(),
1046: tokenBuffer
1047: .toString()));
1048: // BEGIN NETBEANS MODIFICATIONS
1049: if (preserveSpaces) {
1050: yaccValue = new Token("here-doc",
1051: getPosition());
1052: return Tokens.tDOCUMENTATION;
1053: }
1054: // END NETBEANS MODIFICATIONS
1055: continue retry;
1056: }
1057: src.unread(c);
1058: }
1059: }
1060:
1061: if (lex_state == LexState.EXPR_FNAME
1062: || lex_state == LexState.EXPR_DOT) {
1063: lex_state = LexState.EXPR_ARG;
1064: } else {
1065: lex_state = LexState.EXPR_BEG;
1066: }
1067:
1068: c = src.read();
1069: if (c == '=') {
1070: c = src.read();
1071: if (c == '=') {
1072: yaccValue = new Token("===", getPosition());
1073: return Tokens.tEQQ;
1074: }
1075: src.unread(c);
1076: yaccValue = new Token("==", getPosition());
1077: return Tokens.tEQ;
1078: }
1079: if (c == '~') {
1080: yaccValue = new Token("=~", getPosition());
1081: return Tokens.tMATCH;
1082: } else if (c == '>') {
1083: yaccValue = new Token("=>", getPosition());
1084: return Tokens.tASSOC;
1085: }
1086: src.unread(c);
1087: yaccValue = new Token("=", getPosition());
1088: return '=';
1089:
1090: case '<':
1091: c = src.read();
1092: if (c == '<' && lex_state != LexState.EXPR_END
1093: && lex_state != LexState.EXPR_DOT
1094: && lex_state != LexState.EXPR_ENDARG
1095: && lex_state != LexState.EXPR_CLASS
1096: && (!lex_state.isArgument() || spaceSeen)) {
1097: int tok = hereDocumentIdentifier();
1098: if (tok != 0)
1099: return tok;
1100: }
1101: if (lex_state == LexState.EXPR_FNAME
1102: || lex_state == LexState.EXPR_DOT) {
1103: lex_state = LexState.EXPR_ARG;
1104: } else {
1105: lex_state = LexState.EXPR_BEG;
1106: }
1107: if (c == '=') {
1108: if ((c = src.read()) == '>') {
1109: yaccValue = new Token("<=>", getPosition());
1110: return Tokens.tCMP;
1111: }
1112: src.unread(c);
1113: yaccValue = new Token("<=", getPosition());
1114: return Tokens.tLEQ;
1115: }
1116: if (c == '<') {
1117: if ((c = src.read()) == '=') {
1118: lex_state = LexState.EXPR_BEG;
1119: yaccValue = new Token("<<", getPosition());
1120: return Tokens.tOP_ASGN;
1121: }
1122: src.unread(c);
1123: yaccValue = new Token("<<", getPosition());
1124: return Tokens.tLSHFT;
1125: }
1126: yaccValue = new Token("<", getPosition());
1127: src.unread(c);
1128: return Tokens.tLT;
1129:
1130: case '>':
1131: if (lex_state == LexState.EXPR_FNAME
1132: || lex_state == LexState.EXPR_DOT) {
1133: lex_state = LexState.EXPR_ARG;
1134: } else {
1135: lex_state = LexState.EXPR_BEG;
1136: }
1137:
1138: if ((c = src.read()) == '=') {
1139: yaccValue = new Token(">=", getPosition());
1140: return Tokens.tGEQ;
1141: }
1142: if (c == '>') {
1143: if ((c = src.read()) == '=') {
1144: lex_state = LexState.EXPR_BEG;
1145: yaccValue = new Token(">>", getPosition());
1146: return Tokens.tOP_ASGN;
1147: }
1148: src.unread(c);
1149: yaccValue = new Token(">>", getPosition());
1150: return Tokens.tRSHFT;
1151: }
1152: src.unread(c);
1153: yaccValue = new Token(">", getPosition());
1154: return Tokens.tGT;
1155:
1156: case '"':
1157: lex_strterm = new StringTerm(str_dquote, '"', '\0');
1158: yaccValue = new Token("\"", getPosition());
1159: return Tokens.tSTRING_BEG;
1160:
1161: case '`':
1162: yaccValue = new Token("`", getPosition());
1163: if (lex_state == LexState.EXPR_FNAME) {
1164: lex_state = LexState.EXPR_END;
1165: return Tokens.tBACK_REF2;
1166: }
1167: if (lex_state == LexState.EXPR_DOT) {
1168: if (commandState) {
1169: lex_state = LexState.EXPR_CMDARG;
1170: } else {
1171: lex_state = LexState.EXPR_ARG;
1172: }
1173: return Tokens.tBACK_REF2;
1174: }
1175: lex_strterm = new StringTerm(str_xquote, '`', '\0');
1176: return Tokens.tXSTRING_BEG;
1177:
1178: case '\'':
1179: lex_strterm = new StringTerm(str_squote, '\'', '\0');
1180: yaccValue = new Token("'", getPosition());
1181: return Tokens.tSTRING_BEG;
1182:
1183: case '?':
1184: if (lex_state == LexState.EXPR_END
1185: || lex_state == LexState.EXPR_ENDARG) {
1186: lex_state = LexState.EXPR_BEG;
1187: yaccValue = new Token("?", getPosition());
1188: return '?';
1189: }
1190: c = src.read();
1191: if (c == EOF) {
1192: throw new SyntaxException(getPosition(),
1193: "incomplete character syntax");
1194: }
1195: if (Character.isWhitespace(c)) {
1196: if (!lex_state.isArgument()) {
1197: int c2 = 0;
1198: switch (c) {
1199: case ' ':
1200: c2 = 's';
1201: break;
1202: case '\n':
1203: c2 = 'n';
1204: break;
1205: case '\t':
1206: c2 = 't';
1207: break;
1208: /* What is \v in C?
1209: case '\v':
1210: c2 = 'v';
1211: break;
1212: */
1213: case '\r':
1214: c2 = 'r';
1215: break;
1216: case '\f':
1217: c2 = 'f';
1218: break;
1219: }
1220: if (c2 != 0) {
1221: warnings.warn(getPosition(),
1222: "invalid character syntax; use ?\\"
1223: + c2);
1224: }
1225: }
1226: src.unread(c);
1227: lex_state = LexState.EXPR_BEG;
1228: yaccValue = new Token("?", getPosition());
1229: return '?';
1230: /*} else if (ismbchar(c)) { // ruby - we don't support them either?
1231: rb_warn("multibyte character literal not supported yet; use ?\\" + c);
1232: support.unread(c);
1233: lexState = LexState.EXPR_BEG;
1234: return '?';*/
1235: } else if ((Character.isLetterOrDigit(c) || c == '_')
1236: && !src.peek('\n') && isNext_identchar()) {
1237: src.unread(c);
1238: lex_state = LexState.EXPR_BEG;
1239: yaccValue = new Token("?", getPosition());
1240: return '?';
1241: } else if (c == '\\') {
1242: c = src.readEscape();
1243: }
1244: c &= 0xff;
1245: lex_state = LexState.EXPR_END;
1246: yaccValue = new FixnumNode(getPosition(), c);
1247: return Tokens.tINTEGER;
1248:
case '&':
    // '&&', '&&=', '&=', block-arg/argument-prefix '&', or binary '&'.
    if ((c = src.read()) == '&') {
        lex_state = LexState.EXPR_BEG;
        if ((c = src.read()) == '=') {
            yaccValue = new Token("&&", getPosition());
            lex_state = LexState.EXPR_BEG;
            return Tokens.tOP_ASGN;
        }
        src.unread(c);
        yaccValue = new Token("&&", getPosition());
        return Tokens.tANDOP;
    } else if (c == '=') {
        yaccValue = new Token("&", getPosition());
        lex_state = LexState.EXPR_BEG;
        return Tokens.tOP_ASGN;
    }
    src.unread(c);
    // tmpPosition is required because of getPosition()'s side effects:
    // if the warning below is generated, a later getPosition() call would
    // produce a wrong position when the "inclusive" flag is not set, so
    // capture the position once here and reuse it for the Token below.
    ISourcePosition tmpPosition = getPosition();
    if (lex_state.isArgument() && spaceSeen
            && !Character.isWhitespace(c)) {
        // "foo &bar" in argument position: treat '&' as block-arg prefix.
        warnings.warning(tmpPosition,
                "`&' interpreted as argument prefix");
        c = Tokens.tAMPER;
    } else if (lex_state == LexState.EXPR_BEG
            || lex_state == LexState.EXPR_MID) {
        c = Tokens.tAMPER;
    } else {
        c = Tokens.tAMPER2;
    }

    if (lex_state == LexState.EXPR_FNAME
            || lex_state == LexState.EXPR_DOT) {
        lex_state = LexState.EXPR_ARG;
    } else {
        lex_state = LexState.EXPR_BEG;
    }
    yaccValue = new Token("&", tmpPosition);
    return c;
1290:
1291: case '|':
1292: if ((c = src.read()) == '|') {
1293: lex_state = LexState.EXPR_BEG;
1294: if ((c = src.read()) == '=') {
1295: lex_state = LexState.EXPR_BEG;
1296: yaccValue = new Token("||", getPosition());
1297: return Tokens.tOP_ASGN;
1298: }
1299: src.unread(c);
1300: yaccValue = new Token("||", getPosition());
1301: return Tokens.tOROP;
1302: }
1303: if (c == '=') {
1304: lex_state = LexState.EXPR_BEG;
1305: yaccValue = new Token("|", getPosition());
1306: return Tokens.tOP_ASGN;
1307: }
1308: if (lex_state == LexState.EXPR_FNAME
1309: || lex_state == LexState.EXPR_DOT) {
1310: lex_state = LexState.EXPR_ARG;
1311: } else {
1312: lex_state = LexState.EXPR_BEG;
1313: }
1314: src.unread(c);
1315: yaccValue = new Token("|", getPosition());
1316: return Tokens.tPIPE;
1317:
1318: case '+':
1319: c = src.read();
1320: if (lex_state == LexState.EXPR_FNAME
1321: || lex_state == LexState.EXPR_DOT) {
1322: lex_state = LexState.EXPR_ARG;
1323: if (c == '@') {
1324: yaccValue = new Token("+@", getPosition());
1325: return Tokens.tUPLUS;
1326: }
1327: src.unread(c);
1328: yaccValue = new Token("+", getPosition());
1329: return Tokens.tPLUS;
1330: }
1331: if (c == '=') {
1332: lex_state = LexState.EXPR_BEG;
1333: yaccValue = new Token("+", getPosition());
1334: return Tokens.tOP_ASGN;
1335: }
1336: if (lex_state == LexState.EXPR_BEG
1337: || lex_state == LexState.EXPR_MID
1338: || (lex_state.isArgument() && spaceSeen && !Character
1339: .isWhitespace(c))) {
1340: if (lex_state.isArgument())
1341: arg_ambiguous();
1342: lex_state = LexState.EXPR_BEG;
1343: src.unread(c);
1344: if (Character.isDigit(c)) {
1345: c = '+';
1346: return parseNumber(c);
1347: }
1348: yaccValue = new Token("+", getPosition());
1349: return Tokens.tUPLUS;
1350: }
1351: lex_state = LexState.EXPR_BEG;
1352: src.unread(c);
1353: yaccValue = new Token("+", getPosition());
1354: return Tokens.tPLUS;
1355:
1356: case '-':
1357: c = src.read();
1358: if (lex_state == LexState.EXPR_FNAME
1359: || lex_state == LexState.EXPR_DOT) {
1360: lex_state = LexState.EXPR_ARG;
1361: if (c == '@') {
1362: yaccValue = new Token("-@", getPosition());
1363: return Tokens.tUMINUS;
1364: }
1365: src.unread(c);
1366: yaccValue = new Token("-", getPosition());
1367: return Tokens.tMINUS;
1368: }
1369: if (c == '=') {
1370: lex_state = LexState.EXPR_BEG;
1371: yaccValue = new Token("-", getPosition());
1372: return Tokens.tOP_ASGN;
1373: }
1374: if (lex_state == LexState.EXPR_BEG
1375: || lex_state == LexState.EXPR_MID
1376: || (lex_state.isArgument() && spaceSeen && !Character
1377: .isWhitespace(c))) {
1378: if (lex_state.isArgument())
1379: arg_ambiguous();
1380: lex_state = LexState.EXPR_BEG;
1381: src.unread(c);
1382: yaccValue = new Token("-", getPosition());
1383: if (Character.isDigit(c)) {
1384: return Tokens.tUMINUS_NUM;
1385: }
1386: return Tokens.tUMINUS;
1387: }
1388: lex_state = LexState.EXPR_BEG;
1389: src.unread(c);
1390: yaccValue = new Token("-", getPosition());
1391: return Tokens.tMINUS;
1392:
1393: case '.':
1394: lex_state = LexState.EXPR_BEG;
1395: if ((c = src.read()) == '.') {
1396: if ((c = src.read()) == '.') {
1397: yaccValue = new Token("...", getPosition());
1398: return Tokens.tDOT3;
1399: }
1400: src.unread(c);
1401: yaccValue = new Token("..", getPosition());
1402: return Tokens.tDOT2;
1403: }
1404: src.unread(c);
1405: if (Character.isDigit(c)) {
1406: throw new SyntaxException(getPosition(),
1407: "no .<digit> floating literal anymore; put 0 before dot");
1408: }
1409: lex_state = LexState.EXPR_DOT;
1410: yaccValue = new Token(".", getPosition());
1411: return Tokens.tDOT;
1412: case '0':
1413: case '1':
1414: case '2':
1415: case '3':
1416: case '4':
1417: case '5':
1418: case '6':
1419: case '7':
1420: case '8':
1421: case '9':
1422: return parseNumber(c);
1423:
case ')':
    // A closing delimiter restores the condition/cmdarg stacks pushed at
    // the matching opener and ends the current expression.
    conditionState.restart();
    cmdArgumentState.restart();
    lex_state = LexState.EXPR_END;
    yaccValue = new Token(")", getPosition());
    return Tokens.tRPAREN;
case ']':
    conditionState.restart();
    cmdArgumentState.restart();
    lex_state = LexState.EXPR_END;
    // Fix: token text was ")" (copy-paste from the ')' arm); use "]" so
    // the token value matches the actual lexeme, as the ')' and '}' arms do.
    yaccValue = new Token("]", getPosition());
    return Tokens.tRBRACK;
case '}':
    conditionState.restart();
    cmdArgumentState.restart();
    lex_state = LexState.EXPR_END;
    yaccValue = new Token("}", getPosition());
    return Tokens.tRCURLY;
1442:
1443: case ':':
1444: c = src.read();
1445: if (c == ':') {
1446: if (lex_state == LexState.EXPR_BEG
1447: || lex_state == LexState.EXPR_MID
1448: || lex_state == LexState.EXPR_CLASS
1449: || (lex_state.isArgument() && spaceSeen)) {
1450: lex_state = LexState.EXPR_BEG;
1451: yaccValue = new Token("::", getPosition());
1452: return Tokens.tCOLON3;
1453: }
1454: lex_state = LexState.EXPR_DOT;
1455: yaccValue = new Token(":", getPosition());
1456: return Tokens.tCOLON2;
1457: }
1458: if (lex_state == LexState.EXPR_END
1459: || lex_state == LexState.EXPR_ENDARG
1460: || Character.isWhitespace(c)) {
1461: src.unread(c);
1462: lex_state = LexState.EXPR_BEG;
1463: yaccValue = new Token(":", getPosition());
1464: return ':';
1465: }
1466: switch (c) {
1467: case '\'':
1468: lex_strterm = new StringTerm(str_ssym, c, '\0');
1469: break;
1470: case '"':
1471: lex_strterm = new StringTerm(str_dsym, c, '\0');
1472: break;
1473: default:
1474: src.unread(c);
1475: break;
1476: }
1477: lex_state = LexState.EXPR_FNAME;
1478: yaccValue = new Token(":", getPosition());
1479: return Tokens.tSYMBEG;
1480:
1481: case '/':
1482: if (lex_state == LexState.EXPR_BEG
1483: || lex_state == LexState.EXPR_MID) {
1484: lex_strterm = new StringTerm(str_regexp, '/', '\0');
1485: yaccValue = new Token("/", getPosition());
1486: return Tokens.tREGEXP_BEG;
1487: }
1488:
1489: if ((c = src.read()) == '=') {
1490: yaccValue = new Token("/", getPosition());
1491: lex_state = LexState.EXPR_BEG;
1492: return Tokens.tOP_ASGN;
1493: }
1494: src.unread(c);
1495: if (lex_state.isArgument() && spaceSeen) {
1496: if (!Character.isWhitespace(c)) {
1497: arg_ambiguous();
1498: lex_strterm = new StringTerm(str_regexp, '/',
1499: '\0');
1500: yaccValue = new Token("/", getPosition());
1501: return Tokens.tREGEXP_BEG;
1502: }
1503: }
1504: if (lex_state == LexState.EXPR_FNAME
1505: || lex_state == LexState.EXPR_DOT) {
1506: lex_state = LexState.EXPR_ARG;
1507: } else {
1508: lex_state = LexState.EXPR_BEG;
1509: }
1510: yaccValue = new Token("/", getPosition());
1511: return Tokens.tDIVIDE;
1512:
1513: case '^':
1514: if ((c = src.read()) == '=') {
1515: lex_state = LexState.EXPR_BEG;
1516: yaccValue = new Token("^", getPosition());
1517: return Tokens.tOP_ASGN;
1518: }
1519: if (lex_state == LexState.EXPR_FNAME
1520: || lex_state == LexState.EXPR_DOT) {
1521: lex_state = LexState.EXPR_ARG;
1522: } else {
1523: lex_state = LexState.EXPR_BEG;
1524: }
1525: src.unread(c);
1526: yaccValue = new Token("^", getPosition());
1527: return Tokens.tCARET;
1528:
case ';':
    // ';' ends a statement and re-enables command-call parsing.
    commandStart = true;
    lex_state = LexState.EXPR_BEG;
    // Fix: the original fell through to the ',' arm and labeled every
    // ';' token with lexeme ","; give the semicolon its own lexeme text.
    yaccValue = new Token(";", getPosition());
    return c;
case ',':
    lex_state = LexState.EXPR_BEG;
    yaccValue = new Token(",", getPosition());
    return c;
1535:
1536: case '~':
1537: if (lex_state == LexState.EXPR_FNAME
1538: || lex_state == LexState.EXPR_DOT) {
1539: if ((c = src.read()) != '@') {
1540: src.unread(c);
1541: }
1542: }
1543: if (lex_state == LexState.EXPR_FNAME
1544: || lex_state == LexState.EXPR_DOT) {
1545: lex_state = LexState.EXPR_ARG;
1546: } else {
1547: lex_state = LexState.EXPR_BEG;
1548: }
1549: yaccValue = new Token("~", getPosition());
1550: return Tokens.tTILDE;
1551: case '(':
1552: c = Tokens.tLPAREN2;
1553: commandStart = true;
1554: if (lex_state == LexState.EXPR_BEG
1555: || lex_state == LexState.EXPR_MID) {
1556: c = Tokens.tLPAREN;
1557: } else if (spaceSeen) {
1558: if (lex_state == LexState.EXPR_CMDARG) {
1559: c = Tokens.tLPAREN_ARG;
1560: } else if (lex_state == LexState.EXPR_ARG) {
1561: warnings
1562: .warn(getPosition(),
1563: "don't put space before argument parentheses");
1564: c = Tokens.tLPAREN2;
1565: }
1566: }
1567: conditionState.stop();
1568: cmdArgumentState.stop();
1569: lex_state = LexState.EXPR_BEG;
1570: yaccValue = new Token("(", getPosition());
1571: return c;
1572:
1573: case '[':
1574: if (lex_state == LexState.EXPR_FNAME
1575: || lex_state == LexState.EXPR_DOT) {
1576: lex_state = LexState.EXPR_ARG;
1577: if ((c = src.read()) == ']') {
1578: if (src.peek('=')) {
1579: c = src.read();
1580: yaccValue = new Token("[]=", getPosition());
1581: return Tokens.tASET;
1582: }
1583: yaccValue = new Token("[]", getPosition());
1584: return Tokens.tAREF;
1585: }
1586: src.unread(c);
1587: yaccValue = new Token("[", getPosition());
1588: return '[';
1589: } else if (lex_state == LexState.EXPR_BEG
1590: || lex_state == LexState.EXPR_MID) {
1591: c = Tokens.tLBRACK;
1592: } else if (lex_state.isArgument() && spaceSeen) {
1593: c = Tokens.tLBRACK;
1594: }
1595: lex_state = LexState.EXPR_BEG;
1596: conditionState.stop();
1597: cmdArgumentState.stop();
1598: yaccValue = new Token("[", getPosition());
1599: return c;
1600:
1601: case '{':
1602: c = Tokens.tLCURLY;
1603:
1604: if (lex_state.isArgument()
1605: || lex_state == LexState.EXPR_END) {
1606: c = Tokens.tLCURLY; /* block (primary) */
1607: } else if (lex_state == LexState.EXPR_ENDARG) {
1608: c = Tokens.tLBRACE_ARG; /* block (expr) */
1609: } else {
1610: c = Tokens.tLBRACE; /* hash */
1611: }
1612: conditionState.stop();
1613: cmdArgumentState.stop();
1614: lex_state = LexState.EXPR_BEG;
1615: yaccValue = new Token("{", getPosition());
1616: return c;
1617:
1618: case '\\':
1619: c = src.read();
1620: if (c == '\n') {
1621: spaceSeen = true;
1622: continue retry; /* skip \\n */
1623: }
1624: src.unread(c);
1625: yaccValue = new Token("\\", getPosition());
1626: return '\\';
1627:
1628: case '%':
1629: if (lex_state == LexState.EXPR_BEG
1630: || lex_state == LexState.EXPR_MID) {
1631: return parseQuote(src.read());
1632: }
1633: if ((c = src.read()) == '=') {
1634: lex_state = LexState.EXPR_BEG;
1635: yaccValue = new Token("%", getPosition());
1636: return Tokens.tOP_ASGN;
1637: }
1638: if (lex_state.isArgument() && spaceSeen
1639: && !Character.isWhitespace(c)) {
1640: return parseQuote(c);
1641: }
1642: if (lex_state == LexState.EXPR_FNAME
1643: || lex_state == LexState.EXPR_DOT) {
1644: lex_state = LexState.EXPR_ARG;
1645: } else {
1646: lex_state = LexState.EXPR_BEG;
1647: }
1648: src.unread(c);
1649: yaccValue = new Token("%", getPosition());
1650: return Tokens.tPERCENT;
1651:
1652: case '$':
1653: last_state = lex_state;
1654: lex_state = LexState.EXPR_END;
1655: tokenBuffer.setLength(0);
1656: c = src.read();
1657: switch (c) {
1658: case '_': /* $_: last read line string */
1659: c = src.read();
1660: if (isIdentifierChar(c)) {
1661: tokenBuffer.append('$');
1662: tokenBuffer.append('_');
1663: break;
1664: }
1665: src.unread(c);
1666: c = '_';
1667: /* fall through */
1668: case '~': /* $~: match-data */
1669: case '*': /* $*: argv */
1670: case '$': /* $$: pid */
1671: case '?': /* $?: last status */
1672: case '!': /* $!: error string */
1673: case '@': /* $@: error position */
1674: case '/': /* $/: input record separator */
1675: case '\\': /* $\: output record separator */
1676: case ';': /* $;: field separator */
1677: case ',': /* $,: output field separator */
1678: case '.': /* $.: last read line number */
1679: case '=': /* $=: ignorecase */
1680: case ':': /* $:: load path */
1681: case '<': /* $<: reading filename */
1682: case '>': /* $>: default output handle */
1683: case '\"': /* $": already loaded files */
1684: tokenBuffer.append('$');
1685: tokenBuffer.append(c);
1686: yaccValue = new Token(tokenBuffer.toString(),
1687: getPosition());
1688: return Tokens.tGVAR;
1689:
1690: case '-':
1691: tokenBuffer.append('$');
1692: tokenBuffer.append(c);
1693: c = src.read();
1694: if (isIdentifierChar(c)) {
1695: tokenBuffer.append(c);
1696: } else {
1697: src.unread(c);
1698: }
1699: yaccValue = new Token(tokenBuffer.toString(),
1700: getPosition());
1701: /* xxx shouldn't check if valid option variable */
1702: return Tokens.tGVAR;
1703:
1704: case '&': /* $&: last match */
1705: case '`': /* $`: string before last match */
1706: case '\'': /* $': string after last match */
1707: case '+': /* $+: string matches last paren. */
1708: // Explicit reference to these vars as symbols...
1709: if (last_state == LexState.EXPR_FNAME) {
1710: tokenBuffer.append('$');
1711: tokenBuffer.append(c);
1712: yaccValue = new Token(tokenBuffer.toString(),
1713: getPosition());
1714: return Tokens.tGVAR;
1715: }
1716:
1717: yaccValue = new BackRefNode(getPosition(), c);
1718: return Tokens.tBACK_REF;
1719:
1720: case '1':
1721: case '2':
1722: case '3':
1723: case '4':
1724: case '5':
1725: case '6':
1726: case '7':
1727: case '8':
1728: case '9':
1729: tokenBuffer.append('$');
1730: do {
1731: tokenBuffer.append(c);
1732: c = src.read();
1733: } while (Character.isDigit(c));
1734: src.unread(c);
1735: if (last_state == LexState.EXPR_FNAME) {
1736: yaccValue = new Token(tokenBuffer.toString(),
1737: getPosition());
1738: return Tokens.tGVAR;
1739: } else {
1740: yaccValue = new NthRefNode(getPosition(),
1741: Integer.parseInt(tokenBuffer
1742: .substring(1)));
1743: return Tokens.tNTH_REF;
1744: }
1745: default:
1746: if (!isIdentifierChar(c)) {
1747: src.unread(c);
1748: yaccValue = new Token("$", getPosition());
1749: return '$';
1750: }
1751: case '0':
1752: tokenBuffer.append('$');
1753: }
1754: break;
1755:
1756: case '@':
1757: c = src.read();
1758: tokenBuffer.setLength(0);
1759: tokenBuffer.append('@');
1760: if (c == '@') {
1761: tokenBuffer.append('@');
1762: c = src.read();
1763: }
1764: if (Character.isDigit(c)) {
1765: if (tokenBuffer.length() == 1) {
1766: throw new SyntaxException(
1767: getPosition(),
1768: "`@"
1769: + c
1770: + "' is not allowed as an instance variable name");
1771: }
1772: throw new SyntaxException(
1773: getPosition(),
1774: "`@@"
1775: + c
1776: + "' is not allowed as a class variable name");
1777: }
1778: if (!isIdentifierChar(c)) {
1779: src.unread(c);
1780: yaccValue = new Token("@", getPosition());
1781: return '@';
1782: }
1783: break;
1784:
1785: case '_':
1786: if (src.wasBeginOfLine()
1787: && src.matchString("_END__", false)) {
1788: // BEGIN NETBEANS MODIFICATIONS
1789: if (parserSupport != null)
1790: // END NETBEANS MODIFICATIONS
1791: parserSupport.getResult().setEndSeen(true);
1792: return 0;
1793: }
1794: tokenBuffer.setLength(0);
1795: break;
1796:
1797: default:
1798: if (!isIdentifierChar(c)) {
1799: throw new SyntaxException(getPosition(),
1800: "Invalid char `\\"
1801: + Integer.toOctalString(c & 0xff)
1802: + "' in expression");
1803: }
1804:
1805: tokenBuffer.setLength(0);
1806: break;
1807: }
1808:
1809: // BEGIN NETBEANS MODIFICATIONS
1810: // Need to undo newline status after reading too far
1811: boolean wasNewline = src.wasBeginOfLine();
1812: // END NETBEANS MODIFICATIONS
1813: do {
1814: tokenBuffer.append(c);
1815: /* no special multibyte character handling is needed in Java
1816: * if (ismbchar(c)) {
1817: int i, len = mbclen(c)-1;
1818:
1819: for (i = 0; i < len; i++) {
1820: c = src.read();
1821: tokenBuffer.append(c);
1822: }
1823: }*/
1824: // BEGIN NETBEANS MODIFICATIONS
1825: wasNewline = src.wasBeginOfLine();
1826: // END NETBEANS MODIFICATIONS
1827: c = src.read();
1828: } while (isIdentifierChar(c));
1829:
1830: char peek = src.read();
1831: if ((c == '!' || c == '?')
1832: && isIdentifierChar(tokenBuffer.charAt(0))
1833: && peek != '=') {
1834: src.unread(peek);
1835: tokenBuffer.append(c);
1836: } else {
1837: src.unread(peek);
1838: src.unread(c);
1839: }
1840: // BEGIN NETBEANS MODIFICATIONS
1841: src.setIsANewLine(wasNewline);
1842: // END NETBEANS MODIFICATIONS
1843:
1844: int result = 0;
1845:
1846: last_state = lex_state;
1847: switch (tokenBuffer.charAt(0)) {
1848: case '$':
1849: lex_state = LexState.EXPR_END;
1850: result = Tokens.tGVAR;
1851: break;
1852: case '@':
1853: lex_state = LexState.EXPR_END;
1854: if (tokenBuffer.charAt(1) == '@') {
1855: result = Tokens.tCVAR;
1856: } else {
1857: result = Tokens.tIVAR;
1858: }
1859: break;
1860:
1861: default:
1862: char last = tokenBuffer
1863: .charAt(tokenBuffer.length() - 1);
1864: if (last == '!' || last == '?') {
1865: result = Tokens.tFID;
1866: } else {
1867: if (lex_state == LexState.EXPR_FNAME) {
1868: if ((c = src.read()) == '=') {
1869: char c2 = src.read();
1870:
1871: if (c2 != '~'
1872: && c2 != '>'
1873: && (c2 != '=' || (c2 == '\n' && src
1874: .peek('>')))) {
1875: result = Tokens.tIDENTIFIER;
1876: tokenBuffer.append(c);
1877: src.unread(c2);
1878: } else {
1879: src.unread(c2);
1880: src.unread(c);
1881: }
1882: } else {
1883: src.unread(c);
1884: }
1885: }
1886: if (result == 0
1887: && Character.isUpperCase(tokenBuffer
1888: .charAt(0))) {
1889: result = Tokens.tCONSTANT;
1890: } else {
1891: result = Tokens.tIDENTIFIER;
1892: }
1893: }
1894:
1895: if (lex_state != LexState.EXPR_DOT) {
1896: /* See if it is a reserved word. */
1897: Keyword keyword = Keyword.getKeyword(tokenBuffer
1898: .toString(), tokenBuffer.length());
1899: if (keyword != null) {
1900: // enum lex_state
1901: LexState state = lex_state;
1902:
1903: lex_state = keyword.state;
1904: if (state.isExprFName()) {
1905: yaccValue = new Token(keyword.name,
1906: getPosition());
1907: } else {
1908: yaccValue = new Token(tokenBuffer
1909: .toString(), getPosition());
1910: }
1911: if (keyword.id0 == Tokens.kDO) {
1912: if (conditionState.isInState()) {
1913: return Tokens.kDO_COND;
1914: }
1915: if (cmdArgumentState.isInState()
1916: && state != LexState.EXPR_CMDARG) {
1917: return Tokens.kDO_BLOCK;
1918: }
1919: if (state == LexState.EXPR_ENDARG) {
1920: return Tokens.kDO_BLOCK;
1921: }
1922: return Tokens.kDO;
1923: }
1924:
1925: if (state == LexState.EXPR_BEG) {
1926: return keyword.id0;
1927: }
1928: if (keyword.id0 != keyword.id1) {
1929: lex_state = LexState.EXPR_BEG;
1930: }
1931: return keyword.id1;
1932: }
1933: }
1934:
1935: if (lex_state == LexState.EXPR_BEG
1936: || lex_state == LexState.EXPR_MID
1937: || lex_state == LexState.EXPR_DOT
1938: || lex_state == LexState.EXPR_ARG
1939: || lex_state == LexState.EXPR_CMDARG) {
1940: if (commandState) {
1941: lex_state = LexState.EXPR_CMDARG;
1942: } else {
1943: lex_state = LexState.EXPR_ARG;
1944: }
1945: } else {
1946: lex_state = LexState.EXPR_END;
1947: }
1948: }
1949:
1950: String tempVal = tokenBuffer.toString();
1951:
1952: // Lame: parsing logic made it into lexer in ruby...So we
1953: // are emulating
1954: // FIXME: I believe this is much simpler now...
1955: // BEGIN NETBEANS MODIFICATIONS
1956: if (parserSupport != null) {
1957: // END NETBEANS MODIFICATIONS
1958: StaticScope scope = parserSupport.getCurrentScope();
1959: if (IdUtil.getVarType(tempVal) == IdUtil.LOCAL_VAR
1960: && last_state != LexState.EXPR_DOT
1961: && (scope instanceof BlockStaticScope && (scope
1962: .isDefined(tempVal) >= 0))
1963: || (scope.getLocalScope().isDefined(tempVal) >= 0)) {
1964: lex_state = LexState.EXPR_END;
1965: }
1966: // BEGIN NETBEANS MODIFICATIONS
1967: }
1968: // END NETBEANS MODIFICATIONS
1969: yaccValue = new Token(tempVal, getPosition());
1970:
1971: return result;
1972: }
1973: }
1974:
1975: /**
1976: * Parse a number from the input stream.
1977: *
1978: *@param c The first character of the number.
1979: *@return A int constant wich represents a token.
1980: */
1981: private int parseNumber(char c) throws IOException {
1982: lex_state = LexState.EXPR_END;
1983:
1984: tokenBuffer.setLength(0);
1985:
1986: if (c == '-') {
1987: tokenBuffer.append(c);
1988: c = src.read();
1989: } else if (c == '+') {
1990: // We don't append '+' since Java number parser gets confused
1991: c = src.read();
1992: }
1993:
1994: char nondigit = '\0';
1995:
1996: if (c == '0') {
1997: int startLen = tokenBuffer.length();
1998:
1999: switch (c = src.read()) {
2000: case 'x':
2001: case 'X': // hexadecimal
2002: c = src.read();
2003: if (isHexChar(c)) {
2004: for (;; c = src.read()) {
2005: if (c == '_') {
2006: if (nondigit != '\0') {
2007: break;
2008: }
2009: nondigit = c;
2010: } else if (isHexChar(c)) {
2011: nondigit = '\0';
2012: tokenBuffer.append(c);
2013: } else {
2014: break;
2015: }
2016: }
2017: }
2018: src.unread(c);
2019:
2020: if (tokenBuffer.length() == startLen) {
2021: throw new SyntaxException(getPosition(),
2022: "Hexadecimal number without hex-digits.");
2023: } else if (nondigit != '\0') {
2024: throw new SyntaxException(getPosition(),
2025: "Trailing '_' in number.");
2026: }
2027: yaccValue = getInteger(tokenBuffer.toString(), 16);
2028: return Tokens.tINTEGER;
2029: case 'b':
2030: case 'B': // binary
2031: c = src.read();
2032: if (c == '0' || c == '1') {
2033: for (;; c = src.read()) {
2034: if (c == '_') {
2035: if (nondigit != '\0') {
2036: break;
2037: }
2038: nondigit = c;
2039: } else if (c == '0' || c == '1') {
2040: nondigit = '\0';
2041: tokenBuffer.append(c);
2042: } else {
2043: break;
2044: }
2045: }
2046: }
2047: src.unread(c);
2048:
2049: if (tokenBuffer.length() == startLen) {
2050: throw new SyntaxException(getPosition(),
2051: "Binary number without digits.");
2052: } else if (nondigit != '\0') {
2053: throw new SyntaxException(getPosition(),
2054: "Trailing '_' in number.");
2055: }
2056: yaccValue = getInteger(tokenBuffer.toString(), 2);
2057: return Tokens.tINTEGER;
2058: case 'd':
2059: case 'D': // decimal
2060: c = src.read();
2061: if (Character.isDigit(c)) {
2062: for (;; c = src.read()) {
2063: if (c == '_') {
2064: if (nondigit != '\0') {
2065: break;
2066: }
2067: nondigit = c;
2068: } else if (Character.isDigit(c)) {
2069: nondigit = '\0';
2070: tokenBuffer.append(c);
2071: } else {
2072: break;
2073: }
2074: }
2075: }
2076: src.unread(c);
2077:
2078: if (tokenBuffer.length() == startLen) {
2079: throw new SyntaxException(getPosition(),
2080: "Binary number without digits.");
2081: } else if (nondigit != '\0') {
2082: throw new SyntaxException(getPosition(),
2083: "Trailing '_' in number.");
2084: }
2085: yaccValue = getInteger(tokenBuffer.toString(), 2);
2086: return Tokens.tINTEGER;
2087: case 'o':
2088: c = src.read();
2089: case '0':
2090: case '1':
2091: case '2':
2092: case '3':
2093: case '4': //Octal
2094: case '5':
2095: case '6':
2096: case '7':
2097: case '_':
2098: for (;; c = src.read()) {
2099: if (c == '_') {
2100: if (nondigit != '\0') {
2101: break;
2102: }
2103: nondigit = c;
2104: } else if (c >= '0' && c <= '7') {
2105: nondigit = '\0';
2106: tokenBuffer.append(c);
2107: } else {
2108: break;
2109: }
2110: }
2111: if (tokenBuffer.length() > startLen) {
2112: src.unread(c);
2113:
2114: if (nondigit != '\0') {
2115: throw new SyntaxException(getPosition(),
2116: "Trailing '_' in number.");
2117: }
2118:
2119: yaccValue = getInteger(tokenBuffer.toString(), 8);
2120: return Tokens.tINTEGER;
2121: }
2122: case '8':
2123: case '9':
2124: throw new SyntaxException(getPosition(),
2125: "Illegal octal digit.");
2126: case '.':
2127: case 'e':
2128: case 'E':
2129: tokenBuffer.append('0');
2130: break;
2131: default:
2132: src.unread(c);
2133: yaccValue = new FixnumNode(getPosition(), 0);
2134: return Tokens.tINTEGER;
2135: }
2136: }
2137:
2138: boolean seen_point = false;
2139: boolean seen_e = false;
2140:
2141: for (;; c = src.read()) {
2142: switch (c) {
2143: case '0':
2144: case '1':
2145: case '2':
2146: case '3':
2147: case '4':
2148: case '5':
2149: case '6':
2150: case '7':
2151: case '8':
2152: case '9':
2153: nondigit = '\0';
2154: tokenBuffer.append(c);
2155: break;
2156: case '.':
2157: if (nondigit != '\0') {
2158: src.unread(c);
2159: throw new SyntaxException(getPosition(),
2160: "Trailing '_' in number.");
2161: } else if (seen_point || seen_e) {
2162: src.unread(c);
2163: return getNumberToken(tokenBuffer.toString(), true,
2164: nondigit);
2165: } else {
2166: char c2;
2167: if (!Character.isDigit(c2 = src.read())) {
2168: src.unread(c2);
2169: src.unread('.');
2170: if (c == '_') {
2171: // Enebo: c can never be antrhign but '.'
2172: // Why did I put this here?
2173: } else {
2174: yaccValue = getInteger(tokenBuffer
2175: .toString(), 10);
2176: return Tokens.tINTEGER;
2177: }
2178: } else {
2179: tokenBuffer.append('.');
2180: tokenBuffer.append(c2);
2181: seen_point = true;
2182: nondigit = '\0';
2183: }
2184: }
2185: break;
2186: case 'e':
2187: case 'E':
2188: if (nondigit != '\0') {
2189: throw new SyntaxException(getPosition(),
2190: "Trailing '_' in number.");
2191: } else if (seen_e) {
2192: src.unread(c);
2193: return getNumberToken(tokenBuffer.toString(), true,
2194: nondigit);
2195: } else {
2196: tokenBuffer.append(c);
2197: seen_e = true;
2198: nondigit = c;
2199: c = src.read();
2200: if (c == '-' || c == '+') {
2201: tokenBuffer.append(c);
2202: nondigit = c;
2203: } else {
2204: src.unread(c);
2205: }
2206: }
2207: break;
2208: case '_': // '_' in number just ignored
2209: if (nondigit != '\0') {
2210: throw new SyntaxException(getPosition(),
2211: "Trailing '_' in number.");
2212: }
2213: nondigit = c;
2214: break;
2215: default:
2216: src.unread(c);
2217: return getNumberToken(tokenBuffer.toString(), seen_e
2218: || seen_point, nondigit);
2219: }
2220: }
2221: }
2222:
2223: private int getNumberToken(String number, boolean isFloat,
2224: char nondigit) {
2225: if (nondigit != '\0') {
2226: throw new SyntaxException(getPosition(),
2227: "Trailing '_' in number.");
2228: }
2229: if (isFloat) {
2230: double d;
2231: try {
2232: d = Double.parseDouble(number);
2233: } catch (NumberFormatException e) {
2234: warnings.warn(getPosition(), "Float " + number
2235: + " out of range.");
2236:
2237: d = number.startsWith("-") ? Double.NEGATIVE_INFINITY
2238: : Double.POSITIVE_INFINITY;
2239: }
2240: yaccValue = new FloatNode(getPosition(), d);
2241: return Tokens.tFLOAT;
2242: }
2243: yaccValue = getInteger(number, 10);
2244: return Tokens.tINTEGER;
2245: }
2246:
2247: // BEGIN NETBEANS MODIFICATIONS
2248: /**
2249: * Set whether or not the lexer should be "space preserving" - in other words, whether
2250: * the parser should consider whitespace sequences and code comments to be separate
2251: * tokens to return to the client. Parsers typically do not want to see any
2252: * whitespace or comment tokens - but an IDE trying to tokenize a chunk of source code
2253: * does want to identify these separately. The default, false, means the parser mode.
2254: *
2255: * @param preserveSpaces If true, return space and comment sequences as tokens, if false, skip these
2256: * @see #getPreserveSpaces
2257: */
2258: public void setPreserveSpaces(final boolean preserveSpaces) {
2259: this .preserveSpaces = preserveSpaces;
2260: }
2261:
2262: /**
2263: * Return whether or not the lexer should be "space preserving". For a description
2264: * of what this means, see {@link #setPreserveSpaces}.
2265: *
2266: * @return preserveSpaces True iff space and comment sequences will be returned as
2267: * tokens, and false otherwise.
2268: *
2269: * @see #setPreserveSpaces
2270: */
2271: public boolean getPreserveSpaces() {
2272: return preserveSpaces;
2273: }
2274:
2275: public LexState getLexState() {
2276: return lex_state;
2277: }
2278:
2279: public void setLexState(final LexState lex_state) {
2280: this .lex_state = lex_state;
2281: }
2282:
2283: public boolean isSetSpaceSeen() {
2284: return setSpaceSeen;
2285: }
2286:
2287: public void setSpaceSeen(boolean setSpaceSeen) {
2288: this .setSpaceSeen = setSpaceSeen;
2289: }
2290:
2291: public boolean isCommandStart() {
2292: return commandStart;
2293: }
2294:
2295: public void setCommandStart(boolean commandStart) {
2296: this .commandStart = commandStart;
2297: }
2298:
2299: public LexerSource getSource() {
2300: return this .src;
2301: }
2302:
    /* In normal JRuby, there is a "spaceSeen" flag which is local to yylex. It is
     * used to interpret input based on whether a space was recently seen.
     * Since I now bail -out- of yylex() when I see space, I need to be able
     * to preserve this flag across yylex() calls. In most cases, "spaceSeen"
     * should be set to false (as it previously was at the beginning of yylex()).
     * However, when I've seen a space and have bailed out, I need to set spaceSeen=true
     * on the next call to yylex(). This is what the following flag is all about.
     * It is set to true when we bail out on space (or other states that didn't
     * previously bail out and spaceSeen is true).
     */
    // Carried spaceSeen value for the next yylex() call; see comment above.
    private boolean setSpaceSeen;
2314:
2315: // END NETBEANS MODIFICATIONS
2316: }
|