0001: /*
0002: * tclParseExpr.c -> ParseExpr.java
0003: *
0004: * This file contains procedures that parse Tcl expressions. They
0005: * do so in a general-purpose fashion that can be used for many
0006: * different purposes, including compilation, direct execution,
0007: * code analysis, etc.
0008: *
0009: * Copyright (c) 1997 Sun Microsystems, Inc.
0010: * Copyright (c) 1998-2000 by Scriptics Corporation.
0011: * Copyright (c) 2005 One Moon Scientific, Inc.
0012: *
0013: * See the file "license.terms" for information on usage and redistribution
0014: * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
0015: *
0016: * RCS: @(#) $Id: ParseExpr.java,v 1.6 2006/05/22 21:23:35 mdejong Exp $
0017: */
0018:
0019: package tcl.lang;
0020:
0021: class ParseExpr {
0022:
0023: // Definitions of the different lexemes that appear in expressions. The
0024: // order of these must match the corresponding entries in the
0025: // lexemeStrings array below.
0026:
0027: static final int LITERAL = 0;
0028: static final int FUNC_NAME = 1;
0029: static final int OPEN_BRACKET = 2;
0030: static final int OPEN_BRACE = 3;
0031: static final int OPEN_PAREN = 4;
0032: static final int CLOSE_PAREN = 5;
0033: static final int DOLLAR = 6;
0034: static final int QUOTE = 7;
0035: static final int COMMA = 8;
0036: static final int END = 9;
0037: static final int UNKNOWN = 10;
0038: static final int UNKNOWN_CHAR = 11;
0039:
0040: // Binary operators:
0041:
0042: static final int MULT = 12;
0043: static final int DIVIDE = 13;
0044: static final int MOD = 14;
0045: static final int PLUS = 15;
0046: static final int MINUS = 16;
0047: static final int LEFT_SHIFT = 17;
0048: static final int RIGHT_SHIFT = 18;
0049: static final int LESS = 19;
0050: static final int GREATER = 20;
0051: static final int LEQ = 21;
0052: static final int GEQ = 22;
0053: static final int EQUAL = 23;
0054: static final int NEQ = 24;
0055: static final int BIT_AND = 25;
0056: static final int BIT_XOR = 26;
0057: static final int BIT_OR = 27;
0058: static final int AND = 28;
0059: static final int OR = 29;
0060: static final int QUESTY = 30;
0061: static final int COLON = 31;
0062:
0063: // Unary operators. Unary minus and plus are represented by the (binary)
0064: // lexemes MINUS and PLUS.
0065:
0066: static final int NOT = 32;
0067: static final int BIT_NOT = 33;
0068:
0069: static final int STREQ = 34;
0070: static final int STRNEQ = 35;
0071:
0072: // Mapping from lexemes to strings; used for debugging messages. These
0073: // entries must match the order and number of the lexeme definitions above.
0074:
0075: static String lexemeStrings[] = { "LITERAL", "FUNCNAME", "[", "{",
0076: "(", ")", "$", "\"", ",", "END", "UNKNOWN", "*", "/", "%",
0077: "+", "-", "<<", ">>", "<", ">", "<=", ">=", "==", "!=",
0078: "&", "^", "|", "&&", "||", "?", ":", "!", "~", "eq", "ne", };
0079:
0080: // The ParseInfo class holds state while parsing an expression.
0081: // A reference to a ParseInfo object is passed among the routines in
0082: // this module.
0083:
0084: static class ParseInfo {
0085: TclParse parseObj; // Object to fill in with
0086: // information about the expression.
0087: int lexeme; // Type of last lexeme scanned in expr.
0088: // See below for definitions. Corresponds to
0089: // size characters beginning at start.
0090: int start; // First character in lexeme.
0091: int size; // Number of chars in lexeme.
0092: int next; // Position of the next character to be
0093: // scanned in the expression string.
0094: int prevEnd; // Position of the character just after the
0095: // last one in the previous lexeme. Used to
0096: // compute size of subexpression tokens.
0097: char[] originalExpr; // When combined with originalExprStart, these
0098: // values provide the orignial script info
0099: // passed to Tcl_ParseExpr.
0100: int originalExprStart; // Index of original start_index in the array,
0101: int originalExprSize; // Number of chars in original expr
0102: int lastChar; // Index of last character of expr.
0103:
0104: ParseInfo() {
0105: }
0106:
0107: ParseInfo(TclParse parseObj, char[] script_array,
0108: int script_index, int length) {
0109: this .parseObj = parseObj;
0110: lexeme = UNKNOWN;
0111: originalExpr = script_array;
0112: originalExprStart = script_index;
0113: start = -1;
0114: originalExprSize = length;
0115: size = length;
0116: next = script_index;
0117: prevEnd = script_index;
0118: lastChar = script_index + length;
0119: }
0120:
0121: // Return the original expression as a string. The start and size fields
0122: // of a ParseInfo struct can be changed while parsing, so use special
0123: // fields to get the original expression.
0124:
0125: String getOriginalExpr() {
0126: return new String(originalExpr, originalExprStart,
0127: originalExprSize);
0128: }
0129:
0130: // Return a copy of this ParseInfo object.
0131:
0132: ParseInfo duplicate() {
0133: ParseInfo dup = new ParseInfo();
0134: dup.parseObj = this .parseObj;
0135: dup.lexeme = this .lexeme;
0136: dup.start = this .start;
0137: dup.size = this .size;
0138: dup.next = this .next;
0139: dup.prevEnd = this .prevEnd;
0140: dup.originalExpr = this .originalExpr;
0141: dup.originalExprStart = this .originalExprStart;
0142: dup.originalExprSize = this .originalExprSize;
0143: dup.lastChar = this .lastChar;
0144: return dup;
0145: }
0146: }
0147:
0148: /*
0149: *----------------------------------------------------------------------
0150: *
0151: * Tcl_ParseExpr -> parseExpr
0152: *
0153: * Given a string, this procedure parses the first Tcl expression
0154: * in the string and returns information about the structure of
0155: * the expression. This procedure is the top-level interface to the
0156: * expression parsing module.
0157: *
0158: * Results:
0159: * The return value is a TclParse object. Its result field is TCL.OK
0160: * if the expression was parsed successfully and TCL.ERROR if a
0161: * TclException was raised while parsing. On a successful return,
0162: * the TclParse object is filled in with information about the
0163: * expression that was parsed.
0164: *
0165: * Side effects:
0166: * If there is insufficient space in parseObj to hold all the
0167: * information about the expression, then additional space is
0168: * malloc-ed. If the procedure returns Tcl.OK then the caller must
0169: * eventually invoke Tcl_FreeParse to release any additional space
0170: * that was allocated.
0171: *
0172: *----------------------------------------------------------------------
0173: */
0174:
0175: static TclParse parseExpr(Interp interp, // Used for error reporting.
0176: char[] script_array, // References the script and contains an
0177: int script_index, // index to the next character to parse.
0178: int numChars) // Number of characters in script. If < 0, the
0179: // script consists of all characters up to the
0180: // first null character.
0181: {
0182: int code;
0183: char savedChar;
0184: ParseInfo info;
0185: String fileName = "unknown";
0186: int lineNum = 0;
0187:
0188: int script_length = script_array.length - 1;
0189:
0190: if (numChars < 0) {
0191: numChars = script_length - script_index;
0192: }
0193: int endIndex = script_index + numChars;
0194: if (endIndex > script_length) {
0195: endIndex = script_length;
0196: }
0197:
0198: TclParse parse = new TclParse(interp, script_array, endIndex,
0199: fileName, lineNum);
0200:
0201: // Initialize the ParseInfo structure that holds state while parsing
0202: // the expression.
0203:
0204: info = new ParseInfo(parse, script_array, script_index,
0205: numChars);
0206:
0207: try {
0208: // Get the first lexeme then parse the expression.
0209:
0210: GetLexeme(interp, info);
0211:
0212: //System.out.println("after lex "+new String(info.originalExpr)+" "+lexemeStrings[info.lexeme]);
0213: ParseCondExpr(interp, info);
0214:
0215: if (info.lexeme != END) {
0216: LogSyntaxError(info,
0217: "extra tokens at end of expression");
0218: }
0219: } catch (TclException te) {
0220: parse.result = TCL.ERROR;
0221: return parse;
0222: }
0223:
0224: if (parse.result != TCL.OK) {
0225: throw new TclRuntimeError(
0226: "non TCL.OK parse result in parseExpr(): "
0227: + " TclException should have been raised");
0228: }
0229:
0230: parse.result = TCL.OK;
0231: return parse;
0232: }
0233:
0234: /*
0235: *----------------------------------------------------------------------
0236: *
0237: * ParseCondExpr --
0238: *
0239: * This procedure parses a Tcl conditional expression:
0240: * condExpr ::= lorExpr ['?' condExpr ':' condExpr]
0241: *
0242: * Note that this is the topmost recursive-descent parsing routine used
0243: * by TclParseExpr to parse expressions. This avoids an extra procedure
0244: * call since such a procedure would only return the result of calling
0245: * ParseCondExpr. Other recursive-descent procedures that need to parse
0246: * complete expressions also call ParseCondExpr.
0247: *
0248: * Results:
0249: * None. On success the method returns normally; on failure a
0250: * TclException is thrown instead of returning TCL_ERROR, and the
0251: * interpreter's result is expected to contain an error message.
0252: *
0253: * Side effects:
0254: * If there is insufficient space in parseObj to hold all the
0255: * information about the subexpression, then additional space is
0256: * malloc-ed.
0257: *
0258: *----------------------------------------------------------------------
0259: */
0260:
0261: static void ParseCondExpr(Interp interp, ParseInfo info)
0262: throws TclException
0263: // info Holds the parse state for the expression being parsed.
0264: {
0265: TclParse parseObj = info.parseObj;
0266: TclToken token, firstToken, condToken;
0267: int firstIndex, numToMove, code;
0268: int srcStart;
0269:
0270: //HERE("condExpr", 1);
0271: srcStart = info.start;
0272: firstIndex = parseObj.numTokens;
0273:
0274: ParseLorExpr(interp, info);
0275:
0276: if (info.lexeme == QUESTY) {
0277: // Emit two tokens: one TCL_TOKEN_SUB_EXPR token for the entire
0278: // conditional expression, and a TCL_TOKEN_OPERATOR token for
0279: // the "?" operator. Note that these two tokens must be inserted
0280: // before the LOR operand tokens generated above.
0281:
0282: parseObj.insertInTokenArray(firstIndex, 2);
0283: parseObj.numTokens += 2;
0284:
0285: token = parseObj.getToken(firstIndex);
0286: token.type = Parser.TCL_TOKEN_SUB_EXPR;
0287: token.script_array = info.originalExpr;
0288: token.script_index = srcStart;
0289: token.size = 0;
0290:
0291: token = parseObj.getToken(firstIndex + 1);
0292: token.type = Parser.TCL_TOKEN_OPERATOR;
0293: token.script_array = info.originalExpr;
0294: token.script_index = info.start;
0295: token.size = 1;
0296: token.numComponents = 0;
0297:
0298: // Skip over the '?'.
0299:
0300: GetLexeme(interp, info);
0301:
0302: // Parse the "then" expression.
0303:
0304: ParseCondExpr(interp, info);
0305: if (info.lexeme != COLON) {
0306: LogSyntaxError(info,
0307: "missing colon from ternary conditional");
0308: }
0309: GetLexeme(interp, info); // skip over the ':'
0310:
0311: // Parse the "else" expression.
0312:
0313: ParseCondExpr(interp, info);
0314:
0315: // Now set the size-related fields in the '?' subexpression token.
0316:
0317: condToken = parseObj.getToken(firstIndex);
0318: condToken.script_array = info.originalExpr;
0319: condToken.size = (info.prevEnd - srcStart);
0320: condToken.numComponents = parseObj.numTokens
0321: - (firstIndex + 1);
0322: }
0323: }
0324:
0325: /*
0326: *----------------------------------------------------------------------
0327: *
0328: * ParseLorExpr --
0329: *
0330: * This procedure parses a Tcl logical or expression:
0331: * lorExpr ::= landExpr {'||' landExpr}
0332: *
0333: * Results:
0334: * None. On success the method returns normally; on failure a
0335: * TclException is thrown instead of returning TCL_ERROR, and the
0336: * interpreter's result is expected to contain an error message.
0337: *
0338: * Side effects:
0339: * If there is insufficient space in parseObj to hold all the
0340: * information about the subexpression, then additional space is
0341: * malloc-ed.
0342: *
0343: *----------------------------------------------------------------------
0344: */
0345:
0346: static void ParseLorExpr(Interp interp, ParseInfo info)
0347: throws TclException {
0348: TclParse parseObj = info.parseObj;
0349: int firstIndex, code;
0350: int srcStart;
0351: int operator;
0352:
0353: //HERE("lorExpr", 2);
0354: srcStart = info.start;
0355: firstIndex = parseObj.numTokens;
0356:
0357: ParseLandExpr(interp, info);
0358:
0359: while (info.lexeme == OR) {
0360: operator = info.start;
0361: GetLexeme(interp, info); // skip over the '||'
0362: ParseLandExpr(interp, info);
0363:
0364: // Generate tokens for the LOR subexpression and the '||' operator.
0365:
0366: PrependSubExprTokens(operator, 2, srcStart,
0367: (info.prevEnd - srcStart), firstIndex, info);
0368: }
0369: }
0370:
0371: /*
0372: *----------------------------------------------------------------------
0373: *
0374: * ParseLandExpr --
0375: *
0376: * This procedure parses a Tcl logical and expression:
0377: * landExpr ::= bitOrExpr {'&&' bitOrExpr}
0378: *
0379: * Results:
0380: * None. On success the method returns normally; on failure a
0381: * TclException is thrown instead of returning TCL_ERROR, and the
0382: * interpreter's result is expected to contain an error message.
0383: *
0384: * Side effects:
0385: * If there is insufficient space in parseObj to hold all the
0386: * information about the subexpression, then additional space is
0387: * malloc-ed.
0388: *
0389: *----------------------------------------------------------------------
0390: */
0391:
0392: static void ParseLandExpr(Interp interp, ParseInfo info)
0393: throws TclException {
0394: TclParse parseObj = info.parseObj;
0395: int firstIndex, code;
0396: int srcStart, operator;
0397:
0398: //HERE("landExpr", 3);
0399: srcStart = info.start;
0400: firstIndex = parseObj.numTokens;
0401:
0402: ParseBitOrExpr(interp, info);
0403:
0404: while (info.lexeme == AND) {
0405: operator = info.start;
0406: GetLexeme(interp, info); // skip over the '&&'
0407: ParseBitOrExpr(interp, info);
0408:
0409: // Generate tokens for the LAND subexpression and the '&&' operator.
0410:
0411: PrependSubExprTokens(operator, 2, srcStart,
0412: (info.prevEnd - srcStart), firstIndex, info);
0413: }
0414: }
0415:
0416: /*
0417: *----------------------------------------------------------------------
0418: *
0419: * ParseBitOrExpr --
0420: *
0421: * This procedure parses a Tcl bitwise or expression:
0422: * bitOrExpr ::= bitXorExpr {'|' bitXorExpr}
0423: *
0424: * Results:
0425: * None. On success the method returns normally; on failure a
0426: * TclException is thrown instead of returning TCL_ERROR, and the
0427: * interpreter's result is expected to contain an error message.
0428: *
0429: * Side effects:
0430: * If there is insufficient space in parseObj to hold all the
0431: * information about the subexpression, then additional space is
0432: * malloc-ed.
0433: *
0434: *----------------------------------------------------------------------
0435: */
0436:
0437: static void ParseBitOrExpr(Interp interp, ParseInfo info)
0438: throws TclException {
0439: TclParse parseObj = info.parseObj;
0440: int firstIndex, code;
0441: int srcStart, operator;
0442:
0443: //HERE("bitOrExpr", 4);
0444: srcStart = info.start;
0445: firstIndex = parseObj.numTokens;
0446:
0447: ParseBitXorExpr(interp, info);
0448:
0449: while (info.lexeme == BIT_OR) {
0450: operator = info.start;
0451: GetLexeme(interp, info); // skip over the '|'
0452:
0453: ParseBitXorExpr(interp, info);
0454:
0455: // Generate tokens for the BITOR subexpression and the '|' operator.
0456:
0457: PrependSubExprTokens(operator, 1, srcStart,
0458: (info.prevEnd - srcStart), firstIndex, info);
0459: }
0460: }
0461:
0462: /*
0463: *----------------------------------------------------------------------
0464: *
0465: * ParseBitXorExpr --
0466: *
0467: * This procedure parses a Tcl bitwise exclusive or expression:
0468: * bitXorExpr ::= bitAndExpr {'^' bitAndExpr}
0469: *
0470: * Results:
0471: * None. On success the method returns normally; on failure a
0472: * TclException is thrown instead of returning TCL_ERROR, and the
0473: * interpreter's result is expected to contain an error message.
0474: *
0475: * Side effects:
0476: * If there is insufficient space in parseObj to hold all the
0477: * information about the subexpression, then additional space is
0478: * malloc-ed.
0479: *
0480: *----------------------------------------------------------------------
0481: */
0482:
0483: static void ParseBitXorExpr(Interp interp, ParseInfo info)
0484: throws TclException {
0485: TclParse parseObj = info.parseObj;
0486: int firstIndex, code;
0487: int srcStart, operator;
0488:
0489: //HERE("bitXorExpr", 5);
0490: srcStart = info.start;
0491: firstIndex = parseObj.numTokens;
0492:
0493: ParseBitAndExpr(interp, info);
0494:
0495: while (info.lexeme == BIT_XOR) {
0496: operator = info.start;
0497: GetLexeme(interp, info); // skip over the '^'
0498:
0499: ParseBitAndExpr(interp, info);
0500:
0501: // Generate tokens for the XOR subexpression and the '^' operator.
0502:
0503: PrependSubExprTokens(operator, 1, srcStart,
0504: (info.prevEnd - srcStart), firstIndex, info);
0505: }
0506: }
0507:
0508: /*
0509: *----------------------------------------------------------------------
0510: *
0511: * ParseBitAndExpr --
0512: *
0513: * This procedure parses a Tcl bitwise and expression:
0514: * bitAndExpr ::= equalityExpr {'&' equalityExpr}
0515: *
0516: * Results:
0517: * None. On success the method returns normally; on failure a
0518: * TclException is thrown instead of returning TCL_ERROR, and the
0519: * interpreter's result is expected to contain an error message.
0520: *
0521: * Side effects:
0522: * If there is insufficient space in parseObj to hold all the
0523: * information about the subexpression, then additional space is
0524: * malloc-ed.
0525: *
0526: *----------------------------------------------------------------------
0527: */
0528:
0529: static void ParseBitAndExpr(Interp interp, ParseInfo info)
0530: throws TclException {
0531: TclParse parseObj = info.parseObj;
0532: int firstIndex, code;
0533: int srcStart, operator;
0534:
0535: //HERE("bitAndExpr", 6);
0536: srcStart = info.start;
0537: firstIndex = parseObj.numTokens;
0538:
0539: ParseEqualityExpr(interp, info);
0540:
0541: while (info.lexeme == BIT_AND) {
0542: operator = info.start;
0543: GetLexeme(interp, info); // skip over the '&'
0544: ParseEqualityExpr(interp, info);
0545:
0546: // Generate tokens for the BITAND subexpression and '&' operator.
0547:
0548: PrependSubExprTokens(operator, 1, srcStart,
0549: (info.prevEnd - srcStart), firstIndex, info);
0550: }
0551: }
0552:
0553: /*
0554: *----------------------------------------------------------------------
0555: *
0556: * ParseEqualityExpr --
0557: *
0558: * This procedure parses a Tcl equality (inequality) expression:
0559: * equalityExpr ::= relationalExpr
0560: * {('==' | '!=' | 'ne' | 'eq') relationalExpr}
0561: *
0562: * Results:
0563: * None. On success the method returns normally; on failure a
0564: * TclException is thrown instead of returning TCL_ERROR, and the
0565: * interpreter's result is expected to contain an error message.
0566: *
0567: * Side effects:
0568: * If there is insufficient space in parseObj to hold all the
0569: * information about the subexpression, then additional space is
0570: * malloc-ed.
0571: *
0572: *----------------------------------------------------------------------
0573: */
0574:
0575: static void ParseEqualityExpr(Interp interp, ParseInfo info)
0576: throws TclException {
0577: TclParse parseObj = info.parseObj;
0578: int firstIndex, lexeme, code;
0579: int srcStart, operator;
0580:
0581: //HERE("equalityExpr", 7);
0582: srcStart = info.start;
0583: firstIndex = parseObj.numTokens;
0584:
0585: ParseRelationalExpr(interp, info);
0586:
0587: lexeme = info.lexeme;
0588: while ((lexeme == EQUAL) || (lexeme == NEQ)
0589: || (lexeme == STREQ) || (lexeme == STRNEQ)) {
0590: operator = info.start;
0591: GetLexeme(interp, info); // skip over ==, !=, 'eq' or 'ne'
0592: ParseRelationalExpr(interp, info);
0593:
0594: // Generate tokens for the subexpression and '==', '!=', 'eq' or 'ne'
0595: // operator.
0596:
0597: PrependSubExprTokens(operator, 2, srcStart,
0598: (info.prevEnd - srcStart), firstIndex, info);
0599: lexeme = info.lexeme;
0600: }
0601: }
0602:
0603: /*
0604: *----------------------------------------------------------------------
0605: *
0606: * ParseRelationalExpr --
0607: *
0608: * This procedure parses a Tcl relational expression:
0609: * relationalExpr ::= shiftExpr {('<' | '>' | '<=' | '>=') shiftExpr}
0610: *
0611: * Results:
0612: * None. On success the method returns normally; on failure a
0613: * TclException is thrown instead of returning TCL_ERROR, and the
0614: * interpreter's result is expected to contain an error message.
0615: *
0616: * Side effects:
0617: * If there is insufficient space in parseObj to hold all the
0618: * information about the subexpression, then additional space is
0619: * malloc-ed.
0620: *
0621: *----------------------------------------------------------------------
0622: */
0623:
0624: static void ParseRelationalExpr(Interp interp, ParseInfo info)
0625: throws TclException {
0626: TclParse parseObj = info.parseObj;
0627: int firstIndex, lexeme, operatorSize, code;
0628: int srcStart, operator;
0629:
0630: //HERE("relationalExpr", 8);
0631: srcStart = info.start;
0632: firstIndex = parseObj.numTokens;
0633:
0634: ParseShiftExpr(interp, info);
0635:
0636: lexeme = info.lexeme;
0637: while ((lexeme == LESS) || (lexeme == GREATER)
0638: || (lexeme == LEQ) || (lexeme == GEQ)) {
0639: operator = info.start;
0640: if ((lexeme == LEQ) || (lexeme == GEQ)) {
0641: operatorSize = 2;
0642: } else {
0643: operatorSize = 1;
0644: }
0645: GetLexeme(interp, info); // skip over the operator
0646: ParseShiftExpr(interp, info);
0647:
0648: // Generate tokens for the subexpression and the operator.
0649:
0650: PrependSubExprTokens(operator, operatorSize, srcStart,
0651: (info.prevEnd - srcStart), firstIndex, info);
0652: lexeme = info.lexeme;
0653: }
0654: }
0655:
0656: /*
0657: *----------------------------------------------------------------------
0658: *
0659: * ParseShiftExpr --
0660: *
0661: * This procedure parses a Tcl shift expression:
0662: * shiftExpr ::= addExpr {('<<' | '>>') addExpr}
0663: *
0664: * Results:
0665: * None. On success the method returns normally; on failure a
0666: * TclException is thrown instead of returning TCL_ERROR, and the
0667: * interpreter's result is expected to contain an error message.
0668: *
0669: * Side effects:
0670: * If there is insufficient space in parseObj to hold all the
0671: * information about the subexpression, then additional space is
0672: * malloc-ed.
0673: *
0674: *----------------------------------------------------------------------
0675: */
0676:
0677: static void ParseShiftExpr(Interp interp, ParseInfo info)
0678: throws TclException {
0679: TclParse parseObj = info.parseObj;
0680: int firstIndex, lexeme, code;
0681: int srcStart, operator;
0682:
0683: //HERE("shiftExpr", 9);
0684: srcStart = info.start;
0685: firstIndex = parseObj.numTokens;
0686:
0687: ParseAddExpr(interp, info);
0688:
0689: lexeme = info.lexeme;
0690: while ((lexeme == LEFT_SHIFT) || (lexeme == RIGHT_SHIFT)) {
0691: operator = info.start;
0692: GetLexeme(interp, info); // skip over << or >>
0693: ParseAddExpr(interp, info);
0694:
0695: // Generate tokens for the subexpression and '<<' or '>>' operator.
0696:
0697: PrependSubExprTokens(operator, 2, srcStart,
0698: (info.prevEnd - srcStart), firstIndex, info);
0699: lexeme = info.lexeme;
0700: }
0701: }
0702:
0703: /*
0704: *----------------------------------------------------------------------
0705: *
0706: * ParseAddExpr --
0707: *
0708: * This procedure parses a Tcl addition expression:
0709: * addExpr ::= multiplyExpr {('+' | '-') multiplyExpr}
0710: *
0711: * Results:
0712: * None. On success the method returns normally; on failure a
0713: * TclException is thrown instead of returning TCL_ERROR, and the
0714: * interpreter's result is expected to contain an error message.
0715: *
0716: * Side effects:
0717: * If there is insufficient space in parseObj to hold all the
0718: * information about the subexpression, then additional space is
0719: * malloc-ed.
0720: *
0721: *----------------------------------------------------------------------
0722: */
0723:
0724: static void ParseAddExpr(Interp interp, ParseInfo info)
0725: throws TclException {
0726: TclParse parseObj = info.parseObj;
0727: int firstIndex, lexeme, code;
0728: int srcStart, operator;
0729:
0730: //HERE("addExpr", 10);
0731: srcStart = info.start;
0732: firstIndex = parseObj.numTokens;
0733: //System.out.println("parse adda "+info.start+" "+info.size);
0734:
0735: ParseMultiplyExpr(interp, info);
0736:
0737: lexeme = info.lexeme;
0738: //System.out.println("parse add "+info.start);
0739: while ((lexeme == PLUS) || (lexeme == MINUS)) {
0740: //System.out.println("add while");
0741: operator = info.start;
0742: GetLexeme(interp, info); // skip over + or -
0743: //System.out.println("after getlex "+info.start+" "+info.size);
0744: ParseMultiplyExpr(interp, info);
0745: //System.out.println("parse after mult "+info.start);
0746:
0747: // Generate tokens for the subexpression and '+' or '-' operator.
0748:
0749: PrependSubExprTokens(operator, 1, srcStart,
0750: (info.prevEnd - srcStart), firstIndex, info);
0751: lexeme = info.lexeme;
0752: }
0753: }
0754:
0755: /*
0756: *----------------------------------------------------------------------
0757: *
0758: * ParseMultiplyExpr --
0759: *
0760: * This procedure parses a Tcl multiply expression:
0761: * multiplyExpr ::= unaryExpr {('*' | '/' | '%') unaryExpr}
0762: *
0763: * Results:
0764: * None. On success the method returns normally; on failure a
0765: * TclException is thrown instead of returning TCL_ERROR, and the
0766: * interpreter's result is expected to contain an error message.
0767: *
0768: * Side effects:
0769: * If there is insufficient space in parseObj to hold all the
0770: * information about the subexpression, then additional space is
0771: * malloc-ed.
0772: *
0773: *----------------------------------------------------------------------
0774: */
0775:
0776: static void ParseMultiplyExpr(Interp interp, ParseInfo info)
0777: throws TclException {
0778: TclParse parseObj = info.parseObj;
0779: int firstIndex, lexeme, code;
0780: int srcStart, operator;
0781:
0782: //HERE("multiplyExpr", 11);
0783: srcStart = info.start;
0784: firstIndex = parseObj.numTokens;
0785:
0786: ParseUnaryExpr(interp, info);
0787:
0788: lexeme = info.lexeme;
0789: while ((lexeme == MULT) || (lexeme == DIVIDE)
0790: || (lexeme == MOD)) {
0791: operator = info.start;
0792: GetLexeme(interp, info); // skip over * or / or %
0793: ParseUnaryExpr(interp, info);
0794:
0795: // Generate tokens for the subexpression and * or / or % operator.
0796:
0797: PrependSubExprTokens(operator, 1, srcStart,
0798: (info.prevEnd - srcStart), firstIndex, info);
0799: lexeme = info.lexeme;
0800: }
0801: }
0802:
0803: /*
0804: *----------------------------------------------------------------------
0805: *
0806: * ParseUnaryExpr --
0807: *
0808: * This procedure parses a Tcl unary expression:
0809: * unaryExpr ::= ('+' | '-' | '~' | '!') unaryExpr | primaryExpr
0810: *
0811: * Results:
0812: * None. On success the method returns normally; on failure a
0813: * TclException is thrown instead of returning TCL_ERROR, and the
0814: * interpreter's result is expected to contain an error message.
0815: *
0816: * Side effects:
0817: * If there is insufficient space in parseObj to hold all the
0818: * information about the subexpression, then additional space is
0819: * malloc-ed.
0820: *
0821: *----------------------------------------------------------------------
0822: */
0823:
0824: static void ParseUnaryExpr(Interp interp, ParseInfo info)
0825: throws TclException {
0826: TclParse parseObj = info.parseObj;
0827: int firstIndex, lexeme, code;
0828: int srcStart, operator;
0829:
0830: //HERE("unaryExpr", 12);
0831: srcStart = info.start;
0832: firstIndex = parseObj.numTokens;
0833:
0834: lexeme = info.lexeme;
0835: if ((lexeme == PLUS) || (lexeme == MINUS)
0836: || (lexeme == BIT_NOT) || (lexeme == NOT)) {
0837: operator = info.start;
0838: GetLexeme(interp, info); // skip over the unary operator
0839: //System.out.println("after getlex "+info.start+" "+info.size);
0840: ParseUnaryExpr(interp, info);
0841:
0842: // Generate tokens for the subexpression and the operator.
0843:
0844: PrependSubExprTokens(operator, 1, srcStart,
0845: (info.prevEnd - srcStart), firstIndex, info);
0846: } else { // must be a primaryExpr
0847: ParsePrimaryExpr(interp, info);
0848: }
0849: }
0850:
0851: /*
0852: *----------------------------------------------------------------------
0853: *
0854: * ParsePrimaryExpr --
0855: *
0856: * This procedure parses a Tcl primary expression:
0857: * primaryExpr ::= literal | varReference | quotedString |
0858: * '[' command ']' | mathFuncCall | '(' condExpr ')'
0859: *
0860: * Results:
0861: * None. On success the method returns normally; on failure a
0862: * TclException is thrown instead of returning TCL_ERROR, and the
0863: * interpreter's result is expected to contain an error message.
0864: *
0865: * Side effects:
0866: * If there is insufficient space in parseObj to hold all the
0867: * information about the subexpression, then additional space is
0868: * malloc-ed.
0869: *
0870: *----------------------------------------------------------------------
0871: */
0872:
0873: static void ParsePrimaryExpr(Interp interp, ParseInfo info)
0874: throws TclException {
0875: TclParse parseObj = info.parseObj;
0876: TclToken token, exprToken;
0877: TclParse nested;
0878: int dollar, stringStart, term, src;
0879: int lexeme, exprIndex, firstIndex, numToMove, code;
0880: //System.out.println("parse primary "+info.lexeme+" "+info.start+" "+info.size+" "+parseObj.numTokens);
0881: //System.out.println(info.originalExpr);
0882:
0883: // We simply recurse on parenthesized subexpressions.
0884:
0885: //HERE("primaryExpr", 13);
0886: lexeme = info.lexeme;
0887: if (lexeme == OPEN_PAREN) {
0888: GetLexeme(interp, info); // skip over the '('
0889: ParseCondExpr(interp, info);
0890: if (info.lexeme != CLOSE_PAREN) {
0891: LogSyntaxError(info, "looking for close parenthesis");
0892: }
0893: GetLexeme(interp, info); // skip over the ')'
0894: return;
0895: }
0896:
0897: // Start a TCL_TOKEN_SUB_EXPR token for the primary.
0898:
0899: if (parseObj.numTokens == parseObj.tokensAvailable) {
0900: parseObj.expandTokenArray(parseObj.numTokens);
0901: }
0902: exprIndex = parseObj.numTokens;
0903: exprToken = parseObj.getToken(exprIndex);
0904: exprToken.type = Parser.TCL_TOKEN_SUB_EXPR;
0905: exprToken.script_array = info.originalExpr;
0906: exprToken.script_index = info.start;
0907: parseObj.numTokens++;
0908:
0909: // Process the primary then finish setting the fields of the
0910: // TCL_TOKEN_SUB_EXPR token. Note that we can't use the pointer now
0911: // stored in "exprToken" in the code below since the token array
0912: // might be reallocated.
0913:
0914: firstIndex = parseObj.numTokens;
0915: switch (lexeme) {
0916: case LITERAL:
0917: // Int or double number.
0918:
0919: //tokenizeLiteral:
0920: if (parseObj.numTokens == parseObj.tokensAvailable) {
0921: parseObj.expandTokenArray(parseObj.numTokens);
0922: }
0923: //System.out.println("literal " + parseObj.numTokens);
0924: token = parseObj.getToken(parseObj.numTokens);
0925: token.type = Parser.TCL_TOKEN_TEXT;
0926: token.script_array = info.originalExpr;
0927: token.script_index = info.start;
0928: token.size = info.size;
0929: info.next = info.start + info.size;
0930: token.numComponents = 0;
0931: parseObj.numTokens++;
0932:
0933: exprToken.script_array = info.originalExpr;
0934: exprToken.size = info.size;
0935: exprToken.numComponents = 1;
0936: break;
0937:
0938: case DOLLAR:
0939: // $var variable reference.
0940:
0941: dollar = (info.next - 1);
0942: //System.out.println("dollar "+dollar+" "+info.lastChar);
0943: parseObj = Parser.parseVarName(interp, info.originalExpr,
0944: dollar, info.lastChar - dollar, parseObj, true);
0945:
0946: if (parseObj.result != TCL.OK) {
0947: throw new TclException(parseObj.result);
0948: }
0949:
0950: info.next = dollar + parseObj.getToken(firstIndex).size;
0951:
0952: exprToken = parseObj.getToken(exprIndex);
0953: exprToken.size = parseObj.getToken(firstIndex).size;
0954: exprToken.numComponents = parseObj.getToken(firstIndex).numComponents + 1;
0955: exprToken.script_array = info.originalExpr;
0956: break;
0957:
0958: case QUOTE:
0959: // '"' string '"'
0960:
0961: stringStart = info.next;
0962:
0963: // Raises a TclException on error
0964: parseObj = Parser.ParseQuotedString(interp,
0965: info.originalExpr, (info.next - 1),
0966: (info.lastChar - stringStart), parseObj, true);
0967:
0968: term = parseObj.extra;
0969: info.next = term;
0970:
0971: exprToken = parseObj.getToken(exprIndex);
0972: exprToken.size = (term - exprToken.script_index);
0973: exprToken.numComponents = parseObj.numTokens - firstIndex;
0974: exprToken.script_array = info.originalExpr;
0975:
0976: // If parsing the quoted string resulted in more than one token,
0977: // insert a TCL_TOKEN_WORD token before them. This indicates that
0978: // the quoted string represents a concatenation of multiple tokens.
0979:
0980: if (exprToken.numComponents > 1) {
0981: if (parseObj.numTokens >= parseObj.tokensAvailable) {
0982: parseObj.expandTokenArray(parseObj.numTokens + 1);
0983: }
0984: parseObj.insertInTokenArray(firstIndex, 1);
0985: parseObj.numTokens++;
0986: token = parseObj.getToken(firstIndex);
0987:
0988: exprToken = parseObj.getToken(exprIndex);
0989: exprToken.numComponents++;
0990: exprToken.script_array = info.originalExpr;
0991:
0992: token.type = Parser.TCL_TOKEN_WORD;
0993: token.script_array = info.originalExpr;
0994: token.script_index = exprToken.script_index;
0995: token.size = exprToken.size;
0996: token.numComponents = (exprToken.numComponents - 1);
0997: }
0998: break;
0999:
1000: case OPEN_BRACKET:
1001: // '[' command {command} ']'
1002:
1003: if (parseObj.numTokens == parseObj.tokensAvailable) {
1004: parseObj.expandTokenArray(parseObj.numTokens);
1005: }
1006: token = parseObj.getToken(parseObj.numTokens);
1007: token.type = Parser.TCL_TOKEN_COMMAND;
1008: token.script_array = info.originalExpr;
1009: token.script_index = info.start;
1010: token.numComponents = 0;
1011: parseObj.numTokens++;
1012:
1013: // Call Tcl_ParseCommand repeatedly to parse the nested command(s)
1014: // to find their end, then throw away that parse information.
1015:
1016: src = info.next;
1017: while (true) {
1018: nested = Parser.parseCommand(interp, info.originalExpr,
1019: src, parseObj.endIndex - src,
1020: parseObj.fileName, parseObj.lineNum, true);
1021: if (nested.result != TCL.OK) {
1022: parseObj.termIndex = nested.termIndex;
1023: parseObj.errorType = nested.errorType;
1024: parseObj.incomplete = nested.incomplete;
1025: parseObj.result = nested.result;
1026: }
1027: src = (nested.commandStart + nested.commandSize);
1028:
1029: // Check for the closing ']' that ends the command substitution.
1030: // It must have been the last character of the parsed command.
1031:
1032: if ((nested.termIndex < parseObj.endIndex)
1033: && (info.originalExpr[nested.termIndex] == ']')
1034: && !nested.incomplete) {
1035: break;
1036: }
1037: if (src == parseObj.endIndex) {
1038: parseObj.termIndex = token.script_index;
1039: parseObj.incomplete = true;
1040: parseObj.result = TCL.ERROR;
1041: throw new TclException(parseObj.interp,
1042: "missing close-bracket");
1043: }
1044: }
1045: token.size = src - token.script_index;
1046: info.next = src;
1047:
1048: exprToken = parseObj.getToken(exprIndex);
1049: exprToken.size = src - token.script_index;
1050: exprToken.numComponents = 1;
1051: exprToken.script_array = info.originalExpr;
1052: break;
1053:
1054: case OPEN_BRACE:
1055: // '{' string '}'
1056:
1057: parseObj = Parser.ParseBraces(interp, info.originalExpr,
1058: info.start, (info.lastChar - info.start), parseObj,
1059: true);
1060: term = parseObj.extra;
1061: info.next = term;
1062:
1063: exprToken = parseObj.getToken(exprIndex);
1064: exprToken.size = (term - info.start);
1065: exprToken.numComponents = parseObj.numTokens - firstIndex;
1066: // exprToken.script_array = info.originalExpr; // Does not appear in C impl
1067:
1068: // If parsing the braced string resulted in more than one token,
1069: // insert a TCL_TOKEN_WORD token before them. This indicates that
1070: // the braced string represents a concatenation of multiple tokens.
1071:
1072: if (exprToken.numComponents > 1) {
1073: if (parseObj.numTokens >= parseObj.tokensAvailable) {
1074: parseObj.expandTokenArray(parseObj.numTokens + 1);
1075: }
1076: parseObj.insertInTokenArray(firstIndex, 1);
1077: parseObj.numTokens++;
1078: token = parseObj.getToken(firstIndex);
1079:
1080: exprToken = parseObj.getToken(exprIndex);
1081: // exprToken.script_array = info.originalExpr; // Does not appear in C impl
1082: exprToken.numComponents++;
1083:
1084: token.type = Parser.TCL_TOKEN_WORD;
1085: token.script_array = exprToken.script_array;
1086: token.script_index = exprToken.script_index;
1087: token.size = exprToken.size;
1088: token.numComponents = exprToken.numComponents - 1;
1089: }
1090: break;
1091:
1092: case FUNC_NAME:
1093: // math_func '(' expr {',' expr} ')'
1094:
1095: ParseInfo savedInfo = info.duplicate();
1096:
1097: GetLexeme(interp, info); // skip over function name
1098: if (info.lexeme != OPEN_PAREN) {
1099: //StringBuffer functionName;
1100: TclObject obj = TclString.newInstance(new String(
1101: savedInfo.originalExpr, savedInfo.start,
1102: savedInfo.size));
1103:
1104: // Check for boolean literals (true, false, yes, no, on, off)
1105: obj.preserve();
1106: try {
1107: TclBoolean.get(interp, obj);
1108:
1109: // If we get this far, then boolean conversion worked
1110: info = savedInfo;
1111:
1112: // goto tokenizeLiteral;
1113: if (parseObj.numTokens == parseObj.tokensAvailable) {
1114: parseObj.expandTokenArray(parseObj.numTokens);
1115: }
1116: //System.out.println("literal " + parseObj.numTokens);
1117: token = parseObj.getToken(parseObj.numTokens);
1118: token.type = Parser.TCL_TOKEN_TEXT;
1119: token.script_array = info.originalExpr;
1120: token.script_index = info.start;
1121: token.size = info.size;
1122: info.next = info.start + info.size;
1123: token.numComponents = 0;
1124: parseObj.numTokens++;
1125:
1126: exprToken.script_array = info.originalExpr;
1127: exprToken.size = info.size;
1128: exprToken.numComponents = 1;
1129:
1130: break; // out of switch
1131: } catch (TclException ex) {
1132: // Do nothing when boolean conversion fails,
1133: // continue on and raise a syntax error.
1134: } finally {
1135: obj.release();
1136: }
1137:
1138: // FIXME: Implement function name vs var lookup error msg
1139: LogSyntaxError(info, null);
1140: }
1141:
1142: if (parseObj.numTokens == parseObj.tokensAvailable) {
1143: parseObj.expandTokenArray(parseObj.numTokens);
1144: }
1145: token = parseObj.getToken(parseObj.numTokens);
1146: token.type = Parser.TCL_TOKEN_OPERATOR;
1147: token.script_array = savedInfo.originalExpr;
1148: token.script_index = savedInfo.start;
1149: token.size = savedInfo.size;
1150: token.numComponents = 0;
1151: parseObj.numTokens++;
1152:
1153: GetLexeme(interp, info); // skip over '('
1154:
1155: while (info.lexeme != CLOSE_PAREN) {
1156: ParseCondExpr(interp, info);
1157:
1158: if (info.lexeme == COMMA) {
1159: GetLexeme(interp, info); // skip over ,
1160: } else if (info.lexeme != CLOSE_PAREN) {
1161: LogSyntaxError(info,
1162: "missing close parenthesis at end of function call");
1163: }
1164: }
1165:
1166: exprToken = parseObj.getToken(exprIndex);
1167: exprToken.size = (info.next - exprToken.script_index);
1168: //System.out.println("exprToken size "+exprToken.size+" "+info.next+" "+exprToken.script_index);
1169: exprToken.numComponents = parseObj.numTokens - firstIndex;
1170: exprToken.script_array = info.originalExpr;
1171: break;
1172:
1173: case COMMA:
1174: LogSyntaxError(info,
1175: "commas can only separate function arguments");
1176: case END:
1177: LogSyntaxError(info, "premature end of expression");
1178: case UNKNOWN:
1179: LogSyntaxError(info,
1180: "single equality character not legal in expressions");
1181: case UNKNOWN_CHAR:
1182: LogSyntaxError(info, "character not legal in expressions");
1183: case QUESTY:
1184: LogSyntaxError(info, "unexpected ternary 'then' separator");
1185: case COLON:
1186: LogSyntaxError(info, "unexpected ternary 'else' separator");
1187: case CLOSE_PAREN:
1188: LogSyntaxError(info, "unexpected close parenthesis");
1189:
1190: default:
1191: String msg = "unexpected operator " + lexemeStrings[lexeme];
1192: LogSyntaxError(info, msg);
1193: }
1194:
1195: // Advance to the next lexeme before returning.
1196:
1197: GetLexeme(interp, info);
1198: parseObj.termIndex = info.next;
1199: return;
1200: }
1201:
1202: /*
1203: *----------------------------------------------------------------------
1204: *
1205: * GetLexeme --
1206: *
1207: * Lexical scanner for Tcl expressions: scans a single operator or
1208: * other syntactic element from an expression string.
1209: *
1210: * Results:
1211: * None; the method is void and reports errors by throwing a
1212: * TclException. When a scanned number is rejected (integer or
1213: * floating-point range error, or any other conversion error),
1214: * parseObj.errorType is set to TCL_PARSE_BAD_NUMBER before the
1215: * error is raised. If the lexical analysis is successful,
1216: * info.lexeme refers to the next symbol in the expression string,
1217: * and info.next is advanced past the lexeme. For every lexeme
1218: * other than END, info.start is set to the index of the first
1219: * character of the lexeme and info.size to its length in
1220: * characters.
1221: *
1222: * Side effects:
1223: * If there is insufficient space in parseObj to hold all the
1224: * information about the subexpression, then additional space is
1225: * malloc-ed..
1226: *
1227: *----------------------------------------------------------------------
1228: */
1229:
1230: static void GetLexeme(Interp interp, ParseInfo info)
1231: throws TclException
1232: // info; Holds state needed to parse the expr, including the resulting lexeme.
1233: {
1234: int src; // Points to current source char.
1235: int term; // Points to char terminating a literal.
1236: double doubleValue; // Value of a scanned double literal.
1237: char c, c2;
1238: boolean startsWithDigit;
1239: int offset, length;
1240: TclParse parseObj = info.parseObj;
1241: char ch;
1242: info.lexeme = UNKNOWN;
1243: //System.out.println("getlex");
1244:
1245: // Record where the previous lexeme ended. Since we always read one
1246: // lexeme ahead during parsing, this helps us know the source length of
1247: // subexpression tokens.
1248:
1249: info.prevEnd = info.next;
1250:
1251: // Scan over leading white space at the start of a lexeme. Note that a
1252: // backslash-newline is treated as a space.
1253:
1254: src = info.next;
1255: if (src >= info.lastChar) {
1256: info.lexeme = END;
1257: info.next = src;
1258: return;
1259: }
1260: c = info.originalExpr[src];
1261: //System.out.println(new String(info.originalExpr,src,info.size));
1262: // FIXME: This code should invoke Parser.ParseWhiteSpace()
1263: // to handle embedded nulls properly. It is disabled for now.
1264: // See parseExpr-1.1 in parseRxpr.test for a test case.
1265: while ((c == ' ') || Character.isWhitespace(c) || (c == '\\')) { // INTL: ISO space
1266: if (c == '\\') {
1267: if (info.originalExpr[src + 1] == '\n') {
1268: src += 2;
1269: } else {
1270: break; // no longer white space
1271: }
1272: } else {
1273: src++;
1274: }
1275: c = info.originalExpr[src];
1276: }
1277: parseObj.termIndex = src;
1278: if (src >= info.lastChar) {
1279: info.lexeme = END;
1280: info.next = src;
1281: return;
1282: }
1283: //System.out.println(new String(info.originalExpr,src,info.size));
1284:
1285: // Try to parse the lexeme first as an integer or floating-point
1286: // number. Don't check for a number if the first character c is
1287: // "+" or "-". If we did, we might treat a binary operator as unary
1288: // by mistake, which would eventually cause a syntax error.
1289:
1290: if ((c != '+') && (c != '-')) {
1291: startsWithDigit = Character.isDigit(c); // INTL: digit
1292: String s = new String(info.originalExpr, src, info.lastChar
1293: - src);
1294: if (startsWithDigit
1295: && Expression.looksLikeInt(s, s.length(), 0, false)) {
1296: StrtoulResult res = interp.strtoulResult;
1297: Util.strtoul(s, 0, 0, res);
1298: if (res.errno == 0) {
1299: term = src + res.index;
1300: info.lexeme = LITERAL;
1301: info.start = src;
1302: info.size = (term - src);
1303: info.next = term;
1304: parseObj.termIndex = term;
1305: return;
1306: } else {
1307: parseObj.errorType = Parser.TCL_PARSE_BAD_NUMBER;
1308: if (res.errno == TCL.INTEGER_RANGE) {
1309: Expression.IntegerTooLarge(interp);
1310: } else {
1311: throw new TclException(interp,
1312: "parse bad number");
1313: }
1314: }
1315: } else if ((length = ParseMaxDoubleLength(
1316: info.originalExpr, src, info.lastChar)) > 0) {
1317:
1318: // There are length characters that could be a double.
1319: // Let strtod() tells us for sure.
1320:
1321: s = new String(info.originalExpr, src, length);
1322:
1323: StrtodResult res = interp.strtodResult;
1324: Util.strtod(s, 0, -1, res);
1325: if (res.index > 0) {
1326: if (res.errno != 0) {
1327: parseObj.errorType = Parser.TCL_PARSE_BAD_NUMBER;
1328: if (res.errno == TCL.DOUBLE_RANGE) {
1329: if (res.value != 0) {
1330: Expression.DoubleTooLarge(interp);
1331: } else {
1332: Expression.DoubleTooSmall(interp);
1333: }
1334: } else {
1335: throw new TclException(interp,
1336: "parse bad number");
1337: }
1338: }
1339:
1340: // string was the start of a valid double, copied
1341: // from src.
1342:
1343: term = src + res.index;
1344: info.lexeme = LITERAL;
1345: info.start = src;
1346: info.size = (term - src);
1347: if (info.size > length) {
1348: info.size = length;
1349: }
1350: info.next = src + info.size;
1351: parseObj.termIndex = info.next;
1352: return;
1353: }
1354: }
1355: }
1356:
1357: // Not an integer or double literal. Initialize the lexeme's fields
1358: // assuming the common case of a single character lexeme.
1359:
1360: c = info.originalExpr[src];
1361: c2 = info.originalExpr[src + 1];
1362: info.start = src;
1363: info.size = 1;
1364: info.next = src + 1;
1365: parseObj.termIndex = info.next;
1366:
1367: switch (c) {
1368: case '[':
1369: info.lexeme = OPEN_BRACKET;
1370: return;
1371:
1372: case '{':
1373: info.lexeme = OPEN_BRACE;
1374: return;
1375:
1376: case '(':
1377: info.lexeme = OPEN_PAREN;
1378: return;
1379:
1380: case ')':
1381: info.lexeme = CLOSE_PAREN;
1382: return;
1383:
1384: case '$':
1385: info.lexeme = DOLLAR;
1386: return;
1387:
1388: case '\"':
1389: info.lexeme = QUOTE;
1390: return;
1391:
1392: case ',':
1393: info.lexeme = COMMA;
1394: return;
1395:
1396: case '*':
1397: info.lexeme = MULT;
1398: return;
1399:
1400: case '/':
1401: info.lexeme = DIVIDE;
1402: return;
1403:
1404: case '%':
1405: info.lexeme = MOD;
1406: return;
1407:
1408: case '+':
1409: info.lexeme = PLUS;
1410: return;
1411:
1412: case '-':
1413: info.lexeme = MINUS;
1414: return;
1415:
1416: case '?':
1417: info.lexeme = QUESTY;
1418: return;
1419:
1420: case ':':
1421: info.lexeme = COLON;
1422: return;
1423:
1424: case '<':
1425: switch (c2) {
1426: case '<':
1427: info.lexeme = LEFT_SHIFT;
1428: info.size = 2;
1429: info.next = src + 2;
1430: break;
1431: case '=':
1432: info.lexeme = LEQ;
1433: info.size = 2;
1434: info.next = src + 2;
1435: break;
1436: default:
1437: info.lexeme = LESS;
1438: break;
1439: }
1440: parseObj.termIndex = info.next;
1441: return;
1442:
1443: case '>':
1444: switch (c2) {
1445: case '>':
1446: info.lexeme = RIGHT_SHIFT;
1447: info.size = 2;
1448: info.next = src + 2;
1449: break;
1450: case '=':
1451: info.lexeme = GEQ;
1452: info.size = 2;
1453: info.next = src + 2;
1454: break;
1455: default:
1456: info.lexeme = GREATER;
1457: break;
1458: }
1459: parseObj.termIndex = info.next;
1460: return;
1461:
1462: case '=':
1463: if (c2 == '=') {
1464: info.lexeme = EQUAL;
1465: info.size = 2;
1466: info.next = src + 2;
1467: } else {
1468: info.lexeme = UNKNOWN;
1469: }
1470: parseObj.termIndex = info.next;
1471: return;
1472:
1473: case '!':
1474: if (c2 == '=') {
1475: info.lexeme = NEQ;
1476: info.size = 2;
1477: info.next = src + 2;
1478: } else {
1479: info.lexeme = NOT;
1480: }
1481: parseObj.termIndex = info.next;
1482: return;
1483:
1484: case '&':
1485: if (c2 == '&') {
1486: info.lexeme = AND;
1487: info.size = 2;
1488: info.next = src + 2;
1489: } else {
1490: info.lexeme = BIT_AND;
1491: }
1492: parseObj.termIndex = info.next;
1493: return;
1494:
1495: case '^':
1496: info.lexeme = BIT_XOR;
1497: return;
1498:
1499: case '|':
1500: if (c2 == '|') {
1501: info.lexeme = OR;
1502: info.size = 2;
1503: info.next = src + 2;
1504: } else {
1505: info.lexeme = BIT_OR;
1506: }
1507: parseObj.termIndex = info.next;
1508: return;
1509:
1510: case '~':
1511: info.lexeme = BIT_NOT;
1512: return;
1513:
1514: case 'e':
1515: if (c2 == 'q') {
1516: info.lexeme = STREQ;
1517: info.size = 2;
1518: info.next = src + 2;
1519: parseObj.termIndex = info.next;
1520: return;
1521: } else {
1522: checkFuncName(interp, info, src);
1523: return;
1524: }
1525:
1526: case 'n':
1527: if (c2 == 'e') {
1528: info.lexeme = STRNEQ;
1529: info.size = 2;
1530: info.next = src + 2;
1531: parseObj.termIndex = info.next;
1532: return;
1533: } else {
1534: checkFuncName(interp, info, src);
1535: return;
1536: }
1537:
1538: default:
1539: checkFuncName(interp, info, src);
1540: return;
1541: }
1542: }
1543:
1544: static void checkFuncName(Interp interp, ParseInfo info, int src) {
1545: char c = info.originalExpr[src];
1546: if (Character.isLetter(c)) { // INTL: ISO only.
1547: info.lexeme = FUNC_NAME;
1548: while (Character.isLetterOrDigit(c) || (c == '_')) { // INTL: ISO only.
1549: src++;
1550: c = info.originalExpr[src];
1551: }
1552: info.size = (src - info.start);
1553: info.next = src;
1554: info.parseObj.termIndex = info.next;
1555: String s = new String(info.originalExpr, info.start,
1556: info.size);
1557:
1558: // Check for boolean literals (true, false, yes, no, on, off)
1559:
1560: c = info.originalExpr[info.start];
1561: switch (c) {
1562: case 'f':
1563: if (info.size == 5 && s.equals("false")) {
1564: info.lexeme = LITERAL;
1565: return;
1566: }
1567: break;
1568: case 'n':
1569: if (info.size == 2 && s.equals("no")) {
1570: info.lexeme = LITERAL;
1571: return;
1572: }
1573: break;
1574: case 'o':
1575: if (info.size == 3 && s.equals("off")) {
1576: info.lexeme = LITERAL;
1577: return;
1578: } else if (info.size == 2 && s.equals("on")) {
1579: info.lexeme = LITERAL;
1580: return;
1581: }
1582: break;
1583: case 't':
1584: if (info.size == 4 && s.equals("true")) {
1585: info.lexeme = LITERAL;
1586: return;
1587: }
1588: break;
1589: case 'y':
1590: if (info.size == 3 && s.equals("yes")) {
1591: info.lexeme = LITERAL;
1592: return;
1593: }
1594: break;
1595: }
1596: } else {
1597: info.lexeme = UNKNOWN_CHAR;
1598: }
1599: }
1600:
1601: /*
1602: *----------------------------------------------------------------------
1603: *
1604: * PrependSubExprTokens --
1605: *
1606: * This procedure is called after the operands of a subexpression have
1607: * been parsed. It generates two tokens: a TCL_TOKEN_SUB_EXPR token for
1608: * the subexpression, and a TCL_TOKEN_OPERATOR token for its operator.
1609: * These two tokens are inserted before the operand tokens.
1610: *
1611: * Results:
1612: * None.
1613: *
1614: * Side effects:
1615: * If there is insufficient space in parseObj to hold the new tokens,
1616: * additional space is malloc-ed.
1617: *
1618: *----------------------------------------------------------------------
1619: */
1620:
1621: static void PrependSubExprTokens(int op, int opBytes, int src,
1622: int srcBytes, int firstIndex, ParseInfo info)
1623: /* op; Points to first byte of the operator
1624: * in the source script. */
1625: /* opBytes; Number of bytes in the operator. */
1626: /* src; /* Points to first byte of the subexpression
1627: * in the source script. */
1628: /* srcBytes; Number of bytes in subexpression's
1629: * source. */
1630: /* firstIndex; Index of first token already emitted for
1631: * operator's first (or only) operand. */
1632: /* info; /* Holds the parse state for the
1633: * expression being parsed. */
1634: {
1635: //System.out.println("prepend "+firstIndex+" "+srcBytes+" "+src);
1636: TclParse parseObj = info.parseObj;
1637: TclToken token, firstToken;
1638: int numToMove;
1639:
1640: if ((parseObj.numTokens + 1) >= parseObj.tokensAvailable) {
1641: parseObj.expandTokenArray(parseObj.numTokens + 1);
1642: }
1643: parseObj.insertInTokenArray(firstIndex, 2);
1644: parseObj.numTokens += 2;
1645:
1646: token = parseObj.getToken(firstIndex);
1647: token.type = Parser.TCL_TOKEN_SUB_EXPR;
1648: token.script_index = src;
1649: token.script_array = info.originalExpr;
1650: token.size = srcBytes;
1651: token.numComponents = parseObj.numTokens - (firstIndex + 1);
1652:
1653: token = parseObj.getToken(firstIndex + 1);
1654: token.type = Parser.TCL_TOKEN_OPERATOR;
1655: token.script_index = op;
1656: token.script_array = info.originalExpr;
1657: token.size = opBytes;
1658: token.numComponents = 0;
1659: }
1660:
1661: /*
1662: *----------------------------------------------------------------------
1663: *
1664: * LogSyntaxError --
1665: *
1666: * This procedure is invoked after an error occurs when parsing an
1667: * expression. It sets the interpreter result to an error message
1668: * describing the error.
1669: *
1670: * Results:
1671: * None.
1672: *
1673: * Side effects:
1674: * Sets the interpreter result to an error message describing the
1675: * expression that was being parsed when the error occurred.
1676: *
1677: *----------------------------------------------------------------------
1678: */
1679:
1680: static void LogSyntaxError(ParseInfo info, // Holds the parse state for the
1681: // expression being parsed.
1682: String extraInfo) // String to provide extra information
1683: // about the syntax error.
1684: throws TclException {
1685: //int numChars = (info.lastChar - info.originalExprStart);
1686: String expr = info.getOriginalExpr();
1687: if (expr.length() > 60) {
1688: expr = expr.substring(0, 60) + "...";
1689: }
1690: StringBuffer msg = new StringBuffer();
1691: msg.append("syntax error in expression \"");
1692: msg.append(expr);
1693: msg.append("\"");
1694:
1695: // Extra info is disabled for now until the parser test cases are updated to
1696: // match Tcl 8.4 parser error messages.
1697: // if (extraInfo != null) {
1698: // msg.append(": ");
1699: // msg.append(extraInfo);
1700: // }
1701:
1702: info.parseObj.errorType = Parser.TCL_PARSE_SYNTAX;
1703: info.parseObj.termIndex = info.start;
1704:
1705: if (info.parseObj.interp != null) {
1706: info.parseObj.interp.resetResult();
1707: }
1708: throw new TclException(info.parseObj.interp, msg.toString());
1709: }
1710:
1711: /*
1712: *----------------------------------------------------------------------
1713: *
1714: * ParseMaxDoubleLength -> ParseMaxDoubleLength
1715: *
1716: * Scans a sequence of characters checking that the characters could
1717: * be in a string rep of a double.
1718: *
1719: * Results:
1720: * Returns the number of characters starting with string, running to, but
1721: * not including end, all of which could be part of a string rep.
1722: * of a double. Only character identity is used, no actual
1723: * parsing is done.
1724: *
1725: * The legal bytes are '0' - '9', 'A' - 'F', 'a' - 'f',
1726: * '.', '+', '-', 'i', 'I', 'n', 'N', 'p', 'P', 'x', and 'X'.
1727: * This covers the values "Inf" and "Nan" as well as the
1728: * decimal and hexadecimal representations recognized by a
1729: * C99-compliant strtod().
1730: *
1731: * Side effects:
1732: * None.
1733: *
1734: *----------------------------------------------------------------------
1735: */
1736:
1737: static int ParseMaxDoubleLength(char[] script_array,
1738: int script_index, int end) {
1739: int p = script_index;
1740: done: {
1741: while (p < end) {
1742: switch (script_array[p]) {
1743: case '0':
1744: case '1':
1745: case '2':
1746: case '3':
1747: case '4':
1748: case '5':
1749: case '6':
1750: case '7':
1751: case '8':
1752: case '9':
1753: case 'A':
1754: case 'B':
1755: case 'C':
1756: case 'D':
1757: case 'E':
1758: case 'F':
1759: case 'I':
1760: case 'N':
1761: case 'P':
1762: case 'X':
1763: case 'a':
1764: case 'b':
1765: case 'c':
1766: case 'd':
1767: case 'e':
1768: case 'f':
1769: case 'i':
1770: case 'n':
1771: case 'p':
1772: case 'x':
1773: case '.':
1774: case '+':
1775: case '-':
1776: p++;
1777: break;
1778: default:
1779: break done;
1780: }
1781: }
1782: } // end done block
1783: return (p - script_index);
1784: }
1785:
1786: } // end class ParseExpr
|