0001: /*
0002: * Jatha - a Common LISP-compatible LISP library in Java.
0003: * Copyright (C) 1997-2005 Micheal Scott Hewett
0004: *
0005: * This library is free software; you can redistribute it and/or
0006: * modify it under the terms of the GNU Lesser General Public
0007: * License as published by the Free Software Foundation; either
0008: * version 2.1 of the License, or (at your option) any later version.
0009: *
0010: * This library is distributed in the hope that it will be useful,
0011: * but WITHOUT ANY WARRANTY; without even the implied warranty of
0012: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
0013: * Lesser General Public License for more details.
0014: *
0015: * You should have received a copy of the GNU Lesser General Public
0016: * License along with this library; if not, write to the Free Software
0017: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
0018: *
0019: *
0020: * For further information, please contact Micheal Hewett at
0021: * hewett@cs.stanford.edu
0022: *
0023: */
0024:
0025: package org.jatha.read;
0026:
0027: import java.io.*;
0028: import java.math.BigInteger;
0029:
0030: import org.jatha.dynatype.*;
0031: import org.jatha.Jatha;
0032: import org.jatha.compile.CompilerException;
0033:
0034: /**
0035: * A parser that reads LISP-syntax text from a text
0036: * stream or string. It recognizes all standard
0037: * LISP datatypes, although not structured ones.
0038: * This function is designed to fulfill the function
0039: * of the reader in a LISP <tt>read-eval-print</tt> loop.
0040: *
0041: * Once the LISP parser is instantiated, the
0042: * <tt>parse()</tt> function can be used to read from
0043: * a string or stream.
0044: *
0045: * It is best not to instantiate the LispParser yourself.
0046: * Instead, do the following:
0047: * <pre>
0048: * 1. LispParser parser = Jatha.getParser();
0049: * 2. parser.setInputString(myString);
0050: * 3. parser.setCaseSensitivity(LispParser.PRESERVE);
0051: * 4. LispValue result = parser.parse();
0052: * </pre>
0053: * Normal usage is to parse a string. If you want to use a Reader,
0054: * do: <code>new PushbackReader(myReader)</code>.
0055: *
0056: * @see org.jatha.dynatype.LispValue
0057: * @author Micheal S. Hewett
0058: * @version 1.0
0059: */
0060: public class LispParser {
0061: public static final int UPCASE = 1;
0062: public static final int DOWNCASE = 2;
0063: public static final int PRESERVE = 3;
0064:
0065: static final char AT_SIGN = '@';
0066: static final char BACK_QUOTE = '`';
0067: static final char BACKSLASH = '\\';
0068: static final char COLON = ':';
0069: static final char COMMA = ',';
0070: static final char DECIMAL = '.';
0071: static final char DOUBLE_QUOTE = '"';
0072: static final char EQUAL_SIGN = '=';
0073: static final char LEFT_ANGLE_BRACKET = '<';
0074: static final char LEFT_PAREN = '(';
0075: static final char HYPHEN = '-';
0076: static final char OR_BAR = '|';
0077: static final char POUND = '#';
0078: static final char PERIOD = '.';
0079: static final char RIGHT_PAREN = ')';
0080: static final char SEMICOLON = ';';
0081: static final char RIGHT_ANGLE_BRACKET = '>';
0082: static final char SINGLE_QUOTE = '\'';
0083: static final char UNDERSCORE = '_';
0084:
0085: // Parser states
0086: static final int READING_NOTHING = 0;
0087: static final int READING_SYMBOL = 1;
0088: static final int READING_MIXED_CASE_SYMBOL = 2;
0089: static final int READING_CHARACTER = 3;
0090: static final int READING_STRING = 4;
0091: static final int READING_BACKQUOTED_LIST = 5;
0092:
0093: private int BackQuoteLevel = 0;
0094: private PushbackReader inputReader;
0095:
0096: private int f_caseSensitivity = UPCASE; // default LISP behavior.
0097:
0098: private static LispParser f_myParser = null;
0099:
0100: private Jatha f_lisp = null;
0101:
0102: public LispParser(Jatha lisp, Reader inStream) {
0103: this (lisp, inStream, UPCASE);
0104: }
0105:
0106: public LispParser(Jatha lisp, String inString) {
0107: this (lisp, new StringReader(inString), UPCASE);
0108: }
0109:
0110: /**
0111: * Allows you to create a parser that handles
0112: * input case conversion as you like.
0113: * Default is UPCASE. Other values are DOWNCASE and PRESERVE.
0114: * @param inStream
0115: */
0116: public LispParser(Jatha lisp, Reader inStream, int caseSensitivity) {
0117: f_lisp = lisp;
0118:
0119: if (inStream instanceof PushbackReader)
0120: inputReader = (PushbackReader) inStream;
0121: else
0122: inputReader = new PushbackReader(inStream);
0123: setCaseSensitivity(caseSensitivity);
0124: }
0125:
0126: /**
0127: * Allows you to create a parser that handles
0128: * input case conversion as you like.
0129: * Default is UPCASE. Other values are DOWNCASE and PRESERVE.
0130: */
0131: public LispParser(Jatha lisp, String inString, int caseSensitivity) {
0132: this (lisp, new StringReader(inString));
0133: setCaseSensitivity(caseSensitivity);
0134: }
0135:
0136: /**
0137: * Retrieves the current case-sensitivity of the parser.
0138: * It can be eiher LispParser.UPCASE, LispParser.DOWNCASE
0139: * or LispParser.PRESERVE
0140: * @return UPCASE, DOWNCASE or PRESERVE
0141: */
0142: public int getCaseSensitivity() {
0143: return f_caseSensitivity;
0144: }
0145:
0146: /**
0147: * Sets the current case-sensitivity of the parser.
0148: * It can be eiher LispParser.UPCASE, LispParser.DOWNCASE
0149: * or LispParser.PRESERVE
0150: */
0151: public void setCaseSensitivity(int caseSensitivity) {
0152: f_caseSensitivity = caseSensitivity;
0153: }
0154:
0155: /**
0156: * Gets the current reader to be parsed.
0157: */
0158: public PushbackReader getInputReader() {
0159: return inputReader;
0160: }
0161:
0162: /**
0163: * Sets the input reader for the Parser.
0164: */
0165: public void setInputReader(PushbackReader inputReader) {
0166: this .inputReader = inputReader;
0167: }
0168:
0169: /**
0170: * Sets the input string for the parser. This is the
0171: * String to parse.
0172: */
0173: public void setInputString(String s) {
0174: this .inputReader = new PushbackReader(new StringReader(s));
0175: }
0176:
0177: /**
0178: * Parse() assumes that there is only one expression in the input
0179: * string or file. If you need to read multiple items from a
0180: * string or file, use the read() function.
0181: * Parse just calls read right now.
0182: *
0183: * @see #read
0184: */
0185: public LispValue parse() throws EOFException {
0186: return read();
0187: }
0188:
0189: /**
0190: * Reads one s-expression from the input stream (a string or file).
0191: * Throws an EOFxception when EOF is reached.
0192: * Call this method repeatedly to do read-eval-print on a file.
0193: */
0194: public LispValue read() throws EOFException {
0195: StringBuffer token = new StringBuffer(80); // Should cover most tokens.
0196: char ch;
0197: int intCh = 0;
0198: int parseState = READING_NOTHING;
0199:
0200: while (true) {
0201: try {
0202: intCh = inputReader.read();
0203: } catch (IOException ioe) {
0204: break;
0205: }
0206:
0207: if (intCh < 0)
0208: if (parseState == READING_SYMBOL) // end of symbol at end of input
0209: ch = ' ';
0210: else
0211: throw new EOFException(
0212: "Premature end of LISP input at: "
0213: + token.toString());
0214: else
0215: ch = (char) intCh;
0216:
0217: // Debugging
0218: // System.err.print("\n Read: read: " + intCh + " ('" + ch + "')");
0219:
0220: // Encounter a comment?: flush the remaining characters on the line.
0221: if (isSemi(ch) && (parseState != READING_STRING)
0222: && (parseState != READING_CHARACTER)) {
0223: do {
0224: try {
0225: intCh = inputReader.read();
0226: } catch (IOException e) {
0227: break;
0228: }
0229: if (intCh < 0)
0230: throw new EOFException(
0231: "Premature end of LISP input at: "
0232: + token.toString());
0233: else
0234: ch = (char) intCh;
0235:
0236: // Apparently read() doesn't do translation.
0237: if (ch == '\r')
0238: ch = '\n';
0239: } while (ch != '\n');
0240: // System.err.println("\n Finished comment with: " + (int) ch);
0241: continue;
0242: }
0243:
0244: if (parseState != READING_NOTHING) { /* If reading anything... */
0245: switch (parseState) {
0246: case READING_SYMBOL:
0247: if (isTerminator(ch)) /* Terminate reading token. */
0248: {
0249: try {
0250: inputReader.unread(ch);
0251: } catch (IOException e) {
0252: System.err
0253: .println("\n *** I/O error while unreading character '"
0254: + ch + "'.");
0255: }
0256: parseState = READING_NOTHING;
0257: if (f_caseSensitivity == UPCASE)
0258: return (tokenToLispValue(token.toString()
0259: .toUpperCase()));
0260: else if (f_caseSensitivity == DOWNCASE)
0261: return (tokenToLispValue(token.toString()
0262: .toLowerCase()));
0263: else
0264: // if (f_caseSensitivity == PRESERVE)
0265: return (tokenToLispValue(token.toString()));
0266: } else
0267: token.append(ch);
0268: break;
0269:
0270: case READING_MIXED_CASE_SYMBOL:
0271: if (isOrBar(ch)) /* Terminate reading token. */
0272: {
0273: String s = token.toString();
0274:
0275: token.append(ch);
0276: parseState = READING_NOTHING;
0277: // Strip off the beginning and ending Or Bars.
0278: return (tokenToLispValue(s.substring(1, s
0279: .length())));
0280: } else
0281: token.append(ch);
0282: break;
0283:
0284: case READING_STRING:
0285: if (ch == BACKSLASH) // Next char is always in the string
0286: {
0287: try {
0288: intCh = inputReader.read();
0289: } catch (IOException e) {
0290: break;
0291: }
0292: if (intCh < 0)
0293: throw new EOFException(
0294: "Premature end of LISP input at: "
0295: + token.toString());
0296: else
0297: ch = (char) intCh;
0298:
0299: token.append(ch);
0300: break;
0301: }
0302:
0303: if (ch == DOUBLE_QUOTE) {
0304: token.append(ch);
0305: parseState = READING_NOTHING;
0306: return (tokenToLispValue(token.toString()));
0307: } else
0308: token.append(ch);
0309: break;
0310: } /* END OF SWITCH */
0311: } /* END OF IF (parseState) */
0312:
0313: // We are not in the middle of reading something recognizable, so
0314: // we try to start something recognizable.
0315: else if (!isSpace(ch)) /* Start reading a token */
0316: {
0317: if (isLparen(ch)) {
0318: return (read_list_token(inputReader));
0319: } else if (isRparen(ch)) {
0320: System.err
0321: .println("WARNING: Too many right parentheses. NIL assumed.");
0322: return (f_lisp.NIL);
0323: } else if (isQuote(ch)) {
0324: return (read_quoted_token(inputReader));
0325: } else if (isDoubleQuote(ch)) {
0326: token.append(ch);
0327: parseState = READING_STRING;
0328: } else if (isPound(ch)) {
0329: return (applyReaderMacro(inputReader));
0330: } else if (isBackQuote(ch)) {
0331: return (read_backquoted_token(inputReader));
0332: } else if (isComma(ch)) {
0333: return (read_comma_token(inputReader));
0334: } else if (isOrBar(ch)) {
0335: token.append(ch);
0336: parseState = READING_MIXED_CASE_SYMBOL;
0337: } else {
0338: parseState = READING_SYMBOL;
0339: try {
0340: inputReader.unread(ch);
0341: } catch (IOException e) {
0342: System.err
0343: .println("\n *** I/O error while unreading character '"
0344: + ch + "'.");
0345: }
0346: }
0347: } /* if (!isSpace(ch)) */
0348:
0349: } /* main WHILE loop */
0350:
0351: /* WE ONLY EXECUTE THIS CODE IF WE HIT end of input string or file. */
0352: if (token.length() > 0)
0353: return (tokenToLispValue(token.toString()));
0354: else
0355: return (f_lisp.NIL);
0356: }
0357:
0358: /**
0359: * Reads one list expression from the input stream and returns it.
0360: * The input pointer should be on the character following the left parenthesis.
0361: */
0362: public LispValue read_list_token(PushbackReader stream)
0363: throws EOFException {
0364: boolean firstTime = true;
0365: boolean haveDot = false;
0366: char ch;
0367: int intCh = 0;
0368: LispValue newToken;
0369: LispValue newList, newCell;
0370:
0371: newList = f_lisp.NIL;
0372: newCell = f_lisp.NIL;
0373:
0374: while (true) {
0375: try {
0376: intCh = inputReader.read();
0377: } catch (IOException e) {
0378: break;
0379: }
0380: if (intCh < 0) {
0381: throw new EOFException("Premature end of LISP input.");
0382: } else
0383: ch = (char) intCh;
0384:
0385: if (!isSpace(ch)) {
0386: if (isRparen(ch))
0387: return (newList);
0388:
0389: if (isPeriod(ch)) {
0390: if (haveDot) {
0391: System.err
0392: .println("WARNING: Illegal dotted syntax. NIL assumed.");
0393: return f_lisp.NIL;
0394: }
0395: haveDot = true;
0396: continue; // Skip to end of while loop.
0397: }
0398:
0399: // Encounter a comment?: flush the remaining characters on the line.
0400: if (isSemi(ch)) {
0401: do {
0402: try {
0403: intCh = inputReader.read();
0404: } catch (IOException e) {
0405: break;
0406: }
0407: if (intCh < 0) {
0408: throw new EOFException(
0409: "Premature end of LISP input.");
0410: } else
0411: ch = (char) intCh;
0412:
0413: // Apparently read() doesn't do translation.
0414: if (ch == '\r')
0415: ch = '\n';
0416: } while (ch != '\n');
0417: continue;
0418: }
0419:
0420: // otherwise process a normal token.
0421:
0422: try {
0423: inputReader.unread(ch);
0424: } catch (IOException e) {
0425: System.err
0426: .println("\n *** I/O error while unreading character '"
0427: + ch + "'.");
0428: }
0429:
0430: // System.err.print("\nRLT calling parse()");
0431: newToken = read();
0432: // System.err.print("...got back: " + newToken.toString());
0433:
0434: if (firstTime) {
0435: newList = f_lisp.makeCons(f_lisp.NIL, f_lisp.NIL);
0436: newList.rplaca(newToken);
0437: firstTime = false;
0438: } else {
0439: if (haveDot)
0440: newList.last().rplacd(newToken);
0441: else {
0442: newCell = f_lisp.makeCons(f_lisp.NIL,
0443: f_lisp.NIL); /* (NIL . NIL) */
0444: newCell.rplaca(newToken);
0445: newList.last().rplacd(newCell);
0446: }
0447: }
0448: } // if (!isSpace())
0449: } // while ()...
0450:
0451: return f_lisp.NIL; // Shouldn't get here.
0452: }
0453:
0454: /**
0455: * This routine is called by parse when it encounters
0456: * a quote mark. It calls parse recursively.
0457: */
0458: LispValue read_quoted_token(PushbackReader stream)
0459: throws EOFException {
0460: LispValue newCell = f_lisp.NIL;
0461: LispValue newQuotedList = f_lisp.NIL;
0462:
0463: /* Construct the quoted list (QUOTE . (NIL . NIL)) then
0464: * read a token and replace the first NIL by the token read.
0465: */
0466:
0467: newQuotedList = f_lisp.makeCons(f_lisp.QUOTE, f_lisp.makeCons(
0468: f_lisp.NIL, f_lisp.NIL));
0469: newCell = read();
0470: newQuotedList.cdr().rplaca(newCell);
0471: return (newQuotedList);
0472: }
0473:
0474: /**
0475: * This routine is called by parse when it encounters
0476: * a backquote mark. It calls parse recursively.
0477: */
0478: public LispValue read_backquoted_token(PushbackReader stream)
0479: throws EOFException {
0480: LispValue newCell = f_lisp.NIL;
0481: LispValue newQuotedList = f_lisp.NIL;
0482:
0483: /* Construct the quoted list (SYS::BACKQUOTE . (NIL . NIL)) then
0484: * read a token and replace the first NIL by the token read.
0485: */
0486:
0487: newQuotedList = f_lisp.makeCons(f_lisp.BACKQUOTE, f_lisp
0488: .makeCons(f_lisp.NIL, f_lisp.NIL));
0489:
0490: ++BackQuoteLevel;
0491: newCell = read();
0492: --BackQuoteLevel;
0493:
0494: newQuotedList.cdr().rplaca(newCell);
0495: return (newQuotedList);
0496: }
0497:
0498: /**
0499: * This routine is called by parse when it encounters
0500: * a comma, which is only legal inside a backquote.
0501: */
0502: LispValue read_comma_token(PushbackReader stream)
0503: throws EOFException {
0504: LispValue newCell = f_lisp.NIL;
0505: LispValue newQuotedList = f_lisp.NIL;
0506: LispValue identifier = f_lisp.NIL;
0507: int intCh;
0508: char ch;
0509:
0510: if (BackQuoteLevel <= 0) {
0511: System.err
0512: .println(";; *** ERROR: Comma not inside backquote.");
0513: return f_lisp.NIL;
0514: }
0515:
0516: try {
0517: intCh = inputReader.read();
0518: } catch (IOException e) {
0519: return f_lisp.NIL;
0520: }
0521:
0522: if (intCh < 0) {
0523: throw new EOFException("Premature end of LISP input.");
0524: } else
0525: ch = (char) intCh;
0526:
0527: // Apparently read() doesn't do translation.
0528: if (ch == '\r')
0529: ch = '\n';
0530:
0531: if (isAtSign(ch))
0532: identifier = f_lisp.COMMA_ATSIGN_FN;
0533: else if (isPeriod(ch))
0534: identifier = f_lisp.COMMA_DOT_FN;
0535: else {
0536: identifier = f_lisp.COMMA_FN;
0537: try {
0538: inputReader.unread(ch);
0539: } catch (IOException e) {
0540: System.err
0541: .println("\n *** I/O error while unreading character '"
0542: + ch + "'.");
0543: }
0544: }
0545:
0546: newQuotedList = f_lisp.makeCons(identifier, f_lisp.makeCons(
0547: f_lisp.NIL, f_lisp.NIL));
0548:
0549: newCell = read();
0550:
0551: newQuotedList.cdr().rplaca(newCell);
0552: return (newQuotedList);
0553: }
0554:
0555: /**
0556: * This routine is called by parse when it encounters
0557: * a pound (#) mark.
0558: */
0559: public LispValue applyReaderMacro(PushbackReader stream)
0560: throws EOFException {
0561: char ch = '0';
0562: int intCh = 0;
0563:
0564: try {
0565: intCh = inputReader.read();
0566: } catch (IOException e) {
0567: System.err
0568: .println("\n *** I/O error while reading '#' token.");
0569: }
0570: if (intCh < 0) {
0571: throw new EOFException("Premature end of LISP input.");
0572: } else
0573: ch = (char) intCh;
0574:
0575: // #:foo is an uninterned symbol.
0576: if (isColon(ch)) {
0577: StringBuffer token = new StringBuffer(80);
0578: token.append('#');
0579:
0580: while (!isTerminator(ch)) /* Terminate reading token. */
0581: {
0582: token.append(ch);
0583: try {
0584: intCh = inputReader.read();
0585: } catch (IOException e) {
0586: System.err
0587: .println("\n *** I/O error while reading '#:' token.");
0588: }
0589: if (intCh < 0) {
0590: throw new EOFException(
0591: "Premature end of LISP input.");
0592: } else
0593: ch = (char) intCh;
0594: }
0595:
0596: try {
0597: inputReader.unread(ch);
0598: } catch (IOException e) {
0599: System.err
0600: .println("\n *** I/O error while unreading character '"
0601: + ch + "'.");
0602: }
0603:
0604: if (f_caseSensitivity == UPCASE)
0605: return (tokenToLispValue(token.toString().toUpperCase()));
0606: else if (f_caseSensitivity == DOWNCASE)
0607: return (tokenToLispValue(token.toString().toLowerCase()));
0608: else
0609: // if (f_caseSensitivity == PRESERVE)
0610: return (tokenToLispValue(token.toString()));
0611: }
0612:
0613: // #'foo means (function foo)
0614: else if (isQuote(ch)) {
0615: LispValue result = f_lisp.makeList(
0616: tokenToLispValue("FUNCTION"), read());
0617: return result;
0618:
0619: //LispValue result = f_lisp.makeList(tokenToLispValue("FUNCTION"));
0620: //
0621: // try {
0622: // intCh = inputReader.read();
0623: // } catch (IOException e) {
0624: // System.err.println("\n *** I/O error while reading a #' expression." + e);
0625: // return tokenToLispValue(token.toString());
0626: // }
0627: // if (intCh < 0)
0628: // throw new EOFException("Premature end of LISP input.");
0629: // else
0630: // ch = (char) intCh;
0631:
0632: //LispValue fnToken = read();
0633:
0634: // if (intCh == LEFT_PAREN) // #'(lambda...
0635: // {
0636: // fnToken = read_list_token(inputReader);
0637: // }
0638: //
0639: // else
0640: // {
0641: // while (! isTerminator(ch))
0642: // {
0643: // token.append(ch);
0644: // try {
0645: // intCh = inputReader.read();
0646: // } catch (IOException e) {
0647: // System.err.println("\n *** I/O error while reading a #'symbol expression." + e);
0648: // }
0649: // if (intCh < 0)
0650: // throw new EOFException("Premature end of LISP input.");
0651: // else
0652: // ch = (char) intCh;
0653: // }
0654: //
0655: // try {
0656: // inputReader.unread(ch);
0657: // } catch (IOException e) {
0658: // System.err.println("\n *** I/O error while reading a #' expression." + e);
0659: // return tokenToLispValue(token.toString());
0660: // }
0661: //
0662: // if (f_caseSensitivity == UPCASE)
0663: // fnToken = tokenToLispValue(token.toString().toUpperCase());
0664: // else if (f_caseSensitivity == DOWNCASE)
0665: // fnToken = tokenToLispValue(token.toString().toLowerCase());
0666: // else // if (f_caseSensitivity == PRESERVE)
0667: // fnToken = tokenToLispValue(token.toString());
0668: // }
0669:
0670: //return result.append(f_lisp.makeList(fnToken));
0671:
0672: }
0673:
0674: // #\ reads a character macro
0675: else if (isBackSlash(ch)) {
0676: StringBuffer token = new StringBuffer(80);
0677: try {
0678: intCh = inputReader.read();
0679: } catch (IOException e) {
0680: System.err
0681: .println("\n *** I/O error while reading character token.");
0682: }
0683: if (intCh < 0) {
0684: throw new EOFException("Premature end of LISP input.");
0685: } else
0686: ch = (char) intCh;
0687:
0688: while (!isTerminator(ch)) {
0689: token.append(ch);
0690: try {
0691: intCh = inputReader.read();
0692: } catch (IOException e) {
0693: System.err
0694: .println("\n *** I/O error while reading a #' expression."
0695: + e);
0696: }
0697: if (intCh < 0)
0698: throw new EOFException(
0699: "Premature end of LISP input.");
0700: else
0701: ch = (char) intCh;
0702: }
0703:
0704: try {
0705: inputReader.unread(ch);
0706: } catch (IOException e) {
0707: System.err
0708: .println("\n *** I/O error while reading a #' expression."
0709: + e);
0710: return tokenToLispValue(token.toString());
0711: }
0712:
0713: if (token.length() > 1) {
0714: final String tok = token.toString();
0715: if (tok.equalsIgnoreCase("SPACE")) {
0716: ch = ' ';
0717: } else if (tok.equalsIgnoreCase("NEWLINE")) {
0718: ch = '\n';
0719: } else {
0720: ch = 0;
0721: }
0722: } else {
0723: ch = token.charAt(0);
0724: }
0725:
0726: return new StandardLispCharacter(f_lisp, ch);
0727: }
0728:
0729: // #< usually starts a structure
0730: else if (isLeftAngleBracket(ch)) {
0731: System.err
0732: .println("\n *** parser can't read structures yet.");
0733: while (!isRightAngleBracket(ch))
0734: try {
0735: intCh = inputReader.read();
0736: ch = (char) intCh;
0737: } catch (IOException e) {
0738: System.err
0739: .println("\n *** I/O error while reading a structure.");
0740: }
0741:
0742: if (intCh < 0) {
0743: throw new EOFException("Premature end of LISP input.");
0744: }
0745:
0746: else
0747: ch = (char) intCh;
0748:
0749: return f_lisp.NIL;
0750: }
0751:
0752: // -- #| ... |# is a block comment
0753: else if (isOrBar(ch)) {
0754: try {
0755: boolean done = false;
0756: boolean terminating = false;
0757: while (!done) {
0758: intCh = inputReader.read();
0759: ch = (char) intCh;
0760: if (isOrBar(ch))
0761: terminating = true;
0762: else if (terminating && isPound(ch))
0763: done = true;
0764: else
0765: terminating = false;
0766: }
0767: } catch (IOException e) {
0768: System.err
0769: .println("\n *** I/O error while reading a block comment. Not terminated?");
0770: }
0771: return f_lisp.NIL;
0772: }
0773:
0774: else {
0775: System.err.println("\n *** unknown '#' construct.");
0776: return f_lisp.NIL;
0777: }
0778: }
0779:
0780: /**
0781: * This library can't read backquotes yet.
0782: */
0783: public LispValue read_backquoted_list_token(PushbackReader stream) {
0784: System.err
0785: .println("\n *** Parser can't read backquoted lists yet.");
0786: return f_lisp.NIL;
0787: }
0788:
0789: /**
0790: * Converts a string to a LISP value such as
0791: * NIL, T, an integer, a real number, a string or a symbol.
0792: */
0793: public LispValue tokenToLispValue(String token) {
0794: LispValue newCell = null;
0795: LispValue keywordPackage = f_lisp.findPackage("KEYWORD");
0796:
0797: if (T_token_p(token))
0798: newCell = f_lisp.T;
0799: else if (NIL_token_p(token))
0800: newCell = f_lisp.NIL;
0801: else if (INTEGER_token_p(token)) {
0802: // It may be an Fixnum or a Bignum.
0803: // Let Java tell us by generating a NumberFormatException
0804: // when the number is too big (or too negatively big).
0805: try {
0806: newCell = f_lisp.makeInteger(new Long(token));
0807: } catch (NumberFormatException e) {
0808: newCell = f_lisp.makeBignum(new BigInteger(token));
0809: }
0810: } else if (REAL_token_p(token))
0811: newCell = f_lisp.makeReal(new Double(token));
0812: else if (STRING_token_p(token)) { /* remove the first and last double quotes. */
0813: try {
0814: newCell = f_lisp.makeString(token.substring(1, token
0815: .length() - 1));
0816: } catch (StringIndexOutOfBoundsException e) {
0817: System.err
0818: .println("Hey, got a bad string index in 'tokenToLispValue'!");
0819: }
0820: ;
0821:
0822: } else if (SYMBOL_token_p(token)) {
0823: // default package.
0824: LispValue pkg = f_lisp.PACKAGE_SYMBOL.symbol_value();
0825: String packageStr = "";
0826: boolean external = false;
0827:
0828: // Added packages, 10 May 1997 (mh)
0829: if (token.indexOf(':') >= 0) {
0830: packageStr = token.substring(0, token.indexOf(':'));
0831: if (packageStr.equals("#")) // Uninterned symbol
0832: pkg = null;
0833: else {
0834: pkg = f_lisp.findPackage(packageStr);
0835: if (pkg == f_lisp.NIL) {
0836: // throw(new LispUndefinedPackageException(packageStr));
0837: System.err
0838: .println("Warning: package '"
0839: + packageStr
0840: + "' undefined. Using current package.");
0841: pkg = f_lisp.PACKAGE_SYMBOL.symbol_value();
0842: }
0843: }
0844:
0845: // Strip off the package.
0846: token = token.substring(packageStr.length(), token
0847: .length());
0848:
0849: if (token.startsWith(":::")) {
0850: System.err
0851: .println("Warning: ignored extra ':' in '"
0852: + packageStr + token + "'.");
0853: token = token.substring(token.lastIndexOf(':') + 1,
0854: token.length());
0855: } else if (token.startsWith("::"))
0856: token = token.substring(2, token.length());
0857: else if (token.startsWith(":")) {
0858: external = true;
0859: token = token.substring(1, token.length());
0860: }
0861: } // end of package parsing
0862:
0863: // Handle external symbols separately, except for keywords
0864: if (external && !(packageStr.equals(""))) {
0865: newCell = ((LispPackage) pkg).getExternalSymbol(f_lisp
0866: .makeString(token));
0867: if (newCell == f_lisp.NIL)
0868: System.err
0869: .println(";; *** ERROR: "
0870: + packageStr
0871: + ":"
0872: + token
0873: + " is not an external symbol in "
0874: + packageStr
0875: + ".\n;; *** Creating new symbol in current package.");
0876: newCell = f_lisp.EVAL.intern(token,
0877: (LispPackage) f_lisp.PACKAGE_SYMBOL
0878: .symbol_value());
0879: }
0880: // keywords must always be uppercase.
0881: else if (pkg == keywordPackage)
0882: newCell = f_lisp.EVAL.intern(token.toUpperCase(),
0883: (LispPackage) pkg);
0884: else
0885: newCell = f_lisp.EVAL.intern(token, (LispPackage) pkg);
0886: } else {
0887: System.err.println("ERROR: Unrecognized input: \"" + token
0888: + "\"");
0889: newCell = f_lisp.NIL;
0890: }
0891: ;
0892:
0893: if (newCell == null) {
0894: System.err.println("MEMORY_ERROR in \"tokenToLispValue\" "
0895: + "for token \"" + token + "\", returning NIL.");
0896: newCell = f_lisp.NIL;
0897: }
0898: ;
0899:
0900: return (newCell);
0901: }
0902:
0903: // ---- Utility functions ----------------------------------
0904:
0905: boolean isLparen(char x) {
0906: return (x == LEFT_PAREN);
0907: };
0908:
0909: boolean isRparen(char x) {
0910: return (x == RIGHT_PAREN);
0911: };
0912:
0913: boolean isAtSign(char x) {
0914: return (x == AT_SIGN);
0915: };
0916:
0917: boolean isBackQuote(char x) {
0918: return (x == BACK_QUOTE);
0919: };
0920:
0921: boolean isBackSlash(char x) {
0922: return (x == BACKSLASH);
0923: };
0924:
0925: boolean isColon(char x) {
0926: return (x == COLON);
0927: };
0928:
0929: boolean isComma(char x) {
0930: return (x == COMMA);
0931: };
0932:
0933: boolean isDoubleQuote(char x) {
0934: return (x == DOUBLE_QUOTE);
0935: };
0936:
0937: boolean isOrBar(char x) {
0938: return (x == OR_BAR);
0939: };
0940:
0941: boolean isPound(char x) {
0942: return (x == POUND);
0943: };
0944:
0945: boolean isPeriod(char x) {
0946: return (x == PERIOD);
0947: };
0948:
0949: boolean isQuote(char x) {
0950: return (x == SINGLE_QUOTE);
0951: };
0952:
0953: boolean isSemi(char x) {
0954: return (x == SEMICOLON);
0955: };
0956:
0957: boolean isLeftAngleBracket(char x) {
0958: return (x == LEFT_ANGLE_BRACKET);
0959: };
0960:
0961: boolean isRightAngleBracket(char x) {
0962: return (x == RIGHT_ANGLE_BRACKET);
0963: };
0964:
0965: boolean isSpace(char x) {
0966: return ((x == ' ') // space
0967: || (x == '\n') // newline
0968: || (x == '\r') // carriage return
0969: || (x == '\t') // tab
0970: || (x == '\f') // form feed
0971: || (x == '\b')); // backspace
0972: }
0973:
0974: boolean isTerminator(char x) {
0975: return (isSpace(x) // white space
0976: || isLparen(x) || isRparen(x)
0977: || isQuote(x)
0978: || isSemi(x) || isDoubleQuote(x) || isComma(x));
0979: }
0980:
0981: /** The equivalent of the C function 'strspn'.
0982: * Given a string and another string representing a set of characters,
0983: * this function scans the string and accepts characters that are
0984: * elements of the given set of characters. It returns the index
0985: * of the first element of the string that is not a member of the
0986: * set of characters.
0987: * For example:
0988: * pos = firstCharNotInSet(0, "hello there, how are you?", "ehlort ");
0989: * returns 11.
0990: *
0991: * If the string does not contain any of the characters in the set,
0992: * str.length() is returned.
0993: */
0994: public static int firstCharNotInSet(int startIndex, String str,
0995: String charSet) {
0996: int searchIndex = startIndex - 1; // we add one at the end.
0997: int length = str.length();
0998:
0999: // System.out.print("\nSearching \"" + str + "\" for \"" + charSet + "\" from index " + startIndex);
1000: try {
1001: for (int i = startIndex; ((i < length) && (charSet
1002: .indexOf(str.charAt(i)) >= 0)); ++i)
1003: searchIndex = i;
1004: } catch (StringIndexOutOfBoundsException e) {
1005: System.err
1006: .println("Hey, got a bad string index in 'firstCharNotInSet'!");
1007: }
1008: ;
1009:
1010: // System.out.println("...returning " + searchIndex);
1011: return searchIndex + 1;
1012: };
1013:
1014: /**
1015: * Does NOT recognize an isolated '+' or '-' as a real number.
1016: */
1017: boolean REAL_token_p(String str) {
1018: String DECIMALchars = ".";
1019: String INTchars = "0123456789";
1020:
1021: int decimalPos;
1022: int length = str.length();
1023: int index = 0;
1024:
1025: if ((index < length)
1026: && ((str.charAt(index) == '-') || (str.charAt(index) == '+')))
1027: index++;
1028:
1029: if (index == length) // Don't accept a single '-' or '+'
1030: return false;
1031:
1032: decimalPos = str.indexOf(DECIMALchars); /* Check for decimal. If none, not a real number. */
1033: if (decimalPos < 0)
1034: return (false);
1035:
1036: if (firstCharNotInSet(index, str, INTchars) != decimalPos)
1037: return (false);
1038:
1039: if (decimalPos == str.length() - 1)
1040: return (true); /* Decimal point followed by no digits is legal in LISP. */
1041:
1042: /* Check decimal digits. */
1043: index = decimalPos + 1;
1044: return (firstCharNotInSet(index, str, INTchars) == length);
1045: };
1046:
1047: boolean INTEGER_token_p(String str)
1048: /*
1049: * Does NOT recognize an isolated '+' or '-' as an integer.
1050: */
1051: {
1052: String INTchars = "0123456789";
1053:
1054: int length = str.length();
1055: int index = 0;
1056:
1057: try {
1058: if ((index < length)
1059: && ((str.charAt(index) == '-') || (str
1060: .charAt(index) == '+')))
1061: index++;
1062: } catch (StringIndexOutOfBoundsException e) {
1063: System.err
1064: .println("Hey, got a bad string index in 'INTEGER_token_p'! on string '"
1065: + str + "'");
1066: }
1067: ;
1068:
1069: if (index == length) // Don't accept a single '-' or '+'
1070: return false;
1071:
1072: return (firstCharNotInSet(index, str, INTchars) == length);
1073: }
1074:
1075: boolean NIL_token_p(String str) {
1076: return (str.equalsIgnoreCase("NIL"));
1077: };
1078:
1079: boolean STRING_token_p(String str) {
1080: int length = str.length();
1081: boolean value;
1082:
1083: value = false;
1084:
1085: try {
1086: value = ((length >= 2) && (str.charAt(0) == DOUBLE_QUOTE) && (str
1087: .charAt(length - 1) == DOUBLE_QUOTE));
1088: } catch (StringIndexOutOfBoundsException e) {
1089: System.err
1090: .println("Hey, got a bad string index in 'NIL_token_p'!");
1091: }
1092: ;
1093:
1094: return value;
1095: }
1096:
1097: boolean SYMBOL_token_p(String str) {
1098: return (str.length() >= 1);
1099: };
1100:
1101: boolean T_token_p(String str) {
1102: return (str.equalsIgnoreCase("T"));
1103: };
1104:
1105: // ---- Test functions ----------------------------------
1106:
1107: public void test_parser(String s) {
1108: System.out.print("The string \"" + s + "\" ");
1109:
1110: if (T_token_p(s))
1111: System.out.println("is T.");
1112: else if (NIL_token_p(s))
1113: System.out.println("is NIL.");
1114: else if (INTEGER_token_p(s))
1115: System.out.println("is an integer.");
1116: else if (REAL_token_p(s))
1117: System.out.println("is a double.");
1118: else if (STRING_token_p(s))
1119: System.out.println("is a string.");
1120: else if (SYMBOL_token_p(s))
1121: System.out.println("is a symbol.");
1122: else
1123: System.out.println("is not recognized.");
1124: }
1125:
1126: public void test_parser_loop() throws EOFException {
1127: LispValue temp, exit;
1128:
1129: exit = f_lisp.EVAL.intern("EXIT");
1130: temp = f_lisp.EVAL.intern("*TEMP*");
1131:
1132: System.out.println("Run (EXIT) to stop.");
1133: try {
1134: do {
1135: System.out.print("\nJATHA>");
1136: System.out.flush(); // Should print top-level prompt
1137: // input = parse(stdin);
1138: // setq(temp, symbol_value(STAR));
1139:
1140: // print(setq(STAR, eval(input)));
1141: // setq(STARSTARSTAR, symbol_value(STARSTAR));
1142: // setq(STARSTAR, symbol_value(temp));
1143: temp = read();
1144: // System.out.println(); temp.prin1();
1145: temp = f_lisp.COMPILER.compile(f_lisp.MACHINE, temp,
1146: f_lisp.NIL); // No globals for now
1147: // System.out.println(); temp.prin1();
1148: temp = f_lisp.MACHINE.Execute(temp, f_lisp.NIL);
1149: System.out.println();
1150: temp.prin1();
1151: } while (temp != exit);
1152: } catch (CompilerException ce) {
1153: System.err.println("Compiler error: " + ce.toString());
1154: }
1155: System.out.println();
1156: System.out.flush();
1157: }
1158:
1159: /**
1160: * Returns true if the input expression has balanced parentheses
1161: * @param input a String
1162: * @return true if it has balanced parentheses
1163: */
1164: public static boolean hasBalancedParentheses(Jatha lisp,
1165: LispValue input) {
1166: return hasBalancedParentheses(lisp, input.toString());
1167: }
1168:
1169: /**
1170: * Returns true if the input expression has balanced parentheses
1171: * @param input a String
1172: * @return true if it has balanced parentheses
1173: */
1174: public static boolean hasBalancedParentheses(Jatha lisp,
1175: String input) {
1176: LispValue result = lisp.NIL;
1177:
1178: if (f_myParser == null)
1179: f_myParser = new LispParser(lisp, input);
1180: else
1181: f_myParser.setInputString(input);
1182:
1183: try {
1184: while (true)
1185: result = f_myParser.read();
1186:
1187: } catch (EOFException eofe) {
1188: if (eofe.getMessage().toLowerCase().startsWith("premature")) {
1189: System.err
1190: .println("Unbalanced parentheses in input. Last form read was "
1191: + result);
1192: return false;
1193: } else
1194: return true;
1195: }
1196: }
1197:
1198: public void simple_parser_test() {
1199: test_parser("1234.56789");
1200: test_parser("1234.");
1201: test_parser(".56789");
1202: test_parser("-1234.56789");
1203: test_parser("+1234.56789");
1204: test_parser("-.56789");
1205: test_parser("1234");
1206: test_parser("-1234");
1207: test_parser("+1234");
1208: test_parser("T");
1209: test_parser("NIL");
1210: test_parser("\"This is a string\"");
1211: test_parser("\"astring\"");
1212: test_parser("\"\"");
1213: test_parser("ABCD");
1214: test_parser("def1234");
1215: test_parser("123def");
1216: test_parser("abc_def_ghi");
1217: }
1218:
1219: }
|