0001: package gnu.kawa.lispexpr;
0002:
0003: import gnu.text.*;
0004: import gnu.mapping.*;
0005: import gnu.lists.*;
0006: import gnu.math.*;
0007: import gnu.expr.*;
0008:
0009: /** A Lexer for reading S-expressions in generic Lisp-like syntax.
0010: * This class may have outlived its usefulness: It's mostly just a
0011: * wrapper around a LineBufferedReader plus a helper token-buffer.
0012: * The functionality should be moved to ReadTable, though it is
0013: * unclear what to do about the tokenBuffer.
0014: */
0015:
0016: public class LispReader extends Lexer {
/**
 * Creates a reader over the given port.
 *
 * @param port the character source; handed unchanged to the Lexer constructor
 */
public LispReader(LineBufferedReader port) {
    super (port);
}

/**
 * Creates a reader over the given port together with a message collector.
 *
 * @param port the character source; handed unchanged to the Lexer constructor
 * @param messages handed unchanged to the Lexer constructor
 *            (presumably where diagnostics are recorded - confirm in Lexer)
 */
public LispReader(LineBufferedReader port, SourceMessages messages) {
    super (port, messages);
}
0024:
0025: /** Resolve a unit name, if possible.
0026: * Returns null if the unit name is unknown. */
0027: public static Object lookupUnit(String name) {
0028: name = (name + "$unit").intern();
0029: try {
0030: return Environment.getCurrent().getChecked(name);
0031: } catch (UnboundLocationException ex) {
0032: return name;
0033: }
0034: }
0035:
0036: /** Read a #|...|#-style comment (which may contain other nested comments).
0037: * Assumes the initial "#|" has already been read.
0038: */
0039: final public void readNestedComment(char c1, char c2)
0040: throws java.io.IOException, SyntaxException {
0041: int commentNesting = 1;
0042: int startLine = port.getLineNumber();
0043: int startColumn = port.getColumnNumber();
0044: do {
0045: int c = read();
0046: if (c == '|') {
0047: c = read();
0048: if (c == c1)
0049: commentNesting--;
0050: } else if (c == c1) {
0051: c = read();
0052: if (c == c2)
0053: commentNesting++;
0054: }
0055: if (c < 0) {
0056: eofError("unexpected end-of-file in " + c1 + c2
0057: + " comment starting here", startLine + 1,
0058: startColumn - 1);
0059: return;
0060: }
0061: } while (commentNesting > 0);
0062: }
0063:
0064: /** Get specification of how symbols should be case-folded.
0065: * @return Either 'P' (means preserve case), 'U' (upcase),
0066: * 'D' (downcase, or 'I' (invert case).
0067: */
0068: static char getReadCase() {
0069: char read_case;
0070: try {
0071: String read_case_string = Environment.getCurrent().get(
0072: "symbol-read-case", "P").toString();
0073: read_case = read_case_string.charAt(0);
0074: if (read_case == 'P')
0075: ;
0076: else if (read_case == 'u')
0077: read_case = 'U';
0078: else if (read_case == 'd' || read_case == 'l'
0079: || read_case == 'L')
0080: read_case = 'D';
0081: else if (read_case == 'i')
0082: read_case = 'I';
0083: } catch (Exception ex) {
0084: read_case = 'P';
0085: }
0086: return read_case;
0087: }
0088:
0089: public Object readValues(int ch, ReadTable rtable)
0090: throws java.io.IOException, SyntaxException {
0091: return readValues(ch, rtable.lookup(ch), rtable);
0092: }
0093:
0094: /** May return zero or multiple values. */
0095: public Object readValues(int ch, ReadTableEntry entry,
0096: ReadTable rtable) throws java.io.IOException,
0097: SyntaxException {
0098: // Step numbers refer to steps in section 2.2 of the HyperSpec.
0099: // Step 1:
0100: int startPos = tokenBufferLength;
0101:
0102: if (entry == null) {
0103: // Step 2:
0104: String err = ("invalid character #\\" + ((char) ch)); // FIXME
0105: if (interactive)
0106: fatal(err);
0107: else
0108: error(err);
0109: return Values.empty;
0110: }
0111: int kind = entry.getKind();
0112: seenEscapes = false;
0113: switch (kind) {
0114: case ReadTable.WHITESPACE:
0115: // Step 3:
0116: return Values.empty;
0117: case ReadTable.TERMINATING_MACRO:
0118: case ReadTable.NON_TERMINATING_MACRO:
0119: Object value = entry.read(this , ch, -1);
0120: return value;
0121: case ReadTable.CONSTITUENT:
0122: if (ch == rtable.postfixLookupOperator) { // Force an initial ':' to be treated as a CONSTITUENT.
0123: tokenBufferAppend(ch);
0124: ch = read();
0125: }
0126: case ReadTable.SINGLE_ESCAPE: // Step 5:
0127: case ReadTable.MULTIPLE_ESCAPE: // Step 6:
0128: default: //
0129: break;
0130: }
0131:
0132: readToken(ch, getReadCase(), rtable);
0133: int endPos = tokenBufferLength;
0134: if (seenEscapes)
0135: return returnSymbol(startPos, endPos, rtable);
0136: else
0137: return handleToken(startPos, endPos, rtable);
0138: }
0139:
/** Marker stored in the token buffer immediately before each character
 * that was escaped in the source text. */
public static final char TOKEN_ESCAPE_CHAR = '\uffff';

/** If true, then the token buffer contains escaped characters.
 * These are prefixed (in the buffer) by TOKEN_ESCAPE_CHAR.
 * Cleared by readValues before a token is read and set by readToken
 * when it processes escape syntax.
 */
protected boolean seenEscapes;

/** True if ":IDENTIFIER" should be treated as a keyword. */
protected boolean initialColonIsKeyword = true;

/** True if "IDENTIFIER:" should be treated as a keyword. */
protected boolean finalColonIsKeyword = true;
0152:
0153: void readToken(int ch, char readCase, ReadTable rtable)
0154: throws java.io.IOException, SyntaxException {
0155: boolean inEscapes = false;
0156: for (;; ch = read()) {
0157: if (ch < 0) {
0158: if (inEscapes)
0159: eofError("unexpected EOF between escapes");
0160: else
0161: break;
0162: }
0163: ReadTableEntry entry = rtable.lookup(ch);
0164: if (entry == null) {
0165: if (inEscapes) {
0166: tokenBufferAppend(TOKEN_ESCAPE_CHAR);
0167: tokenBufferAppend(ch);
0168: continue;
0169: }
0170: unread(ch);
0171: break;
0172: }
0173: int kind = entry.getKind();
0174: if (ch == rtable.postfixLookupOperator && !inEscapes
0175: && validPostfixLookupStart(rtable))
0176: kind = ReadTable.TERMINATING_MACRO;
0177:
0178: if (kind == ReadTable.SINGLE_ESCAPE) {
0179: ch = read();
0180: if (ch < 0)
0181: eofError("unexpected EOF after single escape");
0182: tokenBufferAppend(TOKEN_ESCAPE_CHAR);
0183: tokenBufferAppend(ch);
0184: seenEscapes = true;
0185: continue;
0186: }
0187: if (kind == ReadTable.MULTIPLE_ESCAPE) {
0188: inEscapes = !inEscapes;
0189: continue;
0190: }
0191: if (inEscapes) {
0192: // Step 9:
0193: tokenBufferAppend(TOKEN_ESCAPE_CHAR);
0194: tokenBufferAppend(ch);
0195: } else {
0196: // Step 8:
0197: switch (kind) {
0198: case ReadTable.CONSTITUENT:
0199: // ... fall through ...
0200: case ReadTable.NON_TERMINATING_MACRO:
0201: if (readCase == 'U'
0202: || (readCase == 'I' && Character
0203: .isLowerCase((char) ch)))
0204: ch = Character.toUpperCase((char) ch);
0205: else if (readCase == 'D'
0206: || (readCase == 'I' && Character
0207: .isUpperCase((char) ch)))
0208: ch = Character.toLowerCase((char) ch);
0209: tokenBufferAppend(ch);
0210: continue;
0211: case ReadTable.MULTIPLE_ESCAPE:
0212: inEscapes = true;
0213: seenEscapes = true;
0214: continue;
0215: case ReadTable.TERMINATING_MACRO:
0216: unread(ch);
0217: return;
0218: case ReadTable.WHITESPACE:
0219: // if (readPreservingWhitespace) FIXME
0220: unread(ch);
0221: return;
0222: }
0223: }
0224: }
0225: }
0226:
0227: public Object readObject() throws java.io.IOException,
0228: SyntaxException {
0229: char saveReadState = ((InPort) port).readState;
0230: int startPos = tokenBufferLength;
0231: ((InPort) port).readState = ' ';
0232: try {
0233: ReadTable rtable = ReadTable.getCurrent();
0234: for (;;) {
0235: int line = port.getLineNumber();
0236: int column = port.getColumnNumber();
0237: int ch = port.read();
0238: if (ch < 0)
0239: return Sequence.eofValue; // FIXME
0240: Object value = readValues(ch, rtable);
0241: if (value == Values.empty)
0242: continue;
0243: return handlePostfix(value, rtable, line, column);
0244: }
0245: } finally {
0246: tokenBufferLength = startPos;
0247: ((InPort) port).readState = saveReadState;
0248: }
0249: }
0250:
0251: protected boolean validPostfixLookupStart(ReadTable rtable)
0252: throws java.io.IOException {
0253: int ch = port.peek();
0254: ReadTableEntry entry;
0255: if (ch < 0 || ch == ':' || (entry = rtable.lookup(ch)) == null
0256: || ch == rtable.postfixLookupOperator)
0257: return false;
0258: int kind = entry.getKind();
0259: return kind == ReadTable.CONSTITUENT
0260: || kind == ReadTable.NON_TERMINATING_MACRO
0261: || kind == ReadTable.MULTIPLE_ESCAPE
0262: || kind == ReadTable.SINGLE_ESCAPE;
0263: }
0264:
0265: Object handlePostfix(Object value, ReadTable rtable, int line,
0266: int column) throws java.io.IOException, SyntaxException {
0267: if (value == QuoteExp.voidExp)
0268: value = Values.empty;
0269: for (;;) {
0270: int ch = port.peek();
0271: if (ch < 0 || ch != rtable.postfixLookupOperator)
0272: break;
0273: // A kludge to map PreOpWord to ($lookup$ Pre 'Word).
0274: port.read();
0275: if (!validPostfixLookupStart(rtable)) {
0276: unread();
0277: break;
0278: }
0279: ch = port.read();
0280: Object rightOperand = readValues(ch, rtable.lookup(ch),
0281: rtable);
0282: value = LList.list2(value, LList.list2(
0283: LispLanguage.quote_sym, rightOperand));
0284: value = PairWithPosition.make(LispLanguage.lookup_sym,
0285: value, port.getName(), line + 1, column + 1);
0286: }
0287: return value;
0288: }
0289:
0290: private boolean isPotentialNumber(char[] buffer, int start, int end) {
0291: int sawDigits = 0;
0292: for (int i = start; i < end; i++) {
0293: char ch = buffer[i];
0294: if (Character.isDigit(ch))
0295: sawDigits++;
0296: else if (ch == '-' || ch == '+') {
0297: if (i + 1 == end)
0298: return false;
0299: } else if (ch == '#')
0300: return true;
0301: else if (Character.isLetter(ch) || ch == '/' || ch == '_'
0302: || ch == '^') {
0303: // CommonLisp defines _123 (and ^123) as a "potential number";
0304: // most implementations seem to define it as a symbol.
0305: // Scheme does defines it as a symbol.
0306: if (i == start)
0307: return false;
0308: } else if (ch != '.')
0309: return false;
0310: }
0311: return sawDigits > 0;
0312: }
0313:
/** Flag bit for the flags argument of parseNumber: enables the bare
 * imaginary-unit literals "+i" and "-i". */
static final int SCM_COMPLEX = 1;
/** The parseNumber flags that handleToken uses for ordinary tokens. */
public static final int SCM_NUMBERS = SCM_COMPLEX;
0316:
0317: /** Parse a number.
0318: * @param buffer contains the characters of the number
0319: * @param start startinging index of the number in the buffer
0320: * @param count number of characters in buffer to use
0321: * @param exactness either 'i' or 'I' force an inexact result,
0322: * either 'e' or 'E' force an exact result,
0323: * '\0' yields an inact or inexact depending on the form of the literal,
0324: * while ' ' is like '\0' but does not allow more exactness specifiers.
0325: * @param radix the number base to use or 0 if unspecified
0326: * @return the number if a valid number; null or a String-valued error
0327: * message if if there was some error parsing the number.
0328: */
0329: public static Object parseNumber(char[] buffer, int start,
0330: int count, char exactness, int radix, int flags) {
0331: int end = start + count;
0332: int pos = start;
0333: if (pos >= end)
0334: return "no digits";
0335: char ch = buffer[pos++];
0336: while (ch == '#') {
0337: if (pos >= end)
0338: return "no digits";
0339: ch = buffer[pos++];
0340: switch (ch) {
0341: case 'b':
0342: case 'B':
0343: if (radix != 0)
0344: return "duplicate radix specifier";
0345: radix = 2;
0346: break;
0347: case 'o':
0348: case 'O':
0349: if (radix != 0)
0350: return "duplicate radix specifier";
0351: radix = 8;
0352: break;
0353: case 'd':
0354: case 'D':
0355: if (radix != 0)
0356: return "duplicate radix specifier";
0357: radix = 10;
0358: break;
0359: case 'x':
0360: case 'X':
0361: if (radix != 0)
0362: return "duplicate radix specifier";
0363: radix = 16;
0364: break;
0365: case 'e':
0366: case 'E':
0367: case 'i':
0368: case 'I':
0369: if (exactness != '\0') {
0370: if (exactness == ' ')
0371: return "non-prefix exactness specifier";
0372: else
0373: return "duplicate exactness specifier";
0374: }
0375: exactness = ch;
0376: break;
0377: default:
0378: int value = 0;
0379: for (;;) {
0380: int dig = Character.digit(ch, 10);
0381: if (dig < 0)
0382: break;
0383: value = 10 * value + dig;
0384: if (pos >= end)
0385: return "missing letter after '#'";
0386: ch = buffer[pos++];
0387: }
0388: if (ch == 'R' || ch == 'r') {
0389: if (radix != 0)
0390: return "duplicate radix specifier";
0391: if (value < 2 || value > 35)
0392: return "invalid radix specifier";
0393: radix = value;
0394: break;
0395: }
0396: return "unknown modifier '#" + ch + '\'';
0397: }
0398: if (pos >= end)
0399: return "no digits";
0400: ch = buffer[pos++];
0401: }
0402: if (exactness == '\0')
0403: exactness = ' ';
0404: if (radix == 0) {
0405: for (int i = count;;) {
0406: if (--i < 0) {
0407: // FIXME - should get *read-base* in CommonLisp:
0408: // radix = *read_base*;
0409: radix = 10;
0410: break;
0411: }
0412: if (buffer[start + i] == '.') {
0413: radix = 10;
0414: break;
0415: }
0416: }
0417: }
0418:
0419: boolean negative = ch == '-';
0420: boolean numeratorNegative = negative;
0421: if (ch == '-' || ch == '+') {
0422: if (pos >= end)
0423: return "no digits following sign";
0424: ch = buffer[pos++];
0425: }
0426:
0427: // Special case for '+i' and '-i'.
0428: if ((ch == 'i' || ch == 'I') && pos == end && start == pos - 2
0429: && (flags & SCM_COMPLEX) != 0) {
0430: char sign = buffer[start];
0431: if (sign != '+' && sign != '-')
0432: return "no digits";
0433: if (exactness == 'i' || exactness == 'I')
0434: return new DComplex(0, negative ? -1 : 1);
0435: return negative ? Complex.imMinusOne() : Complex.imOne();
0436: }
0437:
0438: int realStart = pos - 1;
0439: boolean hash_seen = false;
0440: char exp_seen = '\000';
0441: int digits_start = -1;
0442: int decimal_point = -1;
0443: boolean copy_needed = false;
0444: boolean underscore_seen = false;
0445: IntNum numerator = null;
0446: long lvalue = 0;
0447: loop: for (;;) {
0448: int digit = Character.digit(ch, radix);
0449: if (digit >= 0) {
0450: if (hash_seen && decimal_point < 0)
0451: return "digit after '#' in number";
0452: if (digits_start < 0)
0453: digits_start = pos - 1;
0454: lvalue = radix * lvalue + digit;
0455: } else {
0456: switch (ch) {
0457: /*
0458: case '_':
0459: underscore_seen = true;
0460: break;
0461: */
0462: /*
0463: case '#':
0464: if (radix != 10)
0465: return "'#' in non-decimal number";
0466: if (digits_start < 0)
0467: return "'#' with no preceeding digits in number";
0468: hash_seen = true;
0469: break;
0470: */
0471: case '.':
0472: if (decimal_point >= 0)
0473: return "duplicate '.' in number";
0474: if (radix != 10)
0475: return "'.' in non-decimal number";
0476: decimal_point = pos - 1;
0477: break;
0478: case 'e':
0479: case 's':
0480: case 'f':
0481: case 'd':
0482: case 'l':
0483: case 'E':
0484: case 'S':
0485: case 'F':
0486: case 'D':
0487: case 'L':
0488: if (pos == end || radix != 10) {
0489: pos--;
0490: break loop;
0491: }
0492: char next = buffer[pos];
0493: if (next == '+' || next == '-') {
0494: if (++pos >= end
0495: || Character.digit(buffer[pos], 10) < 0)
0496: return "missing exponent digits";
0497: } else if (Character.digit(next, 10) < 0) {
0498: pos--;
0499: break loop;
0500: }
0501: if (exp_seen != '\000')
0502: return "duplicate exponent";
0503: if (radix != 10)
0504: return "exponent in non-decimal number";
0505: if (digits_start < 0)
0506: return "mantissa with no digits";
0507: exp_seen = ch;
0508: for (;;) {
0509: pos++;
0510: if (pos >= end
0511: || Character.digit(buffer[pos], 10) < 0)
0512: break loop;
0513: }
0514: case '/':
0515: if (numerator != null)
0516: return "multiple fraction symbol '/'";
0517: if (digits_start < 0)
0518: return "no digits before fraction symbol '/'";
0519: if (exp_seen != '\000' || decimal_point >= 0)
0520: return "fraction symbol '/' following exponent or '.'";
0521: numerator = valueOf(buffer, digits_start, pos
0522: - digits_start, radix, negative, lvalue);
0523: digits_start = -1;
0524: lvalue = 0;
0525: negative = false;
0526: hash_seen = false;
0527: underscore_seen = false;
0528: break;
0529: default:
0530: pos--;
0531: break loop;
0532: }
0533: }
0534: if (pos == end)
0535: break;
0536: ch = buffer[pos++];
0537: }
0538:
0539: if (digits_start < 0)
0540: return "no digits";
0541:
0542: if (hash_seen || underscore_seen) {
0543: // FIXME make copy, removing '_' and replacing '#' by '0'.
0544: }
0545:
0546: boolean inexact = (exactness == 'i' || exactness == 'I' || (exactness == ' ' && hash_seen));
0547: RealNum number = null;
0548: if (exp_seen != '\000' || decimal_point >= 0) {
0549: if (digits_start > decimal_point && decimal_point >= 0)
0550: digits_start = decimal_point;
0551: if (numerator != null)
0552: return "floating-point number after fraction symbol '/'";
0553: String str = new String(buffer, digits_start, pos
0554: - digits_start);
0555: double d = Convert.parseDouble(str);
0556: number = new DFloNum(negative ? -d : d);
0557: } else {
0558: IntNum iresult = valueOf(buffer, digits_start, pos
0559: - digits_start, radix, negative, lvalue);
0560: if (numerator == null)
0561: number = iresult;
0562: else {
0563: // Check for zero denominator values: 0/0, n/0, and -n/0
0564: // (i.e. NaN, Infinity, and -Infinity).
0565: if (iresult.isZero()) {
0566: boolean numeratorZero = numerator.isZero();
0567: if (inexact)
0568: number = new DFloNum(
0569: (numeratorZero ? Double.NaN
0570: : numeratorNegative ? Double.NEGATIVE_INFINITY
0571: : Double.POSITIVE_INFINITY));
0572: else if (numeratorZero)
0573: return "0/0 is undefined";
0574: else
0575: number = RatNum.make(numerator, iresult);
0576: } else {
0577: number = RatNum.make(numerator, iresult);
0578: }
0579: }
0580: if (inexact && number.isExact())
0581: // We want #i-0 or #i-0/1 to be -0.0, not 0.0.
0582: number = new DFloNum(numeratorNegative
0583: && number.isZero() ? -0.0 : number
0584: .doubleValue());
0585: }
0586:
0587: if (exactness == 'e' || exactness == 'E')
0588: number = number.toExact();
0589:
0590: if (pos < end) {
0591: ch = buffer[pos++];
0592:
0593: if (ch == '@') { /* polar notation */
0594: Object angle = parseNumber(buffer, pos, end - pos,
0595: exactness, 10, flags);
0596: if (angle instanceof String)
0597: return angle;
0598: if (!(angle instanceof RealNum))
0599: return "invalid complex polar constant";
0600: RealNum rangle = (RealNum) angle;
0601: /* r4rs requires 0@1.0 to be inexact zero, even if (make-polar
0602: * 0 1.0) is exact zero, so check for this case. */
0603: if (number.isZero() && !rangle.isExact())
0604: return new DFloNum(0.0);
0605:
0606: return Complex.polar(number, rangle);
0607: }
0608:
0609: if (ch == '-' || ch == '+') {
0610: pos--;
0611: Object imag = parseNumber(buffer, pos, end - pos,
0612: exactness, 10, flags);
0613: if (imag instanceof String)
0614: return imag;
0615: if (!(imag instanceof Complex))
0616: return "invalid numeric constant (" + imag + ")";
0617: Complex cimag = (Complex) imag;
0618: RealNum re = cimag.re();
0619: if (!re.isZero())
0620: return "invalid numeric constant";
0621: return Complex.make(number, cimag.im());
0622: }
0623:
0624: int lcount = 0;
0625: for (;;) {
0626: if (!Character.isLetter(ch)) {
0627: pos--;
0628: break;
0629: }
0630: lcount++;
0631: if (pos == end)
0632: break;
0633: ch = buffer[pos++];
0634: }
0635:
0636: if (lcount == 1) {
0637: char prev = buffer[pos - 1];
0638: if (prev == 'i' || prev == 'I') {
0639: if (pos < end)
0640: return "junk after imaginary suffix 'i'";
0641: return Complex.make(IntNum.zero(), number);
0642: }
0643: }
0644: if (lcount > 0) {
0645: Object unit = null;
0646: for (;;) {
0647: String word = new String(buffer, pos - lcount,
0648: lcount);
0649: Object u = lookupUnit(word);
0650:
0651: int power = 1;
0652: if (pos < end) {
0653: ch = buffer[pos];
0654: if (ch == '^' && ++pos < end)
0655: ch = buffer[pos];
0656: boolean neg = ch == '-';
0657: if ((ch == '-' || ch == '+') && ++pos < end)
0658: ch = buffer[pos];
0659: power = -1;
0660: for (;;) {
0661: int d = Character.digit(ch, 10);
0662: if (d < 0) {
0663: if (power < 0)
0664: return "junk after unit name";
0665: break;
0666: }
0667: power = power < 0 ? d : 10 * power + d;
0668: if (++pos == end)
0669: break;
0670: if (power > 1000000)
0671: return "unit power too large";
0672: ch = buffer[pos];
0673: }
0674: if (neg)
0675: power = -power;
0676: }
0677:
0678: // "expt" and "*" are too open to name clashes. FIXME.
0679: if (power != 1) {
0680: if (u instanceof Unit)
0681: u = Unit.pow((Unit) u, power);
0682: else
0683: u = LList.list3("expt", u, IntNum
0684: .make(power));
0685: }
0686: if (unit == null)
0687: unit = u;
0688: else if (u instanceof Unit && unit instanceof Unit)
0689: unit = Unit.times((Unit) unit, (Unit) u);
0690: else
0691: unit = LList.list3("*", unit, u);
0692: if (pos >= end)
0693: break;
0694: ch = buffer[pos++];
0695: if (ch == '*') {
0696: if (pos == end)
0697: return "end of token after '*'";
0698: ch = buffer[pos++];
0699: }
0700: lcount = 0;
0701: for (;;) {
0702: if (!Character.isLetter(ch)) {
0703: pos--;
0704: break;
0705: }
0706: lcount++;
0707: if (pos == end)
0708: break;
0709: ch = buffer[pos++];
0710: }
0711: if (lcount == 0)
0712: return "excess junk after unit";
0713: }
0714:
0715: if (unit == null)
0716: return "expected unit";
0717: else if (unit instanceof Unit)
0718: return Quantity.make(number, (Unit) unit);
0719: else
0720: return LList.list3("*", number, unit);
0721: } else
0722: return "excess junk after number";
0723:
0724: }
0725: return number;
0726: }
0727:
0728: private static IntNum valueOf(char[] buffer, int digits_start,
0729: int number_of_digits, int radix, boolean negative,
0730: long lvalue) {
0731: // It turns out that if number_of_digits + radix <= 28
0732: // then the value will fit in a long without overflow,
0733: // so we can use the value calculated in lvalue.
0734: if (number_of_digits + radix <= 28)
0735: return IntNum.make(negative ? -lvalue : lvalue);
0736: else
0737: return IntNum.valueOf(buffer, digits_start,
0738: number_of_digits, radix, negative);
0739: }
0740:
0741: protected Object returnSymbol(int startPos, int endPos,
0742: ReadTable rtable) {
0743: char readCase = getReadCase();
0744: if (readCase == 'I') {
0745: int upperCount = 0;
0746: int lowerCount = 0;
0747: for (int i = startPos; i < endPos; i++) {
0748: char ch = tokenBuffer[i];
0749: if (ch == TOKEN_ESCAPE_CHAR)
0750: i++;
0751: else if (Character.isLowerCase(ch))
0752: lowerCount++;
0753: else if (Character.isUpperCase(ch))
0754: upperCount++;
0755: }
0756: if (lowerCount == 0)
0757: readCase = 'D';
0758: else if (upperCount == 0)
0759: readCase = 'U';
0760: else
0761: readCase = 'P';
0762: }
0763:
0764: int packageMarker = -1;
0765: int j = startPos;
0766: for (int i = startPos; i < endPos; i++) {
0767: char ch = tokenBuffer[i];
0768: if (ch == TOKEN_ESCAPE_CHAR) {
0769: if (++i < endPos)
0770: tokenBuffer[j++] = tokenBuffer[i];
0771: continue;
0772: }
0773: if (ch == ':')
0774: packageMarker = packageMarker >= 0 ? -1 : j;
0775: else if (readCase == 'U')
0776: ch = Character.toUpperCase(ch);
0777: else if (readCase == 'D')
0778: ch = Character.toLowerCase(ch);
0779: tokenBuffer[j++] = ch;
0780: }
0781: endPos = j;
0782:
0783: int len = endPos - startPos;
0784:
0785: if (initialColonIsKeyword && packageMarker == startPos
0786: && len > 1) {
0787: startPos++;
0788: String str = new String(tokenBuffer, startPos, endPos
0789: - startPos);
0790: return Keyword.make(str.intern());
0791: }
0792: if (finalColonIsKeyword && packageMarker == endPos - 1
0793: && len > 1) {
0794: String str = new String(tokenBuffer, startPos, len - 1);
0795: return Keyword.make(str.intern());
0796: }
0797: return rtable
0798: .makeSymbol(new String(tokenBuffer, startPos, len));
0799: }
0800:
0801: /** Classify and return a token in tokenBuffer from startPos to endPos. */
0802: public Object handleToken(int startPos, int endPos, ReadTable rtable) {
0803: Object value = parseNumber(tokenBuffer, startPos, endPos
0804: - startPos, '\0', 0, SCM_NUMBERS);
0805: if (value != null && !(value instanceof String))
0806: return value;
0807: if (isPotentialNumber(tokenBuffer, startPos, endPos)) {
0808: error(value == null ? "not a valid number"
0809: : "not a valid number: " + value);
0810: return IntNum.zero();
0811: }
0812: return returnSymbol(startPos, endPos, rtable);
0813: }
0814:
0815: /** Reads a C-style String escape sequence.
0816: * Assume '\\' has already been read.
0817: * Return the converted character, or -1 on EOF, or -2 to ignore. */
0818: public int readEscape() throws java.io.IOException, SyntaxException {
0819: int c = read();
0820: if (c < 0) {
0821: eofError("unexpected EOF in character literal");
0822: return -1;
0823: }
0824: return readEscape(c);
0825: }
0826:
0827: public final int readEscape(int c) throws java.io.IOException,
0828: SyntaxException {
0829: switch ((char) c) {
0830: case 'a':
0831: c = 7;
0832: break; // alarm/bell
0833: case 'b':
0834: c = 8;
0835: break; // backspace
0836: case 't':
0837: c = 9;
0838: break; // tab
0839: case 'n':
0840: c = 10;
0841: break; // newline
0842: case 'v':
0843: c = 11;
0844: break; // vertical tab
0845: case 'f':
0846: c = 12;
0847: break; // formfeed
0848: case 'r':
0849: c = 13;
0850: break; // carriage return
0851: case 'e':
0852: c = 27;
0853: break; // escape
0854: case '\"':
0855: c = 34;
0856: break; // quote
0857: case '\\':
0858: c = 92;
0859: break; // backslash
0860: case ' ': // Skip to end of line, inclusive.
0861: for (;;) {
0862: c = read();
0863: if (c < 0) {
0864: eofError("unexpected EOF in character literal");
0865: return -1;
0866: }
0867: if (c == '\n')
0868: return -2;
0869: if (c == '\r') {
0870: if (peek() == '\n')
0871: skip();
0872: return -2;
0873: }
0874: if (c != ' ' && c != '\t') {
0875: unread(c);
0876: break;
0877: }
0878: }
0879: case '\r':
0880: if (peek() == '\n')
0881: skip();
0882: return -2;
0883: case '\n':
0884: return -2;
0885: case 'M':
0886: c = read();
0887: if (c != '-') {
0888: error("Invalid escape character syntax");
0889: return '?';
0890: }
0891: c = read();
0892: if (c == '\\')
0893: c = readEscape();
0894: return c | 0200;
0895: case 'C':
0896: c = read();
0897: if (c != '-') {
0898: error("Invalid escape character syntax");
0899: return '?';
0900: }
0901: /* ... fall through ... */
0902: case '^':
0903: c = read();
0904: if (c == '\\')
0905: c = readEscape();
0906: if (c == '?')
0907: return 0177;
0908: return c & (0200 | 037);
0909: case '0':
0910: case '1':
0911: case '2':
0912: case '3':
0913: case '4':
0914: case '5':
0915: case '6':
0916: case '7':
0917: /* An octal escape, as in ANSI C. */
0918: c = c - '0';
0919: for (int count = 0; ++count < 3;) {
0920: int d = read();
0921: int v = Character.digit((char) d, 8);
0922: if (v >= 0)
0923: c = (c << 3) + v;
0924: else {
0925: if (d >= 0)
0926: unread(d);
0927: break;
0928: }
0929: }
0930: break;
0931: case 'u':
0932: c = 0;
0933: for (int i = 4; --i >= 0;) {
0934: int d = read();
0935: if (d < 0)
0936: eofError("premature EOF in \\u escape");
0937: int v = Character.digit((char) d, 16);
0938: if (v < 0)
0939: error("non-hex character following \\u");
0940: c = 16 * c + v;
0941: }
0942: break;
0943: case 'x':
0944: c = 0;
0945: /* A hex escape, as in ANSI C. */
0946: for (;;) {
0947: int d = read();
0948: int v = Character.digit((char) d, 16);
0949: if (v >= 0)
0950: c = (c << 4) + v;
0951: else {
0952: if (d >= 0)
0953: unread(d);
0954: break;
0955: }
0956: }
0957: break;
0958: default:
0959: break;
0960: }
0961: return c;
0962: }
0963:
0964: public final Object readObject(int c) throws java.io.IOException,
0965: SyntaxException {
0966: unread(c);
0967: return readObject();
0968: }
0969:
/** Read a "command" - a top-level expression or declaration.
 * This implementation simply delegates to readObject().
 * @return the object read, or Sequence.eofValue at end of file
 */
public Object readCommand() throws java.io.IOException,
        SyntaxException {
    return readObject();
}
0976:
/** Return the object used for the empty list, LList.Empty. */
protected Object makeNil() {
    return LList.Empty;
}
0980:
0981: protected Object makePair(Object car, int line, int column) {
0982: String pname = port.getName();
0983: if (pname != null && line >= 0)
0984: return PairWithPosition.make(car, LList.Empty, pname,
0985: line + 1, column + 1);
0986: else
0987: return Pair.make(car, LList.Empty);
0988: }
0989:
0990: public Object makePair(Object car, Object cdr) {
0991: Object pair = makePair(car, -1, -1);
0992: setCdr(pair, cdr);
0993: return pair;
0994: }
0995:
0996: protected void setCdr(Object pair, Object cdr) {
0997: ((Pair) pair).cdr = cdr;
0998: }
0999:
1000: /** Read a number from a LispReader
1001: * @param previous number of characters already pushed on tokenBuffer
1002: * @param reader LispReader to read from
1003: * @param radix base to use or -1 if unspecified
1004: */
1005: public static Object readNumberWithRadix(int previous,
1006: LispReader reader, int radix) throws java.io.IOException,
1007: SyntaxException {
1008: int startPos = reader.tokenBufferLength - previous;
1009: reader.readToken(reader.read(), 'P', ReadTable.getCurrent());
1010: int endPos = reader.tokenBufferLength;
1011: if (startPos == endPos) {
1012: reader.error("missing numeric token");
1013: return IntNum.zero();
1014: }
1015: Object result = LispReader.parseNumber(reader.tokenBuffer,
1016: startPos, endPos - startPos, '\0', radix, 0);
1017: if (result instanceof String) {
1018: reader.error((String) result);
1019: return IntNum.zero();
1020: } else if (result == null) {
1021: reader.error("invalid numeric constant");
1022: return IntNum.zero();
1023: } else
1024: return result;
1025: }
1026:
1027: public static Object readCharacter(LispReader reader)
1028: throws java.io.IOException, SyntaxException {
1029: int ch = reader.read();
1030: if (ch < 0)
1031: reader.eofError("unexpected EOF in character literal");
1032: int startPos = reader.tokenBufferLength;
1033: reader.tokenBufferAppend(ch);
1034: reader.readToken(reader.read(), 'D', ReadTable.getCurrent());
1035: int length = reader.tokenBufferLength - startPos;
1036: if (length == 1)
1037: return Char.make(reader.tokenBuffer[startPos]);
1038: String name = new String(reader.tokenBuffer, startPos, length);
1039: ch = Char.nameToChar(name);
1040: if (ch >= 0)
1041: return Char.make(ch);
1042: ch = Character.digit(reader.tokenBuffer[startPos], 8);
1043: if (ch >= 0) {
1044: int value = ch;
1045: for (int i = 1;; i++) {
1046: if (i == length)
1047: return Char.make(value);
1048: ch = Character.digit(reader.tokenBuffer[startPos + i],
1049: 8);
1050: if (ch < 0)
1051: break;
1052: value = 8 * value + ch;
1053: }
1054: }
1055: reader.error("unknown character name: " + name);
1056: return Char.make('?');
1057: }
1058:
1059: public static Object readSpecial(LispReader reader)
1060: throws java.io.IOException, SyntaxException {
1061: int ch = reader.read();
1062: if (ch < 0)
1063: reader.eofError("unexpected EOF in #! special form");
1064:
1065: /* Handle Unix #!PROGRAM line at start of file. */
1066: if (ch == '/' && reader.getLineNumber() == 0
1067: && reader.getColumnNumber() == 3) {
1068: ReaderIgnoreRestOfLine.getInstance().read(reader, '#', 1);
1069: return Values.empty;
1070: }
1071:
1072: int startPos = reader.tokenBufferLength;
1073: reader.tokenBufferAppend(ch);
1074: reader.readToken(reader.read(), 'D', ReadTable.getCurrent());
1075: int length = reader.tokenBufferLength - startPos;
1076: String name = new String(reader.tokenBuffer, startPos, length);
1077: if (name.equals("optional"))
1078: return Special.optional;
1079: if (name.equals("rest"))
1080: return Special.rest;
1081: if (name.equals("key"))
1082: return Special.key;
1083: if (name.equals("eof"))
1084: return Special.eof;
1085: if (name.equals("void"))
1086: //return Values.empty;
1087: return QuoteExp.voidExp;
1088: if (name.equals("default"))
1089: return Special.dfault;
1090: if (name.equals("undefined"))
1091: return Special.undefined;
1092: if (name.equals("null"))
1093: return null;
1094: reader.error("unknown named constant #!" + name);
1095: return null;
1096: }
1097:
1098: public static SimpleVector readSimpleVector(LispReader reader,
1099: char kind) throws java.io.IOException, SyntaxException {
1100: int size = 0;
1101: int ch;
1102: for (;;) {
1103: ch = reader.read();
1104: if (ch < 0)
1105: reader
1106: .eofError("unexpected EOF reading uniform vector");
1107: int digit = Character.digit((char) ch, 10);
1108: if (digit < 0)
1109: break;
1110: size = size * 10 + digit;
1111: }
1112: if (!(size == 8 || size == 16 || size == 32 || size == 64)
1113: || (kind == 'F' && size < 32) || ch != '(') {
1114: reader.error("invalid uniform vector syntax");
1115: return null;
1116: }
1117: Object list = ReaderParens.readList(reader, '(', -1, ')');
1118: int len = LList.listLength(list, false);
1119: if (len < 0) {
1120: reader.error("invalid elements in uniform vector syntax");
1121: return null;
1122: }
1123: Sequence q = (Sequence) list;
1124: switch (kind) {
1125: case 'F':
1126: switch (size) {
1127: case 32:
1128: return new F32Vector(q);
1129: case 64:
1130: return new F64Vector(q);
1131: }
1132: case 'S':
1133: switch (size) {
1134: case 8:
1135: return new S8Vector(q);
1136: case 16:
1137: return new S16Vector(q);
1138: case 32:
1139: return new S32Vector(q);
1140: case 64:
1141: return new S64Vector(q);
1142: }
1143: case 'U':
1144: switch (size) {
1145: case 8:
1146: return new U8Vector(q);
1147: case 16:
1148: return new U16Vector(q);
1149: case 32:
1150: return new U32Vector(q);
1151: case 64:
1152: return new U64Vector(q);
1153: }
1154: }
1155: return null;
1156: }
1157: }
|