001: /***** BEGIN LICENSE BLOCK *****
002: * Version: CPL 1.0/GPL 2.0/LGPL 2.1
003: *
004: * The contents of this file are subject to the Common Public
005: * License Version 1.0 (the "License"); you may not use this file
006: * except in compliance with the License. You may obtain a copy of
007: * the License at http://www.eclipse.org/legal/cpl-v10.html
008: *
009: * Software distributed under the License is distributed on an "AS
010: * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
011: * implied. See the License for the specific language governing
012: * rights and limitations under the License.
013: *
014: * Copyright (C) 2004-2006 Thomas E Enebo <enebo@acm.org>
015: * Copyright (C) 2004 Jan Arne Petersen <jpetersen@uni-bonn.de>
016: * Copyright (C) 2004 Stefan Matthias Aust <sma@3plus4.de>
017: * Copyright (C) 2005 Zach Dennis <zdennis@mktec.com>
018: *
019: * Alternatively, the contents of this file may be used under the terms of
020: * either of the GNU General Public License Version 2 or later (the "GPL"),
021: * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
022: * in which case the provisions of the GPL or the LGPL are applicable instead
023: * of those above. If you wish to allow use of your version of this file only
024: * under the terms of either the GPL or the LGPL, and not to allow others to
025: * use your version of this file under the terms of the CPL, indicate your
026: * decision by deleting the provisions above and replace them with the notice
027: * and other provisions required by the GPL or the LGPL. If you do not delete
028: * the provisions above, a recipient may use your version of this file under
029: * the terms of any one of the CPL, the GPL or the LGPL.
030: ***** END LICENSE BLOCK *****/package org.jruby.lexer.yacc;
031:
032: import java.io.IOException;
033: import java.io.Reader;
034:
035: import org.jruby.util.ByteList;
036:
037: /**
038: * This class is what feeds the lexer. It is primarily a wrapper around a
039: * Reader that can unread() data back onto the source. Originally, I thought
040: * about using the PushbackReader to handle read/unread, but I realized that
041: * some extremely pathological case could overflow the pushback buffer. Better
042: * safe than sorry. I could have combined this implementation with a
043: * PushbackBuffer, but the added complexity did not seem worth it.
044: *
045: * @author enebo
046: */
047: public class LexerSource {
048: private static final int INITIAL_PUSHBACK_SIZE = 100;
049:
050: // Where we get new positions from.
051: private ISourcePositionFactory positionFactory;
052:
053: // Where we get our newest char's
054: private final Reader reader;
055:
056: // Our readback/pushback buffer.
057: private char buf[] = new char[INITIAL_PUSHBACK_SIZE];
058:
059: // index of last character in pushback buffer
060: private int bufLength = -1;
061:
062: // Character read before previous read
063: private int oneAgo = '\n';
064: private int twoAgo = 0;
065:
066: // The name of this source (e.g. a filename: foo.rb)
067: private final String sourceName;
068:
069: // Number of newlines read from the reader
070: private int line = 0;
071:
072: // How many bytes into the source are we?
073: private int offset = 0;
074:
075: /**
076: * Create our food-source for the lexer
077: *
078: * @param sourceName is the file we are reading
079: * @param reader is what represents the contents of file sourceName
080: * @param line starting line number for source (used by eval)
081: * @param extraPositionInformation will gives us extra information that an IDE may want
082: */
083: public LexerSource(String sourceName, Reader reader, int line,
084: boolean extraPositionInformation) {
085: this .sourceName = sourceName;
086: this .reader = reader;
087: if (extraPositionInformation) {
088: positionFactory = new IDESourcePositionFactory(this , line);
089: } else {
090: positionFactory = new SimplePositionFactory(this , line);
091: }
092: this .line = line;
093: }
094:
095: /**
096: * Read next character from this source
097: *
098: * @return next character to viewed by the source
099: */
100: public char read() throws IOException {
101: int length = bufLength;
102: char c;
103:
104: if (length >= 0) {
105: c = buf[bufLength--];
106: } else {
107: c = wrappedRead();
108:
109: // EOF...Do not advance column...Go straight to jail
110: if (c == 0) {
111: //offset++;
112: return c;
113: }
114: }
115:
116: twoAgo = oneAgo;
117: oneAgo = c;
118: offset++;
119:
120: if (c == '\n')
121: line++;
122:
123: return c;
124: }
125:
126: /**
127: * Pushes char back onto this source. Note, this also
128: * allows us to push whatever is passes back into the source.
129: *
130: * @param c to be put back onto the source
131: */
132: public void unread(char c) {
133: if (c == 0)
134: return;
135:
136: offset--;
137: oneAgo = twoAgo;
138: twoAgo = 0;
139:
140: if (c == '\n')
141: line--;
142:
143: buf[++bufLength] = c;
144:
145: // If we outgrow our pushback stack then grow it (this should only happen in pretty
146: // pathological cases).
147: if (bufLength + 1 == buf.length) {
148: char[] newBuf = new char[buf.length + INITIAL_PUSHBACK_SIZE];
149:
150: System.arraycopy(buf, 0, newBuf, 0, buf.length);
151:
152: buf = newBuf;
153: }
154: }
155:
156: public boolean peek(char to) throws IOException {
157: char c = read();
158: unread(c);
159: return c == to;
160: }
161:
162: /**
163: * What file are we lexing?
164: * @return the files name
165: */
166: public String getFilename() {
167: return sourceName;
168: }
169:
170: /**
171: * What line are we at?
172: * @return the line number 0...line_size-1
173: */
174: public int getLine() {
175: return line;
176: }
177:
178: /**
179: * The location of the last byte we read from the source.
180: *
181: * @return current location of source
182: */
183: public int getOffset() {
184: return (offset <= 0 ? 0 : offset);
185: }
186:
187: /**
188: * Where is the reader within the source {filename,row}
189: *
190: * @return the current position
191: */
192: public ISourcePosition getPosition(ISourcePosition startPosition,
193: boolean inclusive) {
194: return positionFactory.getPosition(startPosition, inclusive);
195: }
196:
197: /**
198: * Where is the reader within the source {filename,row}
199: *
200: * @return the current position
201: */
202: public ISourcePosition getPosition() {
203: return positionFactory.getPosition(null, false);
204: }
205:
206: public ISourcePositionFactory getPositionFactory() {
207: return positionFactory;
208: }
209:
210: /**
211: * Convenience method to hide exception. If we do hit an exception
212: * we will pretend we EOF'd.
213: *
214: * @return the current char or EOF (at EOF or on error)
215: */
216: private char wrappedRead() throws IOException {
217: int c = reader.read();
218:
219: // If \r\n then just pass along \n (windows)
220: // If \r[^\n] then pass along \n (MAC)
221: if (c == '\r') {
222: if ((c = reader.read()) != '\n') {
223: unread((char) c);
224: c = '\n';
225: } else {
226: // Position within source must reflect the actual offset and column. Since
227: // we ate an extra character here (this accounting is normally done in read
228: // ), we should update position info.
229: offset++;
230: }
231: }
232:
233: return c != -1 ? (char) c : '\0';
234: }
235:
236: /**
237: * Create a source.
238: *
239: * @param name the name of the source (e.g a filename: foo.rb)
240: * @param content the data of the source
241: * @return the new source
242: */
243: public static LexerSource getSource(String name, Reader content,
244: int line, boolean extraPositionInformation) {
245: return new LexerSource(name, content, line,
246: extraPositionInformation);
247: }
248:
249: public String readLine() throws IOException {
250: StringBuffer sb = new StringBuffer(80);
251: for (char c = read(); c != '\n' && c != '\0'; c = read()) {
252: sb.append(c);
253: }
254: return sb.toString();
255: }
256:
257: public ByteList readLineBytes() throws IOException {
258: ByteList bytelist = new ByteList(80);
259:
260: for (char c = read(); c != '\n' && c != '\0'; c = read()) {
261: bytelist.append(c);
262: }
263: return bytelist;
264: }
265:
266: public void unreadMany(CharSequence buffer) {
267: int length = buffer.length();
268: for (int i = length - 1; i >= 0; i--) {
269: unread(buffer.charAt(i));
270: }
271: }
272:
273: public boolean matchString(String match, boolean indent)
274: throws IOException {
275: int length = match.length();
276: StringBuffer buffer = new StringBuffer(length + 20);
277:
278: if (indent) {
279: char c;
280: while ((c = read()) != '\0') {
281: if (!Character.isWhitespace(c) || c == '\n') {
282: unread(c);
283: break;
284: }
285: buffer.append(c);
286: }
287: }
288:
289: for (int i = 0; i < length; i++) {
290: char c = read();
291: buffer.append(c);
292: if (match.charAt(i) != c) {
293: unreadMany(buffer);
294: return false;
295: }
296: }
297:
298: char c = read();
299: if (c != '\n' && c != '\0') {
300: unread(c);
301: return false;
302: }
303:
304: return true;
305: }
306:
307: public boolean wasBeginOfLine() {
308: return twoAgo == '\n';
309: }
310:
311: public char readEscape() throws IOException {
312: char c = read();
313:
314: switch (c) {
315: case '\\': // backslash
316: return c;
317: case 'n': // newline
318: return '\n';
319: case 't': // horizontal tab
320: return '\t';
321: case 'r': // carriage return
322: return '\r';
323: case 'f': // form feed
324: return '\f';
325: case 'v': // vertical tab
326: return '\u000B';
327: case 'a': // alarm(bell)
328: return '\u0007';
329: case 'e': // escape
330: return '\u001B';
331: case '0':
332: case '1':
333: case '2':
334: case '3': // octal constant
335: case '4':
336: case '5':
337: case '6':
338: case '7':
339: unread(c);
340: return scanOct(3);
341: case 'x': // hex constant
342: int i = 0;
343: //char hexValue = scanHex(2);
344:
345: char hexValue = '\0';
346:
347: for (; i < 2; i++) {
348: char h1 = read();
349:
350: if (!RubyYaccLexer.isHexChar(h1)) {
351: unread(h1);
352: break;
353: }
354:
355: hexValue <<= 4;
356: hexValue |= Integer.parseInt("" + h1, 16) & 15;
357: }
358:
359: // No hex value after the 'x'.
360: if (i == 0) {
361: throw new SyntaxException(getPosition(),
362: "Invalid escape character syntax");
363: }
364: return hexValue;
365: case 'b': // backspace
366: return '\010';
367: case 's': // space
368: return ' ';
369: case 'M':
370: if ((c = read()) != '-') {
371: throw new SyntaxException(getPosition(),
372: "Invalid escape character syntax");
373: } else if ((c = read()) == '\\') {
374: return (char) (readEscape() | 0x80);
375: } else if (c == '\0') {
376: throw new SyntaxException(getPosition(),
377: "Invalid escape character syntax");
378: }
379: return (char) ((c & 0xff) | 0x80);
380: case 'C':
381: if ((c = read()) != '-') {
382: throw new SyntaxException(getPosition(),
383: "Invalid escape character syntax");
384: }
385: case 'c':
386: if ((c = read()) == '\\') {
387: c = readEscape();
388: } else if (c == '?') {
389: return '\u0177';
390: } else if (c == '\0') {
391: throw new SyntaxException(getPosition(),
392: "Invalid escape character syntax");
393: }
394: return (char) (c & 0x9f);
395: case '\0':
396: throw new SyntaxException(getPosition(),
397: "Invalid escape character syntax");
398: default:
399: return c;
400: }
401: }
402:
403: private char scanOct(int count) throws IOException {
404: char value = '\0';
405:
406: for (int i = 0; i < count; i++) {
407: char c = read();
408:
409: if (!RubyYaccLexer.isOctChar(c)) {
410: unread(c);
411: break;
412: }
413:
414: value <<= 3;
415: value |= Integer.parseInt("" + c, 8);
416: }
417:
418: return value;
419: }
420:
421: /**
422: * Get character ahead of current position by offset positions.
423: *
424: * @param anOffset is location past current position to get char at
425: * @return character index positions ahead of source location or EOF
426: */
427: public char getCharAt(int anOffset) throws IOException {
428: StringBuffer buffer = new StringBuffer(anOffset);
429:
430: // read next offset chars
431: for (int i = 0; i < anOffset; i++) {
432: buffer.append(read());
433: }
434:
435: int length = buffer.length();
436:
437: // Whoops not enough chars left EOF!
438: if (length == 0) {
439: return '\0';
440: }
441:
442: // Push chars back now that we found it
443: for (int i = 0; i < length; i++) {
444: unread(buffer.charAt(i));
445: }
446:
447: return buffer.charAt(length - 1);
448: }
449:
450: public String toString() {
451: try {
452: StringBuffer buffer = new StringBuffer(20);
453: for (int i = 0; i < 20; i++) {
454: buffer.append(read());
455: }
456: for (int i = 0; i < 20; i++) {
457: unread(buffer.charAt(buffer.length() - i - 1));
458: }
459: buffer.append(" ...");
460: return buffer.toString();
461: } catch (Exception e) {
462: return null;
463: }
464: }
465:
466: // BEGIN NETBEANS MODIFICATIONS
467: public int chompReadAhead() {
468: int result = bufLength + 1;
469: bufLength = -1;
470: return result;
471: }
472:
473: public boolean isANewLine() {
474: return oneAgo == '\n';
475: }
476:
477: // Various places where we call LexerSource.unread(), the nextCharIsOnANewline value gets inaccurate (column/line too, but I don't care about those)
478: public void setIsANewLine(boolean nextCharIsOnANewLine) {
479: oneAgo = nextCharIsOnANewLine ? '\n' : oneAgo;
480: }
481:
482: public void setOffset(int offset) {
483: this .offset = offset;
484: }
485: // END NETBEANS MODIFICATIONS
486: }
|