001: // Copyright (c) 2004, 2007 Per M.A. Bothner.
002: // This is free software; for terms and warranty disclaimer see ./COPYING.
003:
004: package gnu.text;
005:
006: import java.io.*;
007:
008: /** A LineNumberReader with some extra features:
009: *
010: * You can seek backwards to the start of the line preceding the
011: * current position (or the mark, if that has been set).
012: * You can use seek with a negative offset, or unread.
013: * You can also use peek to look at the next character without moving.
014: *
015: * The method getColumnNumber gives you the current column.
016: *
017: * Provides a method that is called at the start of a line.
018: * This is especially useful for interactive streams (e.g. prompting).
019: *
020: * It would be nice if we could inherit from LineNumberReader.
021: * That may be possible in theory, but it is difficult and
022: * expensive (because we don't get access to BufferedReader's buffer).
023: *
024: * @author Per Bothner <bothner@cygnus.com>
025: */
026:
027: public class LineBufferedReader extends Reader {
028: public void close() throws IOException {
029: in.close();
030: }
031:
032: // Not used by LineInputStreamReader subclass!
033: protected Reader in;
034:
035: /** Default (initial buffer) size. */
036: public final static int BUFFER_SIZE = 8192;
037:
038: /** The input buffer, containing the current line etc. */
039: public char[] buffer;
040:
041: /** The current read position, as an index into buffer. */
042: public int pos;
043:
044: /** The length of the valid (data-containing) part of the buffer. */
045: public int limit;
046:
047: /** The high-water mark for pos, at a reset or line start. */
048: int highestPos;
049:
050: public char readState = '\n';
051:
052: /** Return a character that indicates what we are currently reading.
053: * Returns '\n' if we are not inside read; '\"' if reading a string;
054: * '|' if inside a comment; '(' if inside a list; and
055: * ' ' if otherwise inside a read. */
056: public char getReadState() {
057: return readState;
058: }
059:
060: private int flags;
061:
062: // Notice the asymmetry in how "X\r\nY" is handled (assuming convertCR)..
063: // When we read forward, the positions are 0, 1, 2, 4.
064: // After seeing the '\r', we do not read ahead to look for a '\n'
065: // because if we did, and there were no '\n', terminal input would hang.
066: // The (lineNumber, lineStartPos) goes (0,0), (0,0), (0,0), (1,3).
067: // However, the methods (getLineNumber(), getColumnNumber())
068: // return the external values (0:0), (0:1), (1:0), (1:1).
069: // When we move backwards, the positions are 4, 3, 1, 0.
070: // This is because we want to stay within the same line.
071: // The (lineNumber, lineStartPos) goes (1,3), (1,3), (0,0), (0,0).
072: // For (getLineNumber(), getColumnNumber()) we get (1:1), (1:0), (0:1), (0:0)
073: // which are the same as when we are moving forwards.
074: // A nice bonus is that both skip_quick and unread_quick are trivial.
075:
076: /* If true in flags, convert "\r" and "\r\n" to '\n'. */
077: private static final int CONVERT_CR = 1;
078:
079: /* If true in flags, may not re-allocate buffer. */
080: private static final int USER_BUFFER = 2;
081:
082: /* If true in flags, char before start of buffer was '\r'. */
083: private static final int PREV_WAS_CR = 4;
084:
085: /** If true in flags, we don't need to keep the whole line.
086: * I.e. the application doesn't need to scan to the beginning of line. */
087: private static final int DONT_KEEP_FULL_LINES = 8;
088:
089: /** Should we preserve the complete current line?
090: * The default is true, but in some cases there can be a performance
091: * improvement if we don't need to keep a long line when filling the buffer.
092: */
093: public void setKeepFullLines(boolean keep) {
094: if (keep)
095: flags &= ~DONT_KEEP_FULL_LINES;
096: else
097: flags |= DONT_KEEP_FULL_LINES;
098: }
099:
100: /** True if CR and CRLF should be converted to LF. */
101: public final boolean getConvertCR() {
102: return (flags & CONVERT_CR) != 0;
103: }
104:
105: public final void setConvertCR(boolean convertCR) {
106: if (convertCR)
107: flags |= CONVERT_CR;
108: else
109: flags &= ~CONVERT_CR;
110: }
111:
112: /** The position that marks the start of the current or marked line.
113: * If the {@code readAheadLimit > 0 && markPos < pos},
114: * then it is the start of the line containing the {@code markPos}.
115: * If we are at the end of a line, and have not started reading the next
116: * one (and are therefore allowed by unread back to the old line),
117: * the current line is still the old line; {@code lineStartPos} does not
118: * get set to the new pos until we read/peek the first char of the new line.
119: * If {@code lineStartPos < 0}, it means we went beyond the buffer maximum.
120: */
121: private int lineStartPos;
122:
123: Path path;
124:
125: /** The current line number (at position of lineStartPos). */
126: protected int lineNumber;
127:
128: /** If mark has been called, and not invalidated, the read ahead limit.
129: * Zero if mark has not been called, or had been invalidated
130: * (due to either calling reset or excessive reading ahead). */
131: protected int readAheadLimit = 0;
132:
133: /** The position of the mark (assuming {@code readAheadLinit > 0}).
134: * (Garbage if {@code readAheadLimit <= 0}). */
135: protected int markPos;
136:
137: public LineBufferedReader(InputStream in) {
138: this .in = new InputStreamReader(in);
139: }
140:
141: public LineBufferedReader(Reader in) {
142: this .in = in;
143: }
144:
145: /** A hook to allow sub-classes to perform some action at start of line.
146: * Called just before the first character of the new line is read.
147: * @param revisited true if we have read here before (i.e.
148: * we did a reset of unread() to get here)
149: */
150: public void lineStart(boolean revisited) throws java.io.IOException {
151: }
152:
153: /** Called by {@code read()} when it needs its buffer filled.
154: * Read characters into buffer, starting at off, for len.
155: * Can assume that len > 0. Only called if {@code pos>=limit}.
156: * Return -1 if EOF, otherwise number of read chars.
157: * This can be usefully overridden by sub-classes. */
158: public int fill(int len) throws java.io.IOException {
159: return in.read(buffer, pos, len);
160: }
161:
162: private void clearMark() {
163: // Invalidate the mark.
164: readAheadLimit = 0;
165: // Need to maintain the lineStartPos invariant.
166: int i = lineStartPos < 0 ? 0 : lineStartPos;
167: for (;;) {
168: if (++i >= pos)
169: break;
170: char ch = buffer[i - 1];
171: if (ch == '\n'
172: || (ch == '\r' && (!getConvertCR() || buffer[i] != '\n'))) {
173: lineNumber++;
174: lineStartPos = i;
175: }
176:
177: }
178: }
179:
180: /** Specify a buffer to use for the input buffer. */
181: public void setBuffer(char[] buffer) throws java.io.IOException {
182: if (buffer == null) {
183: if (this .buffer != null) {
184: buffer = new char[this .buffer.length];
185: System.arraycopy(this .buffer, 0, buffer, 0,
186: this .buffer.length);
187: this .buffer = buffer;
188: }
189: flags &= ~USER_BUFFER;
190: } else {
191: if (limit - pos > buffer.length)
192: throw new java.io.IOException("setBuffer - too short");
193: flags |= USER_BUFFER;
194: reserve(buffer, 0);
195: }
196: }
197:
198: /* Make sure there is enough space for more characters in buffer. */
199:
200: private void reserve(char[] buffer, int reserve)
201: throws java.io.IOException {
202: int saveStart;
203: reserve += limit;
204: if (reserve <= buffer.length)
205: saveStart = 0;
206: else {
207: saveStart = pos;
208: if (readAheadLimit > 0 && markPos < pos) {
209: if (pos - markPos > readAheadLimit
210: || ((flags & USER_BUFFER) != 0 && reserve
211: - markPos > buffer.length))
212: clearMark();
213: else
214: saveStart = markPos;
215: }
216:
217: reserve -= buffer.length;
218: if (reserve <= saveStart
219: && (saveStart <= lineStartPos || (flags & DONT_KEEP_FULL_LINES) != 0))
220: ;
221: else if (reserve <= lineStartPos
222: && saveStart > lineStartPos)
223: saveStart = lineStartPos;
224: else if ((flags & USER_BUFFER) != 0)
225: saveStart -= (saveStart - reserve) >> 2;
226: else {
227: if (lineStartPos >= 0)
228: saveStart = lineStartPos;
229: buffer = new char[2 * buffer.length];
230: }
231:
232: lineStartPos -= saveStart;
233: limit -= saveStart;
234: markPos -= saveStart;
235: pos -= saveStart;
236: highestPos -= saveStart;
237: }
238: if (limit > 0)
239: System.arraycopy(this .buffer, saveStart, buffer, 0, limit);
240: this .buffer = buffer;
241: }
242:
243: public int read() throws java.io.IOException {
244: char prev;
245: if (pos > 0)
246: prev = buffer[pos - 1];
247: else if ((flags & PREV_WAS_CR) != 0)
248: prev = '\r';
249: else if (lineStartPos >= 0)
250: prev = '\n';
251: else
252: prev = '\0';
253: if (prev == '\r' || prev == '\n') {
254: if (lineStartPos < pos
255: && (readAheadLimit == 0 || pos <= markPos)) {
256: lineStartPos = pos;
257: lineNumber++;
258: }
259: boolean revisited = pos < highestPos;
260: if (prev != '\n'
261: || (pos <= 1 ? (flags & PREV_WAS_CR) == 0
262: : buffer[pos - 2] != '\r')) {
263: lineStart(revisited);
264: }
265: if (!revisited)
266: highestPos = pos + 1; // Add one for this read().
267: }
268:
269: if (pos >= limit) {
270: if (buffer == null)
271: buffer = new char[BUFFER_SIZE];
272: else if (limit == buffer.length)
273: reserve(buffer, 1);
274: if (pos == 0) {
275: if (prev == '\r')
276: flags |= PREV_WAS_CR;
277: else
278: flags &= ~PREV_WAS_CR;
279: }
280: int readCount = fill(buffer.length - pos);
281: if (readCount <= 0)
282: return -1;
283: limit += readCount;
284: }
285:
286: int ch = buffer[pos++];
287: if (ch == '\n') {
288: if (prev == '\r') {
289: // lineNumber is the number of lines before lineStartPos.
290: // If lineStartPos is between '\r and '\n', we will count
291: // an extra line for the '\n', which gets the count off.
292: // Hence compensate.
293: if (lineStartPos == pos - 1) {
294: lineNumber--;
295: lineStartPos--;
296: }
297: if (getConvertCR())
298: return read();
299: }
300: } else if (ch == '\r') {
301: if (getConvertCR())
302: return '\n';
303: }
304: return ch;
305: }
306:
307: public int read(char[] cbuf, int off, int len)
308: throws java.io.IOException {
309: // Same logic as in skip(n), when n>0.
310: int ch;
311: if (pos >= limit)
312: ch = '\0';
313: else if (pos > 0)
314: ch = buffer[pos - 1];
315: else if ((flags & PREV_WAS_CR) != 0 || lineStartPos >= 0)
316: ch = '\n';
317: else
318: ch = '\0';
319: int to_do = len;
320: while (to_do > 0) {
321: if (pos >= limit || ch == '\n' || ch == '\r') {
322: // Return if there is no more in the input buffer, and we got
323: // at least one char. This is desirable for interactive input.
324: if (pos >= limit && to_do < len)
325: return len - to_do;
326: ch = read();
327: if (ch < 0) {
328: len -= to_do;
329: return len <= 0 ? -1 : len;
330: }
331: cbuf[off++] = (char) ch;
332: to_do--;
333: } else {
334: int p = pos;
335: int lim = limit;
336: if (to_do < lim - p)
337: lim = p + to_do;
338: while (p < lim) {
339: ch = buffer[p];
340: // For simplicity and correctness we defer handling of
341: // newlines (including previous character) to read().
342: if (ch == '\n' || ch == '\r')
343: break;
344: cbuf[off++] = (char) ch;
345: p++;
346: }
347: to_do -= p - pos;
348: pos = p;
349: }
350: }
351: return len;
352: }
353:
354: public Path getPath() {
355: return path;
356: }
357:
358: public void setPath(Path path) {
359: this .path = path;
360: }
361:
362: public String getName() {
363: return path == null ? null : path.toString();
364: }
365:
366: public void setName(Object name) {
367: setPath(Path.valueOf(name));
368: }
369:
370: /** Get the current line number.
371: * The "first" line is number number 0. */
372: public int getLineNumber() {
373: int lineno = lineNumber;
374: if (readAheadLimit == 0) // Normal, fast case:
375: {
376: if (pos > 0 && pos > lineStartPos) {
377: char prev = buffer[pos - 1];
378: if (prev == '\n' || prev == '\r')
379: lineno++;
380: }
381: } else
382: lineno += countLines(buffer, lineStartPos < 0 ? 0
383: : lineStartPos, pos);
384: return lineno;
385: }
386:
387: public void setLineNumber(int lineNumber) {
388: this .lineNumber += lineNumber - getLineNumber();
389: }
390:
391: public void incrLineNumber(int lineDelta, int lineStartPos) {
392: lineNumber += lineDelta;
393: this .lineStartPos = lineStartPos;
394: }
395:
396: /** Return the current (zero-based) column number. */
397: public int getColumnNumber() {
398: if (pos > 0) {
399: char prev = buffer[pos - 1];
400: if (prev == '\n' || prev == '\r')
401: return 0;
402: }
403: if (readAheadLimit <= 0) // Normal, fast case:
404: return pos - lineStartPos;
405:
406: // Somebody did a mark(). Thus lineStartPos is not necessarily the
407: // start of the current line, so we have to search.
408: int start = lineStartPos < 0 ? 0 : lineStartPos;
409: for (int i = start; i < pos;) {
410: char ch = buffer[i++];
411: if (ch == '\n' || ch == '\r')
412: start = i;
413: }
414: int col = pos - start;
415: if (lineStartPos < 0)
416: col -= lineStartPos;
417: return col;
418: }
419:
420: public boolean markSupported() {
421: return true;
422: }
423:
424: public synchronized void mark(int readAheadLimit) {
425: if (this .readAheadLimit > 0)
426: clearMark();
427: this .readAheadLimit = readAheadLimit;
428: markPos = pos;
429: }
430:
431: public void reset() throws IOException {
432: if (readAheadLimit <= 0)
433: throw new IOException("mark invalid");
434: if (pos > highestPos)
435: highestPos = pos;
436: pos = markPos;
437: readAheadLimit = 0;
438: }
439:
440: /** Read a line.
441: * If mode is 'I' ("ignore") ignore delimiters.
442: * If mode is 'P' ("peek") leave delimiter in input stream.
443: * If mode is 'A' ("append") append delimiter to result.
444: */
445:
446: public void readLine(StringBuffer sbuf, char mode)
447: throws IOException {
448: for (;;) {
449: int ch = read();
450: if (ch < 0)
451: return;
452: int start = --pos;
453: while (pos < limit) {
454: ch = buffer[pos++];
455: if (ch == '\r' || ch == '\n') {
456: sbuf.append(buffer, start, pos - 1 - start);
457: if (mode == 'P') {
458: pos--;
459: return;
460: }
461: if (getConvertCR() || ch == '\n') {
462: if (mode != 'I')
463: sbuf.append('\n');
464: } else {
465: if (mode != 'I')
466: sbuf.append('\r');
467: ch = read();
468: if (ch == '\n') {
469: if (mode != 'I')
470: sbuf.append('\n');
471: } else if (ch >= 0)
472: unread_quick();
473: }
474: return;
475: }
476: }
477: sbuf.append(buffer, start, pos - start);
478: }
479: }
480:
481: public String readLine() throws IOException {
482: int ch = read();
483: if (ch < 0)
484: return null;
485: if (ch == '\r' || ch == '\n')
486: return "";
487: int start = pos - 1;
488: while (pos < limit) {
489: ch = buffer[pos++];
490: if (ch == '\r' || ch == '\n') {
491: int end = pos - 1;
492: if (ch != '\n' && !getConvertCR()) {
493: if (pos >= limit) {
494: pos--;
495: break;
496: }
497: if (buffer[pos] == '\n')
498: pos++;
499: }
500: return new String(buffer, start, end - start);
501: }
502: }
503: StringBuffer sbuf = new StringBuffer(100);
504: sbuf.append(buffer, start, pos - start);
505: readLine(sbuf, 'I');
506: return sbuf.toString();
507: }
508:
509: /** Skip forwards or backwards a number of characters. */
510: public int skip(int n) throws IOException {
511: if (n < 0) {
512: int to_do = -n;
513: for (; to_do > 0 && pos > 0; to_do--)
514: unread();
515: return n + to_do;
516: } else {
517: // Same logic as in read(char[],int,int).
518: int to_do = n;
519: int ch;
520: if (pos >= limit)
521: ch = '\0';
522: else if (pos > 0)
523: ch = buffer[pos - 1];
524: else if ((flags & PREV_WAS_CR) != 0 || lineStartPos >= 0)
525: ch = '\n';
526: else
527: ch = '\0';
528: while (to_do > 0) {
529: if (ch == '\n' || ch == '\r' || pos >= limit) {
530: ch = read();
531: if (ch < 0)
532: return n - to_do;
533: to_do--;
534: } else {
535: int p = pos;
536: int lim = limit;
537: if (to_do < lim - p)
538: lim = p + to_do;
539: while (p < lim) {
540: ch = buffer[p];
541: // For simplicity and correctness we defer handling of
542: // newlines (including previous character) to read().
543: if (ch == '\n' || ch == '\r')
544: break;
545: p++;
546: }
547: to_do -= p - pos;
548: pos = p;
549: }
550: }
551: return n;
552: }
553: }
554:
555: public boolean ready() throws java.io.IOException {
556: return pos < limit || in.ready();
557: }
558:
559: /** Same as skip(), but assumes previous command was a non-EOF peek(). */
560: public final void skip_quick() throws java.io.IOException {
561: pos++;
562: }
563:
564: public void skip() throws java.io.IOException {
565: read();
566: }
567:
568: static int countLines(char[] buffer, int start, int limit) {
569: int count = 0;
570: char prev = '\0';
571: for (int i = start; i < limit; i++) {
572: char ch = buffer[i];
573: if ((ch == '\n' && prev != '\r') || ch == '\r')
574: count++;
575: prev = ch;
576: }
577: return count;
578: }
579:
580: /** Skips the rest of the current line, including the line terminator. */
581: public void skipRestOfLine() throws java.io.IOException {
582: for (;;) {
583: int c = read();
584: if (c < 0)
585: return;
586: if (c == '\r') {
587: c = read();
588: if (c >= 0 && c != '\n')
589: unread();
590: break;
591: } else if (c == '\n')
592: break;
593: }
594: }
595:
596: /* Move one character backwards. */
597: public void unread() throws java.io.IOException {
598: if (pos == 0)
599: throw new java.io.IOException("unread too much");
600: pos--;
601: char ch = buffer[pos];
602: if (ch == '\n' || ch == '\r') {
603: if (pos > 0 && ch == '\n' && getConvertCR()
604: && buffer[pos - 1] == '\r')
605: pos--;
606: if (pos < lineStartPos) {
607: lineNumber--;
608: int i;
609: for (i = pos; i > 0;) {
610: ch = buffer[--i];
611: if (ch == '\r' || ch == '\n') {
612: i++;
613: break;
614: }
615: }
616: lineStartPos = i;
617: }
618: }
619: }
620:
621: /** Same as unread, but only allowed after non-EOF-returning read().
622: * Also allowed after an intervening peek(), but only if the read()
623: * did not return '\r' or '\n'. */
624: public void unread_quick() {
625: pos--;
626: }
627:
628: public int peek() throws java.io.IOException {
629: if (pos < limit && pos > 0) {
630: char ch = buffer[pos - 1];
631: if (ch != '\n' && ch != '\r') {
632: ch = buffer[pos];
633: if (ch == '\r' && getConvertCR())
634: ch = '\n';
635: return ch;
636: }
637: }
638: int c = read();
639: if (c >= 0)
640: unread_quick();
641: return c;
642: }
643: }
|