001: package gnu.text;
002:
003: import java.io.*;
004:
005: /** A LineNumberReader with some extra features:
006: *
007: * You can seek backwards to the start of the line preceding the
008: * current position (or the mark, if that has been set).
009: * You can use seek with a negative offset, or unread.
010: * You can also use peek to look at the next character without moving.
011: *
012: * The method getColumnNumber gives you the current column.
013: *
014: * Provides a method that is called at the start of a line.
015: * This is especially useful for interactive streams (e.g. prompting).
016: *
017: * It would be nice if we could inherit from LineNumberReader.
018: * That may be possible in theory, but it is difficult and
019: * expensive (because we don't get access to BufferedReader's buffer).
020: *
021: * @author Per Bothner <bothner@cygnus.com>
022: */
023:
public class LineBufferedReader extends FilterReader {
    /** Default (initial buffer) size. */
    final static int BUFFER_SIZE = 1024;

    /** The input buffer, containing the current line etc. */
    public char[] buffer;

    /** The current read position, as an index into buffer. */
    public int pos;

    /** The length of the valid (data-containing) part of the buffer. */
    public int limit;

    /** The high-water mark for pos, at a reset or line start. */
    int highestPos;

    /** Bit set built from CONVERT_CR, USER_BUFFER and PREV_WAS_CR. */
    private int flags;

    // Notice the asymmetry in how "X\r\nY" is handled (assuming convertCR)..
    // When we read forward, the positions are 0, 1, 2, 4.
    // After seeing the '\r', we do not read ahead to look for a '\n'
    // because if we did, and there were no '\n', terminal input would hang.
    // The (lineNumber, lineStartPos) goes (0,0), (0,0), (0,0), (1,3).
    // However, the methods (getLineNumber(), getColumnNumber())
    // return the external values (0:0), (0:1), (1:0), (1:1).
    // When we move backwards, the positions are 4, 3, 1, 0.
    // This is because we want to stay within the same line.
    // The (lineNumber, lineStartPos) goes (1,3), (1,3), (0,0), (0,0).
    // For (getLineNumber(), getColumnNumber()) we get (1:1), (1:0), (0:1), (0:0)
    // which are the same as when we are moving forwards.
    // A nice bonus is that both skip_quick and unread_quick are trivial.

    /* If true in flags, convert "\r" and "\r\n" to '\n'. */
    private static final int CONVERT_CR = 1;

    /* If true in flags, may not re-allocate buffer. */
    private static final int USER_BUFFER = 2;

    /* If true in flags, char before start of buffer was '\r'. */
    private static final int PREV_WAS_CR = 4;

    /** True if CR and CRLF should be converted to LF. */
    public final boolean getConvertCR() {
        return (flags & CONVERT_CR) != 0;
    }

    /** Enable or disable conversion of CR and CRLF to LF.
     * @param convertCR true to have read() report "\r" and "\r\n" as '\n'
     */
    public final void setConvertCR(boolean convertCR) {
        if (convertCR)
            flags |= CONVERT_CR;
        else
            flags &= ~CONVERT_CR;
    }

    /** The position that marks the start of the current or marked line.
     * If readAheadLimit > 0 && markPos < pos, then it is the start of the
     * line containing the markPos.
     * If we are at the end of a line, and have not started reading the next
     * one (and are therefore allowed by unread back to the old line),
     * the current line is still the old line; lineStartPos does not
     * get set to the new pos until we read/peek the first char of the new line.
     * If lineStartPos < 0, it means we went beyond the buffer maximum. */
    int lineStartPos;

    /** Optional name for this reader (see getName/setName). */
    String name;

    /** The current line number (at position of lineStartPos). */
    protected int lineNumber;

    /** If mark has been called, and not invalidated, the read ahead limit.
     * Zero if mark has not been called, or had been invalidated
     * (due to either calling reset or excessive reading ahead). */
    protected int readAheadLimit = 0;

    /** The position of the mark (assuming readAheadLimit > 0).
     * (Garbage if readAheadLimit <= 0). */
    protected int markPos;

    /** Create a reader on top of a byte stream.
     * The bytes are decoded by a default-configured InputStreamReader. */
    public LineBufferedReader(InputStream in) {
        super(new InputStreamReader(in));
    }

    /** Create a reader on top of another Reader. */
    public LineBufferedReader(Reader in) {
        super(in);
    }

    /** True if ch is one of the two line-terminator characters. */
    private static boolean isLineEnd(int ch) {
        return ch == '\n' || ch == '\r';
    }

    /** A hook to allow sub-classes to perform some action at start of line.
     * Called just before the first character of the new line is read.
     * @param revisited true if we have read here before (i.e.
     *   we did a reset or unread() to get here)
     */
    public void lineStart(boolean revisited) throws java.io.IOException {
    }

    /** Called by read() when it needs its buffer filled.
     * Read characters into buffer, starting at pos, for len.
     * Can assume that len > 0.  Only called if pos>=limit.
     * Return -1 if EOF, otherwise number of read chars.
     * This can be usefully overridden by sub-classes. */
    public int fill(int len) throws java.io.IOException {
        return in.read(buffer, pos, len);
    }

    /** Invalidate the mark, and advance (lineNumber, lineStartPos) over
     * the line ends between the old lineStartPos and pos that were not
     * accounted for while the mark was active. */
    private void clearMark() {
        // Invalidate the mark.
        readAheadLimit = 0;
        // Need to maintain the lineStartPos invariant.
        int i = lineStartPos < 0 ? 0 : lineStartPos;
        for (;;) {
            if (++i >= pos)
                break;
            char ch = buffer[i - 1];
            // A '\r' immediately followed by '\n' is counted at the '\n'
            // when CR conversion is on.
            if (ch == '\n'
                    || (ch == '\r' && (!getConvertCR() || buffer[i] != '\n'))) {
                lineNumber++;
                lineStartPos = i;
            }
        }
    }

    /** Specify a buffer to use for the input buffer.
     * Passing null switches back to an internally managed buffer (a private
     * copy of the current one), which may then be re-allocated on demand.
     * @param buffer the caller-supplied buffer, or null
     * @throws java.io.IOException if buffer cannot hold the unread data
     */
    public void setBuffer(char[] buffer) throws java.io.IOException {
        if (buffer == null) {
            if (this.buffer != null) {
                buffer = new char[this.buffer.length];
                System.arraycopy(this.buffer, 0, buffer, 0,
                        this.buffer.length);
                this.buffer = buffer;
            }
            flags &= ~USER_BUFFER;
        } else {
            if (limit - pos > buffer.length)
                throw new java.io.IOException("setBuffer - too short");
            flags |= USER_BUFFER;
            reserve(buffer, 0);
        }
    }

    /* Make sure there is enough space for reserve more characters in buffer.
     * The retained data is moved into the given buffer (which may be the
     * current one, or a replacement), and all positions are rebased. */
    private void reserve(char[] buffer, int reserve)
            throws java.io.IOException {
        int saveStart;
        reserve += limit;
        if (reserve <= buffer.length)
            saveStart = 0;
        else {
            saveStart = pos;
            if (readAheadLimit > 0 && markPos < pos) {
                // Drop the mark only when we have read MORE than the
                // promised read-ahead limit (or a user buffer cannot hold
                // everything since the mark); otherwise keep the marked data.
                if (pos - markPos > readAheadLimit
                        || ((flags & USER_BUFFER) != 0
                            && reserve - markPos > buffer.length))
                    clearMark();
                else
                    saveStart = markPos;
            }

            // From here on, reserve is the number of chars we must discard.
            reserve -= buffer.length;
            if (saveStart < lineStartPos && reserve <= saveStart)
                ;
            else if (reserve <= lineStartPos
                    && saveStart > lineStartPos)
                saveStart = lineStartPos;
            else if ((flags & USER_BUFFER) != 0)
                saveStart -= (saveStart - reserve) >> 2;
            else {
                if (lineStartPos >= 0)
                    saveStart = lineStartPos;
                buffer = new char[2 * buffer.length];
            }

            lineStartPos -= saveStart;
            limit -= saveStart;
            markPos -= saveStart;
            pos -= saveStart;
            highestPos -= saveStart;
        }
        if (limit > 0)
            System.arraycopy(this.buffer, saveStart, buffer, 0, limit);
        this.buffer = buffer;
    }

    /** Read one character, maintaining the line and column bookkeeping.
     * With CR conversion on, "\r" and "\r\n" are both reported as one '\n'.
     * @return the character read, or -1 at end of input
     */
    public int read() throws java.io.IOException {
        char prev;
        if (pos > 0)
            prev = buffer[pos - 1];
        else if ((flags & PREV_WAS_CR) != 0)
            prev = '\r';
        else if (lineStartPos >= 0)
            prev = '\n';
        else
            prev = '\0';
        if (prev == '\r' || prev == '\n') {
            // We are about to read the first character of a new line.
            if (lineStartPos < pos
                    && (readAheadLimit == 0 || pos <= markPos)) {
                lineStartPos = pos;
                lineNumber++;
            }
            boolean revisited = pos < highestPos;
            // Do not call the hook a second time between '\r' and '\n'.
            if (prev != '\n'
                    || (pos <= 1 ? (flags & PREV_WAS_CR) == 0
                            : buffer[pos - 2] != '\r')) {
                lineStart(revisited);
            }
            if (!revisited)
                highestPos = pos + 1; // Add one for this read().
        }

        if (pos >= limit) {
            if (buffer == null)
                buffer = new char[BUFFER_SIZE];
            else if (limit == buffer.length)
                reserve(buffer, 1);
            if (pos == 0) {
                // Remember what preceded the (possibly shifted) buffer start.
                if (prev == '\r')
                    flags |= PREV_WAS_CR;
                else
                    flags &= ~PREV_WAS_CR;
            }
            int readCount = fill(buffer.length - pos);
            if (readCount <= 0)
                return -1;
            limit += readCount;
        }

        int ch = buffer[pos++];
        if (ch == '\n') {
            if (prev == '\r') {
                // lineNumber is the number of lines before lineStartPos.
                // If lineStartPos is between '\r and '\n', we will count
                // an extra line for the '\n', which gets the count off.
                // Hence compensate.
                if (lineStartPos == pos - 1) {
                    lineNumber--;
                    lineStartPos--;
                }
                if (getConvertCR())
                    return read();
            }
        } else if (ch == '\r') {
            if (getConvertCR())
                return '\n';
        }
        return ch;
    }

    /** Read up to len characters into cbuf, starting at index off.
     * Plain characters in the middle of a line are copied directly from the
     * buffer; anything at or next to a line end is delegated to read() so
     * that line numbers, columns and the lineStart hook stay correct.
     * Once at least one character has been delivered, this method returns
     * rather than refill the buffer (which could block on interactive input).
     * @return the number of characters stored, or -1 at end of input
     *   (len itself is returned unchanged when len <= 0)
     */
    public int read(char[] cbuf, int off, int len)
            throws java.io.IOException {
        if (len <= 0)
            return len;
        int next = off;
        int remaining = len;
        while (remaining > 0) {
            if (pos >= limit && remaining < len)
                break; // Buffer exhausted and we already have something.
            boolean plain = pos > 0 && pos < limit
                    && !isLineEnd(buffer[pos - 1])
                    && !isLineEnd(buffer[pos]);
            if (plain) {
                cbuf[next++] = buffer[pos++];
            } else {
                int c = read();
                if (c < 0)
                    break; // Never store the EOF marker as a character.
                cbuf[next++] = (char) c;
            }
            remaining--;
        }
        int count = next - off;
        return count == 0 ? -1 : count;
    }

    /** Return the name previously given to setName (may be null). */
    public String getName() {
        return name;
    }

    /** Associate a name with this reader. */
    public void setName(String name) {
        this.name = name;
    }

    /** Get the current line number.
     * The "first" line is number 0. */
    public int getLineNumber() {
        int lineno = lineNumber;
        if (readAheadLimit == 0) // Normal, fast case:
        {
            if (pos > 0 && pos > lineStartPos) {
                char prev = buffer[pos - 1];
                if (prev == '\n' || prev == '\r')
                    lineno++;
            }
        } else {
            // A mark is active, so lineStartPos may lag behind pos:
            // count the line ends in [start, pos).
            int start = lineStartPos < 0 ? 0 : lineStartPos;
            lineno += countLines(buffer, start, pos - start);
        }
        return lineno;
    }

    /** Adjust the line counter so that getLineNumber() returns lineNumber. */
    public void setLineNumber(int lineNumber) {
        this.lineNumber += lineNumber - getLineNumber();
    }

    /** Get the current column number; the first column is number 0. */
    public int getColumnNumber() {
        if (pos > 0) {
            char prev = buffer[pos - 1];
            if (prev == '\n' || prev == '\r')
                return 0;
        }
        if (readAheadLimit <= 0) // Normal, fast case:
            return pos - lineStartPos;

        // Somebody did a mark().  Thus lineStartPos is not necessarily the
        // start of the current line, so we have to search.
        int start = lineStartPos < 0 ? 0 : lineStartPos;
        boolean sawLineEnd = false;
        for (int i = start; i < pos;) {
            char ch = buffer[i++];
            if (ch == '\n' || ch == '\r') {
                start = i;
                sawLineEnd = true;
            }
        }
        int col = pos - start;
        // Only when the current line really began before the buffer do we
        // add the part of the line that has been shifted out.
        if (lineStartPos < 0 && !sawLineEnd)
            col -= lineStartPos;
        return col;
    }

    /** Marking is always supported. */
    public boolean markSupported() {
        return true;
    }

    /** Remember the current position so that reset() can return to it.
     * @param readAheadLimit number of characters that may be read while
     *   still preserving the mark
     */
    public synchronized void mark(int readAheadLimit) {
        if (this.readAheadLimit > 0)
            clearMark();
        this.readAheadLimit = readAheadLimit;
        markPos = pos;
    }

    /** Return to the marked position and discard the mark.
     * @throws IOException if there is no valid mark
     */
    public void reset() throws IOException {
        if (readAheadLimit <= 0)
            throw new IOException("mark invalid");
        if (pos > highestPos)
            highestPos = pos;
        pos = markPos;
        readAheadLimit = 0;
    }

    /** Read a line.
     * If mode is 'I' ("ignore") ignore delimiters.
     * If mode is 'P' ("peek") leave delimiter in input stream.
     * If mode is 'A' ("append") append delimiter to result.
     */
    public void readLine(StringBuffer sbuf, char mode)
            throws IOException {
        for (;;) {
            int ch = read();
            if (ch < 0)
                return;
            // Back up, then scan the buffered part of the line in one go.
            int start = --pos;
            while (pos < limit) {
                ch = buffer[pos++];
                if (ch == '\r' || ch == '\n') {
                    sbuf.append(buffer, start, pos - 1 - start);
                    if (mode == 'P') {
                        pos--;
                        return;
                    }
                    if (getConvertCR() || ch == '\n') {
                        if (mode != 'I')
                            sbuf.append('\n');
                    } else {
                        if (mode != 'I')
                            sbuf.append('\r');
                        // Raw mode: swallow the '\n' of a "\r\n" pair.
                        ch = read();
                        if (ch == '\n') {
                            if (mode != 'I')
                                sbuf.append('\n');
                        } else if (ch >= 0)
                            unread_quick();
                    }
                    return;
                }
            }
            sbuf.append(buffer, start, pos - start);
        }
    }

    /** Read one line and return it without its line terminator.
     * "\n", "\r" and "\r\n" are all accepted as terminators.
     * @return the line, or null if end of input is reached before any
     *   character could be read
     */
    public String readLine() throws IOException {
        int ch = read();
        if (ch < 0)
            return null;
        if (ch == '\r' || ch == '\n') {
            // Empty line.  In raw (non-converting) mode the '\n' of a
            // "\r\n" pair is still pending, so consume it here.
            if (ch == '\r' && !getConvertCR()) {
                int next = read();
                if (next >= 0 && next != '\n')
                    unread_quick();
            }
            return "";
        }
        int start = pos - 1;
        while (pos < limit) {
            ch = buffer[pos++];
            if (ch == '\r' || ch == '\n') {
                int end = pos - 1; // Index of the terminator: not returned.
                if (ch != '\n' && !getConvertCR()) {
                    if (pos >= limit) {
                        // Cannot see whether a '\n' follows; let the
                        // general routine below deal with the terminator.
                        pos--;
                        break;
                    }
                    if (buffer[pos] == '\n')
                        pos++;
                }
                return new String(buffer, start, end - start);
            }
        }
        // The line continues beyond what is buffered.
        StringBuffer sbuf = new StringBuffer(100);
        sbuf.append(buffer, start, pos - start);
        readLine(sbuf, 'I');
        return sbuf.toString();
    }

    /** Skip forwards or backwards a number of characters.
     * @param n number of characters to skip; negative means backwards
     * @return the signed number of characters actually skipped
     */
    public int skip(int n) throws IOException {
        if (n < 0) {
            int to_do = -n;
            for (; to_do > 0 && pos > 0; to_do--)
                unread();
            return n + to_do;
        } else {
            int to_do = n;
            while (to_do > 0) {
                // Plain mid-line characters are stepped over directly;
                // everything else goes through read() so that the line
                // bookkeeping and CR/LF handling stay correct.
                if (pos > 0 && pos < limit
                        && !isLineEnd(buffer[pos - 1])
                        && !isLineEnd(buffer[pos]))
                    pos++;
                else if (read() < 0)
                    break;
                to_do--;
            }
            return n - to_do;
        }
    }

    /** True if a read is guaranteed not to block: either data is already
     * buffered or the underlying reader reports ready. */
    public boolean ready() throws java.io.IOException {
        return pos < limit || in.ready();
    }

    /** Same as skip(), but assumes previous command was a non-EOF peek(). */
    public final void skip_quick() throws java.io.IOException {
        pos++;
    }

    /** Skip one character (equivalent to discarding the result of read()). */
    public void skip() throws java.io.IOException {
        read();
    }

    /** Count the line terminators in buffer[off .. off+len-1].
     * A "\r\n" pair counts as a single terminator.
     * @param buffer the characters to scan
     * @param off index of the first character to scan
     * @param len number of characters to scan
     */
    static int countLines(char[] buffer, int off, int len) {
        int count = 0;
        char prev = '\0';
        for (int i = 0; i < len; i++) {
            char ch = buffer[i + off];
            if ((ch == '\n' && prev != '\r') || ch == '\r')
                count++;
            prev = ch;
        }
        return count;
    }

    /* Move one character backwards. */
    public void unread() throws java.io.IOException {
        if (pos == 0)
            throw new java.io.IOException("unread too much");
        pos--;
        char ch = buffer[pos];
        if (ch == '\n' || ch == '\r') {
            // With CR conversion a "\r\n" pair was delivered as one char,
            // so step back over both.
            if (pos > 0 && ch == '\n' && getConvertCR()
                    && buffer[pos - 1] == '\r')
                pos--;
            if (pos < lineStartPos) {
                // We moved back into the previous line: find its start.
                lineNumber--;
                int i;
                for (i = pos; i > 0;) {
                    ch = buffer[--i];
                    if (ch == '\r' || ch == '\n') {
                        i++;
                        break;
                    }
                }
                lineStartPos = i;
            }
        }
    }

    /** Same as unread, but only allowed after non-EOF-returning read().
     * Also allowed after an intervening peek(), but only if the read()
     * did not return '\r' or '\n'. */
    public void unread_quick() {
        pos--;
    }

    /** Return the next character without consuming it.
     * @return the next character, or -1 at end of input
     */
    public int peek() throws java.io.IOException {
        if (pos < limit && pos > 0) {
            char ch = buffer[pos - 1];
            if (ch != '\n' && ch != '\r') {
                // Mid-line fast path: no bookkeeping is needed.
                ch = buffer[pos];
                if (ch == '\r' && getConvertCR())
                    ch = '\n';
                return ch;
            }
        }
        int c = read();
        if (c >= 0)
            unread_quick();
        return c;
    }

}
|