001: /*
002: * Enhydra Java Application Server Project
003: *
004: * The contents of this file are subject to the Enhydra Public License
005: * Version 1.1 (the "License"); you may not use this file except in
006: * compliance with the License. You may obtain a copy of the License on
007: * the Enhydra web site ( http://www.enhydra.org/ ).
008: *
009: * Software distributed under the License is distributed on an "AS IS"
010: * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
011: * the License for the specific terms governing rights and limitations
012: * under the License.
013: *
014: * The Initial Developer of the Enhydra Application Server is Lutris
015: * Technologies, Inc. The Enhydra Application Server and portions created
016: * by Lutris Technologies, Inc. are Copyright Lutris Technologies, Inc.
017: * All Rights Reserved.
018: *
019: * Contributor(s):
020: *
021: * $Id: BMByteSearchStream.java,v 1.2 2006-06-15 13:47:00 sinisa Exp $
022: */
023:
024: package com.lutris.util;
025:
026: import java.io.FilterInputStream;
027: import java.io.IOException;
028: import java.io.InputStream;
029:
030: /**
031: * Implements the Boyer-Moore pattern matching algorithm for a given
032: * byte pattern. This object implements searches on byte-oriented input
033: * streams.
034: * <P>
035: * The algorithm was obtained from "Computer Algorithms - Introduction to
036: * Design and Analysis, Second Edition" by Sara Baase.
037: */
038: public class BMByteSearchStream extends FilterInputStream {
039: /**
040: * EOF value. Traditionally -1.
041: */
042: public static final int EOF = -1;
043:
044: /**
045: * "At Pattern" value. Indicates that the stream position is
046: * currently at the beginning of a detected pattern occurence.
047: */
048: public static final int AT_PATTERN = -2;
049:
050: /**
051: * Boyer-Moore byte string searcher for scanning the scan buffer.
052: */
053: private BMByteSearch searcher;
054:
055: /**
056: * Scan buffer -- Should be at least ten times the length of the
057: * search pattern.
058: */
059: private byte[] scanBuf;
060:
061: /**
062: * Scan buffer position.
063: */
064: private int scanBufPos;
065:
066: /**
067: * Number of bytes currently in the scan buffer.
068: */
069: private int scanBufCount;
070:
071: /**
072: * Total size of the scan buffer.
073: */
074: private int scanBufSize;
075:
076: /**
077: * The length of the search pattern.
078: */
079: private int patternLength;
080:
081: /**
082: * Number of bytes to overlap buffer reads by to ensure that
083: * the pattern gets matched even if it crosses a read boundary.
084: * This should normally be the length of the pattern.
085: */
086: private int patternKeep;
087:
088: /**
089: * The current position of the pattern within the current
090: * buffer contents. This may be less than the current buffer
091: * position if the pattern has already been skipped.
092: */
093: private int patternPos;
094:
095: /**
096: * Buffer to hold a single character in order to implement the
097: * single character read() method.
098: */
099: private byte[] readByte = new byte[1];
100:
101: /**
102: * Creates a Boyer-Moore byte stream scanner for a given pattern.
103: * Creates a buffer of length <code>buflen</code> for the scanning
104: * buffer.
105: *
106: * @param inputSource The input source stream to scan.
107: * @param pattern The pattern to scan for. Characters
108: * outside the Latin-1 encoding are truncated
109: * to signed bytes.
110: * @param buflen The length to use for the scanning buffer.
111: */
112: public BMByteSearchStream(InputStream inputSource, String pattern,
113: int buflen) {
114: super (inputSource); // Pass input source stream to parent class.
115: scanBuf = new byte[buflen];
116: scanBufSize = buflen;
117: scanBufPos = 0;
118: scanBufCount = 0;
119: setPattern(pattern);
120: }
121:
122: /**
123: * Reads the next byte of data from this input stream. The value byte
124: * is returned as an <code>int</code> in the range 0 to 255. If no
125: * byte is available because the end of the stream has been reached,
126: * the value -1 is returned. This method blocks until input data is
127: * available, the end of the stream is detected, or an exception is
128: * thrown
129: *
130: * @return The next byte of data, or -1 if the end
131: * of stream is reached.
132: * @exception IOException If an I/O error occurs.
133: */
134: public int read() throws IOException {
135: int len = read(readByte, 0, 1);
136: if (len != 1)
137: return -1;
138: return ((int) readByte[0] + 0x100) & 0xff;
139: }
140:
141: /**
142: * Reads up to <code>buffer.length</code> bytes of data from this input
143: * stream into an array of bytes. This method blocks until some input
144: * is available
145: *
146: * @param buffer The buffer into which data are read.
147: * @return The number of bytes actually read, or -1
148: * if there are no more bytes because the
149: * end of stream has been reached.
150: * @exception IOException If an I/O error occurs.
151: */
152: public int read(byte[] buffer) throws IOException {
153: return (read(buffer, 0, buffer.length));
154: }
155:
156: /**
157: * Reads <code>length</code> bytes of data from this input stream into
158: * an array of bytes. This method blocks until some input is available.
159: *
160: * @param buffer The buffer into which data are read.
161: * @param offset The start offset of the data.
162: * @param length The maximum number of bytes read.
163: * @return The total number of bytes read into the
164: * buffer, or -1 if there are no more bytes
165: * because the end of stream has been reached.
166: * @exception IOException If an I/O error occurs.
167: */
168: public int read(byte[] buffer, int offset, int length)
169: throws IOException {
170: int count, dst, i;
171: while ((count = min(length, scanBufCount - patternKeep)) <= 0) {
172: if (loadBuf() < 0) {
173: //
174: // Underlying stream reached EOF. return remainder of
175: // buffer or EOF.
176: //
177: count = min(length, scanBufCount);
178: if (count <= 0) {
179: patternPos = -1;
180: return EOF;
181: }
182: break;
183: }
184: }
185: for (dst = offset, i = count; i > 0; i--) {
186: buffer[dst++] = scanBuf[scanBufPos++];
187: }
188: scanBufCount -= count;
189: if ((scanBufPos > patternPos) && (patternPos >= 0)) {
190: patternPos = searcher.search(scanBuf, scanBufPos,
191: scanBufCount);
192: }
193: return count;
194: }
195:
196: /**
197: * Skips over and discards n bytes of data from the input stream.
198: * The <code>skip</code> method may, for a variety of reasons,
199: * end up skipping over some smaller number of bytes, possibly 0.
200: * The actual number of bytes skipped is returned.
201: *
202: * @param n The number of bytes to be skipped.
203: * @return The actual number of bytes skipped.
204: * @exception IOException If an I/O error occurs.
205: */
206: public long skip(long n) throws IOException {
207: byte[] skipBuf = new byte[1024];
208: long bytesLeft = n;
209: long skipped = 0;
210: long r, count;
211: while (bytesLeft > 0) {
212: if (bytesLeft < 1024)
213: count = bytesLeft;
214: else
215: count = 1024;
216: if ((r = this .read(skipBuf, 0, (int) count)) < 0) {
217: patternPos = -1;
218: return (skipped); // At EOF.
219: }
220: skipped += r;
221: bytesLeft -= r;
222: }
223: if ((scanBufPos > patternPos) && (patternPos >= 0)) {
224: patternPos = searcher.search(scanBuf, scanBufPos,
225: scanBufCount);
226: }
227: return skipped;
228: }
229:
230: /**
231: * Returns the number of bytes that can be read from this input stream
232: * without blocking. This consists of any bytes left in the buffer
233: * plus the result of the underlying stream's <code>available</code>
234: * method.
235: *
236: * @return The number of bytes that can be read
237: * without blocking.
238: * @exception IOException If an I/O error occurs.
239: */
240: public int available() throws IOException {
241: return (scanBufCount + super .available());
242: }
243:
244: /**
245: * Returns the number of bytes that can be read from this input stream
246: * without blocking or encountering the search pattern. If the search
247: * pattern has been found in the buffer, this is the number of bytes
248: * in the search buffer prior to the search pattern; otherwise, while
249: * there may be additional data in the input stream buffer that can be
250: * brought into the search buffer without blocking, this data may or
251: * may not contain the search pattern (or complete a partial search
252: * pattern located at the end of the buffer), so this is calculated as
253: * the number of bytes left in the search buffer less the length of
254: * the search pattern.
255: *
256: * @return The number of bytes that can be read
257: * for certain without blocking or encountering
258: * the search pattern.
259: * @exception IOException If an I/O error occurs.
260: */
261: public int availableTo() throws IOException {
262: return (patternPos < 0) ? scanBufCount - patternLength
263: : patternPos - scanBufPos;
264: }
265:
266: /**
267: * Reads data into a buffer until the search pattern or the
268: * end of file is detected. Returns <code>-1</code> if at the
269: * search pattern or end-of-file.
270: *
271: * @param buffer Buffer to read into.
272: * @param offset Offset in buffer to read into.
273: * @param length Number of bytes to try to read.
274: * @return The number of bytes actually read, or
275: * <code>-1</code> if at the pattern or eof.
276: * @exception IOException Thrown if an I/O exception occurs.
277: */
278: public int readTo(byte[] buffer, int offset, int length)
279: throws IOException {
280: int count, dst, i;
281: if (patternPos == scanBufPos)
282: return AT_PATTERN;
283: if (patternPos > scanBufPos) {
284: count = min(length, patternPos - scanBufPos);
285: for (dst = offset, i = count; i > 0; i--) {
286: buffer[dst++] = scanBuf[scanBufPos++];
287: }
288: scanBufCount -= count;
289: return count;
290: }
291: while ((count = min(length, scanBufCount - patternKeep)) <= 0) {
292: if (loadBuf() < 0) {
293: //
294: // Underlying stream reached EOF. return remainder of
295: // buffer or EOF.
296: //
297: count = min(length, scanBufCount);
298: if (count <= 0)
299: return EOF;
300: break;
301: }
302: // Bug fixed - must recheck for being at pattern.
303: if (patternPos == scanBufPos)
304: return AT_PATTERN;
305: if (patternPos > scanBufPos) {
306: count = min(length, patternPos - scanBufPos);
307: for (dst = offset, i = count; i > 0; i--) {
308: buffer[dst++] = scanBuf[scanBufPos++];
309: }
310: scanBufCount -= count;
311: return count;
312: }
313: }
314: for (dst = offset, i = count; i > 0; i--) {
315: buffer[dst++] = scanBuf[scanBufPos++];
316: }
317: scanBufCount -= count;
318: return count;
319: }
320:
321: /**
322: * Skips all bytes up to but not including the search pattern or EOF.
323: * Returns the number of bytes skipped. Repeated calls to
324: * this method will leave the stream at the same postion until
325: * another call explicitly reads or skips the pattern data.
326: *
327: * @return The number of bytes skipped.
328: *
329: * @exception IOException Thrown if an I/O exception occurs.
330: */
331: public int skipTo() throws IOException {
332: int count = 0, skipCount = 0;
333: while (patternPos < scanBufPos) {
334: count = min(0, scanBufCount - patternKeep);
335: if (count >= 0) {
336: skipCount += count;
337: scanBufCount -= count;
338: scanBufPos += count;
339: }
340: int nread = loadBuf();
341: if (nread < 0) {
342: boolean notFound = (patternPos < scanBufPos);
343: skipCount += scanBufCount;
344: scanBufCount = 0;
345: scanBufPos = 0;
346: patternPos = -1;
347: if (notFound)
348: return -1;
349: return (skipCount <= 0) ? 0 : skipCount;
350: }
351: if (nread == 0) {
352: // Special case. Buffer needs to be discarded.
353: if (scanBufCount == scanBuf.length) {
354: int skip = scanBuf.length - patternKeep;
355: scanBufPos = skip;
356: scanBufCount = patternKeep;
357: skipCount += skip;
358: patternPos = -1; // Pattern not avail.
359: }
360: }
361: }
362: count = patternPos - scanBufPos;
363: skipCount += count;
364: scanBufPos += count;
365: scanBufCount -= count;
366: return skipCount;
367: }
368:
369: /**
370: * Skips all bytes up to and including the search pattern or EOF.
371: *
372: * @return The number of bytes skipped.
373: * @exception IOException Thrown if an I/O exception occurs.
374: */
375: public int skipPattern() throws IOException {
376: int skipCount = skipTo();
377: if (skipCount < 0)
378: return -1; // Pattern not found.
379: skipCount += patternLength;
380: scanBufCount -= patternLength;
381: scanBufPos += patternLength;
382: if (scanBufCount > 0) {
383: patternPos = searcher.search(scanBuf, scanBufPos,
384: scanBufCount);
385: }
386: return skipCount;
387: }
388:
389: /**
390: * Set the search pattern. After this call, all new scans will be
391: * for the new pattern. Characters outside the values 0-255 are
392: * truncated into signed byte values in the Latin-1 encoding.
393: *
394: * @param pattern The new pattern to search for.
395: */
396: public void setPattern(String pattern) {
397: searcher = new BMByteSearch(pattern);
398: patternLength = searcher.getPatternLength();
399: patternKeep = patternLength + 2; // Potential CRLF parsing. *ugh*
400: patternPos = searcher.search(scanBuf, scanBufPos, scanBufCount);
401: }
402:
403: /**
404: * Set the search pattern. After this call, all new scans will be
405: * for the new pattern. Characters outside the values 0-255 are
406: * truncated into signed byte values in the Latin-1 encoding.
407: *
408: * @param search The precomputed Boyer-Moore searcher.
409: */
410: public void setPattern(BMByteSearch search) {
411: searcher = search;
412: patternLength = searcher.getPatternLength();
413: patternKeep = patternLength + 2; // Potential CRLF parsing. *ugh*
414: patternPos = searcher.search(scanBuf, scanBufPos, scanBufCount);
415: }
416:
417: /**
418: * Returns the next <code>length</code> bytes of the input buffer
419: * as a string. If fewer than <code>length</code> bytes remain on
420: * the input stream, then only the remaining bytes are returned. If
421: * the input stream is at EOF and there are no more bytes in the buffer
422: * then an empty string is returned.
423: *
424: * @param length The number of bytes to look ahead.
425: * @return A string containing the lookahead bytes as 8 bit characters.
426: * @exception IOException Thrown if an I/O exception occurs.
427: */
428: public String peekAheadString(int length) throws IOException {
429: while (scanBufCount < length) {
430: if (loadBuf() < 0)
431: break;
432: }
433: int count = min(length, scanBufCount);
434: if (count <= 0)
435: return "";
436: StringBuffer sb = new StringBuffer();
437: for (int i = scanBufPos; count > 0; i++, count--) {
438: sb.append((char) (scanBuf[i] & 0xff));
439: }
440: return sb.toString();
441: }
442:
443: /**
444: * Reads more data from the underlying input source, keeping
445: * track of buffering and pattern matching. Before reading
446: * new bytes, any remaining bytes in the buffer are copied
447: * to position 0. The <code>scanBufPos</code>,
448: * <code>scanBufCount</code>, and <code>patternPos</code>
449: * fields are reset to appropriate values.
450: *
451: * @return The number of new bytes added to the
452: * buffer, or <code>-1</code> if the input
453: * stream has reached end of file.
454: * @exception IOException Thrown if an I/O exception occurs.
455: */
456: private int loadBuf() throws IOException {
457: //
458: // First, copy remainder of buffer to beginning.
459: //
460: if (scanBufPos > 0) {
461: for (int i = 0; i < scanBufCount; i++) {
462: scanBuf[i] = scanBuf[scanBufPos + i];
463: }
464: scanBufPos = 0;
465: }
466:
467: //
468: // Determine how many bytes are available to read.
469: // If no bytes are available, then read one byte and block.
470: // Then, recheck available.
471: //
472: int n, avail = super .available();
473: if (avail == 0) {
474: // Wait for at least one char.
475: n = super .read(scanBuf, scanBufCount, 1);
476: if (n <= 0) {
477: patternPos = searcher.search(scanBuf, 0, scanBufCount);
478: return EOF;
479: }
480: scanBufCount += n;
481: avail = super .available();
482: if (avail == 0) {
483: patternPos = searcher.search(scanBuf, 0, scanBufCount);
484: return n;
485: }
486: }
487:
488: //
489: // Only read currently available bytes. Don't block until
490: // all bytes are ready. That could cause a hang.
491: //
492: int nread = scanBufSize - scanBufCount;
493: if ((avail >= 0) && (nread > avail))
494: nread = avail;
495:
496: //
497: // Second, read additional bytes using superclass's read.
498: //
499: n = super .read(scanBuf, scanBufCount, nread);
500:
501: if (n < 0) {
502: patternPos = searcher.search(scanBuf, 0, scanBufCount);
503: return EOF;
504: }
505: scanBufCount += n;
506:
507: //
508: // Search for first occurence of pattern.
509: //
510: patternPos = searcher.search(scanBuf, 0, scanBufCount);
511: return n;
512: }
513:
514: /**
515: * Compares two integers and returns the lesser value.
516: *
517: * @param i1 First integer to compare.
518: * @param i2 Second integer to compare.
519: * @return The lesser of <code>i1</code> or <code>i2</code>.
520: */
521: private static final int min(int i1, int i2) {
522: return (i1 < i2) ? i1 : i2;
523: }
524: }
|