001: /*
002: * $Id: AwkMatcher.java,v 1.11 2003/11/07 20:16:24 dfs Exp $
003: *
004: * ====================================================================
005: * The Apache Software License, Version 1.1
006: *
007: * Copyright (c) 2000 The Apache Software Foundation. All rights
008: * reserved.
009: *
010: * Redistribution and use in source and binary forms, with or without
011: * modification, are permitted provided that the following conditions
012: * are met:
013: *
014: * 1. Redistributions of source code must retain the above copyright
015: * notice, this list of conditions and the following disclaimer.
016: *
017: * 2. Redistributions in binary form must reproduce the above copyright
018: * notice, this list of conditions and the following disclaimer in
019: * the documentation and/or other materials provided with the
020: * distribution.
021: *
022: * 3. The end-user documentation included with the redistribution,
023: * if any, must include the following acknowledgment:
024: * "This product includes software developed by the
025: * Apache Software Foundation (http://www.apache.org/)."
026: * Alternately, this acknowledgment may appear in the software itself,
027: * if and wherever such third-party acknowledgments normally appear.
028: *
029: * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro"
030: * must not be used to endorse or promote products derived from this
031: * software without prior written permission. For written
032: * permission, please contact apache@apache.org.
033: *
034: * 5. Products derived from this software may not be called "Apache"
035: * or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their
036: * name, without prior written permission of the Apache Software Foundation.
037: *
038: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
039: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
040: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
041: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
042: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
043: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
044: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
045: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
046: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
047: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
048: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
049: * SUCH DAMAGE.
050: * ====================================================================
051: *
052: * This software consists of voluntary contributions made by many
053: * individuals on behalf of the Apache Software Foundation. For more
054: * information on the Apache Software Foundation, please see
055: * <http://www.apache.org/>.
056: */
057:
058: package org.apache.oro.text.awk;
059:
060: import java.io.*;
061:
062: import org.apache.oro.text.regex.*;
063:
064: /**
065: * The AwkMatcher class is used to match regular expressions
066: * (conforming to the Awk regular expression syntax) generated by
067: * AwkCompiler. AwkMatcher only supports 8-bit ASCII. Any attempt
068: * to match Unicode values greater than 255 will result in undefined
069: * behavior. AwkMatcher finds true leftmost-longest matches, so
070: * you must take care with how you formulate your regular expression
071: * to avoid matching more than you really want.
072: * <p>
073: * It is important for you to remember that AwkMatcher does not save
074: * parenthesized sub-group information. Therefore the number of groups
075: * saved in a MatchResult produced by AwkMatcher will always be 1.
076: *
077: * @version @version@
078: * @since 1.0
079: * @see org.apache.oro.text.regex.PatternMatcher
080: * @see AwkCompiler
081: */
082: public final class AwkMatcher implements PatternMatcher {
083: private int __lastMatchedBufferOffset;
084: private AwkMatchResult __lastMatchResult = null;
085: private AwkStreamInput __scratchBuffer, __streamSearchBuffer;
086: private AwkPattern __awkPattern;
087: private int __offsets[] = new int[2];
088:
089: /**
090: * A kluge variable to make PatternMatcherInput matches work when
091: * their begin offset is non-zero. This kluge is caused by the
092: * misguided notion that AwkStreamInput could be overloaded to do
093: * both stream and fixed buffer matches. The whole input representation
094: * scheme has to be scrapped and redone. -- dfs 2001/07/10
095: */
096: private int __beginOffset;
097:
098: public AwkMatcher() {
099: __scratchBuffer = new AwkStreamInput();
100: __scratchBuffer._endOfStreamReached = true;
101: }
102:
103: /**
104: * Determines if a prefix of a string (represented as a char[])
105: * matches a given pattern, starting from a given offset into the string.
106: * If a prefix of the string matches the pattern, a MatchResult instance
107: * representing the match is made accesible via
108: * {@link #getMatch()}.
109: * <p>
110: * This method is useful for certain common token identification tasks
111: * that are made more difficult without this functionality.
112: * <p>
113: * @param input The char[] to test for a prefix match.
114: * @param pattern The Pattern to be matched.
115: * @param offset The offset at which to start searching for the prefix.
116: * @return True if input matches pattern, false otherwise.
117: */
118: // I reimplemented this method in terms of streammatchesPrefix
119: // to reduce the code size. This is not very elegant and
120: // reduces performance by a small degree.
121: public boolean matchesPrefix(char[] input, Pattern pattern,
122: int offset) {
123: int result = -1;
124:
125: __awkPattern = (AwkPattern) pattern;
126:
127: __scratchBuffer._buffer = input;
128: __scratchBuffer._bufferSize = input.length;
129: __scratchBuffer._bufferOffset = __beginOffset = 0;
130: __scratchBuffer._endOfStreamReached = true;
131: __streamSearchBuffer = __scratchBuffer;
132: __offsets[0] = offset;
133: try {
134: result = __streamMatchPrefix();
135: } catch (IOException e) {
136: // Don't do anything because we're not doing any I/O
137: result = -1;
138: }
139:
140: if (result < 0) {
141: __lastMatchResult = null;
142: return false;
143: }
144:
145: __lastMatchResult = new AwkMatchResult(new String(input, 0,
146: result), offset);
147:
148: return true;
149: }
150:
151: /**
152: * Determines if a prefix of a string (represented as a char[])
153: * matches a given pattern.
154: * If a prefix of the string matches the pattern, a MatchResult instance
155: * representing the match is made accesible via
156: * {@link #getMatch()}.
157: * <p>
158: * This method is useful for certain common token identification tasks
159: * that are made more difficult without this functionality.
160: * <p>
161: * @param input The char[] to test for a prefix match.
162: * @param pattern The Pattern to be matched.
163: * @return True if input matches pattern, false otherwise.
164: */
165: public boolean matchesPrefix(char[] input, Pattern pattern) {
166: return matchesPrefix(input, pattern, 0);
167: }
168:
169: /**
170: * Determines if a prefix of a string matches a given pattern.
171: * If a prefix of the string matches the pattern, a MatchResult instance
172: * representing the match is made accesible via
173: * {@link #getMatch()}.
174: * <p>
175: * This method is useful for certain common token identification tasks
176: * that are made more difficult without this functionality.
177: * <p>
178: * @param input The String to test for a prefix match.
179: * @param pattern The Pattern to be matched.
180: * @return True if input matches pattern, false otherwise.
181: */
182: public boolean matchesPrefix(String input, Pattern pattern) {
183: return matchesPrefix(input.toCharArray(), pattern, 0);
184: }
185:
186: /**
187: * Determines if a prefix of a PatternMatcherInput instance
188: * matches a given pattern. If there is a match, a MatchResult instance
189: * representing the match is made accesible via
190: * {@link #getMatch()}. Unlike the
191: * {@link #contains(PatternMatcherInput, Pattern)}
192: * method, the current offset of the PatternMatcherInput argument
193: * is not updated. You should remember that the region starting
194: * from the begin offset of the PatternMatcherInput will be
195: * tested for a prefix match.
196: * <p>
197: * This method is useful for certain common token identification tasks
198: * that are made more difficult without this functionality.
199: * <p>
200: * @param input The PatternMatcherInput to test for a prefix match.
201: * @param pattern The Pattern to be matched.
202: * @return True if input matches pattern, false otherwise.
203: */
204: public boolean matchesPrefix(PatternMatcherInput input,
205: Pattern pattern) {
206: int result = -1;
207:
208: __awkPattern = (AwkPattern) pattern;
209: __scratchBuffer._buffer = input.getBuffer();
210: __scratchBuffer._bufferOffset = __beginOffset = input
211: .getBeginOffset();
212: __offsets[0] = input.getCurrentOffset();
213:
214: __scratchBuffer._bufferSize = input.length();
215: __scratchBuffer._endOfStreamReached = true;
216: __streamSearchBuffer = __scratchBuffer;
217: try {
218: result = __streamMatchPrefix();
219: } catch (IOException e) {
220: // Don't do anything because we're not doing any I/O
221: result = -1;
222: }
223:
224: if (result < 0) {
225: __lastMatchResult = null;
226: return false;
227: }
228:
229: __lastMatchResult = new AwkMatchResult(new String(
230: __scratchBuffer._buffer, __offsets[0], result),
231: __offsets[0]);
232:
233: return true;
234: }
235:
236: /**
237: * Determines if a string (represented as a char[]) exactly
238: * matches a given pattern. If
239: * there is an exact match, a MatchResult instance
240: * representing the match is made accesible via
241: * {@link #getMatch()}. The pattern must be
242: * an AwkPattern instance, otherwise a ClassCastException will
243: * be thrown. You are not required to, and indeed should NOT try to
244: * (for performance reasons), catch a ClassCastException because it
245: * will never be thrown as long as you use an AwkPattern as the pattern
246: * parameter.
247: * <p>
248: * @param input The char[] to test for an exact match.
249: * @param pattern The AwkPattern to be matched.
250: * @return True if input matches pattern, false otherwise.
251: * @exception ClassCastException If a Pattern instance other than an
252: * AwkPattern is passed as the pattern parameter.
253: */
254: public boolean matches(char[] input, Pattern pattern) {
255: int result = -1;
256:
257: __awkPattern = (AwkPattern) pattern;
258: __scratchBuffer._buffer = input;
259: __scratchBuffer._bufferSize = input.length;
260: __scratchBuffer._bufferOffset = __beginOffset = 0;
261: __scratchBuffer._endOfStreamReached = true;
262: __streamSearchBuffer = __scratchBuffer;
263: __offsets[0] = 0;
264: try {
265: result = __streamMatchPrefix();
266: } catch (IOException e) {
267: // Don't do anything because we're not doing any I/O
268: result = -1;
269: }
270:
271: if (result != input.length) {
272: __lastMatchResult = null;
273: return false;
274: }
275:
276: __lastMatchResult = new AwkMatchResult(new String(input, 0,
277: result), 0);
278:
279: return true;
280: }
281:
282: /**
283: * Determines if a string exactly matches a given pattern. If
284: * there is an exact match, a MatchResult instance
285: * representing the match is made accesible via
286: * {@link #getMatch()}. The pattern must be
287: * a AwkPattern instance, otherwise a ClassCastException will
288: * be thrown. You are not required to, and indeed should NOT try to
289: * (for performance reasons), catch a ClassCastException because it
290: * will never be thrown as long as you use an AwkPattern as the pattern
291: * parameter.
292: * <p>
293: * @param input The String to test for an exact match.
294: * @param pattern The AwkPattern to be matched.
295: * @return True if input matches pattern, false otherwise.
296: * @exception ClassCastException If a Pattern instance other than an
297: * AwkPattern is passed as the pattern parameter.
298: */
299: public boolean matches(String input, Pattern pattern) {
300: return matches(input.toCharArray(), pattern);
301: }
302:
303: /**
304: * Determines if the contents of a PatternMatcherInput instance
305: * exactly matches a given pattern. If
306: * there is an exact match, a MatchResult instance
307: * representing the match is made accesible via
308: * {@link #getMatch()}. Unlike the
309: * {@link #contains(PatternMatcherInput, Pattern)}
310: * method, the current offset of the PatternMatcherInput argument
311: * is not updated. You should remember that the region between
312: * the begin and end offsets of the PatternMatcherInput will be
313: * tested for an exact match.
314: * <p>
315: * The pattern must be an AwkPattern instance, otherwise a
316: * ClassCastException will be thrown. You are not required to, and
317: * indeed should NOT try to (for performance reasons), catch a
318: * ClassCastException because it will never be thrown as long as you use
319: * an AwkPattern as the pattern parameter.
320: * <p>
321: * @param input The PatternMatcherInput to test for a match.
322: * @param pattern The AwkPattern to be matched.
323: * @return True if input matches pattern, false otherwise.
324: * @exception ClassCastException If a Pattern instance other than an
325: * AwkPattern is passed as the pattern parameter.
326: */
327: public boolean matches(PatternMatcherInput input, Pattern pattern) {
328: int result = -1;
329:
330: __awkPattern = (AwkPattern) pattern;
331: __scratchBuffer._buffer = input.getBuffer();
332: __scratchBuffer._bufferSize = input.length();
333: __scratchBuffer._bufferOffset = __beginOffset = input
334: .getBeginOffset();
335: __offsets[0] = input.getBeginOffset();
336: __scratchBuffer._endOfStreamReached = true;
337: __streamSearchBuffer = __scratchBuffer;
338: try {
339: result = __streamMatchPrefix();
340: } catch (IOException e) {
341: // Don't do anything because we're not doing any I/O
342: result = -1;
343: }
344:
345: if (result != __scratchBuffer._bufferSize) {
346: __lastMatchResult = null;
347: return false;
348: }
349:
350: __lastMatchResult = new AwkMatchResult(new String(
351: __scratchBuffer._buffer, __offsets[0],
352: __scratchBuffer._bufferSize), __offsets[0]);
353:
354: return true;
355: }
356:
357: /**
358: * Determines if a string (represented as a char[]) contains a pattern.
359: * If the pattern is
360: * matched by some substring of the input, a MatchResult instance
361: * representing the <b> first </b> such match is made acessible via
362: * {@link #getMatch()}. If you want to access
363: * subsequent matches you should either use a PatternMatcherInput object
364: * or use the offset information in the MatchResult to create a substring
365: * representing the remaining input. Using the MatchResult offset
366: * information is the recommended method of obtaining the parts of the
367: * string preceeding the match and following the match.
368: * <p>
369: * The pattern must be an AwkPattern instance, otherwise a
370: * ClassCastException will be thrown. You are not required to, and
371: * indeed should NOT try to (for performance reasons), catch a
372: * ClassCastException because it will never be thrown as long as you use
373: * an AwkPattern as the pattern parameter.
374: * <p>
375: * @param input The char[] to test for a match.
376: * @param pattern The AwkPattern to be matched.
377: * @return True if the input contains a pattern match, false otherwise.
378: * @exception ClassCastException If a Pattern instance other than an
379: * AwkPattern is passed as the pattern parameter.
380: */
381: public boolean contains(char[] input, Pattern pattern) {
382: __awkPattern = (AwkPattern) pattern;
383:
384: // Begin anchor requires match occur at beginning of input
385: if (__awkPattern._hasBeginAnchor
386: && !__awkPattern._fastMap[input[0]]) {
387: __lastMatchResult = null;
388: return false;
389: }
390:
391: __scratchBuffer._buffer = input;
392: __scratchBuffer._bufferSize = input.length;
393: __scratchBuffer._bufferOffset = __beginOffset = 0;
394: __scratchBuffer._endOfStreamReached = true;
395: __streamSearchBuffer = __scratchBuffer;
396: __lastMatchedBufferOffset = 0;
397: try {
398: _search();
399: } catch (IOException e) {
400: // do nothing
401: }
402: return (__lastMatchResult != null);
403: }
404:
405: /**
406: * Determines if a string contains a pattern. If the pattern is
407: * matched by some substring of the input, a MatchResult instance
408: * representing the <b> first </b> such match is made acessible via
409: * {@link #getMatch()}. If you want to access
410: * subsequent matches you should either use a PatternMatcherInput object
411: * or use the offset information in the MatchResult to create a substring
412: * representing the remaining input. Using the MatchResult offset
413: * information is the recommended method of obtaining the parts of the
414: * string preceeding the match and following the match.
415: * <p>
416: * The pattern must be an AwkPattern instance, otherwise a
417: * ClassCastException will be thrown. You are not required to, and
418: * indeed should NOT try to (for performance reasons), catch a
419: * ClassCastException because it will never be thrown as long as you use
420: * an AwkPattern as the pattern parameter.
421: * <p>
422: * @param input The String to test for a match.
423: * @param pattern The AwkPattern to be matched.
424: * @return True if the input contains a pattern match, false otherwise.
425: * @exception ClassCastException If a Pattern instance other than an
426: * AwkPattern is passed as the pattern parameter.
427: */
428: public boolean contains(String input, Pattern pattern) {
429: return contains(input.toCharArray(), pattern);
430: }
431:
432: /**
433: * Determines if the contents of a PatternMatcherInput, starting from the
434: * current offset of the input contains a pattern.
435: * If a pattern match is found, a MatchResult
436: * instance representing the <b>first</b> such match is made acessible via
437: * {@link #getMatch()}. The current offset of the
438: * PatternMatcherInput is set to the offset corresponding to the end
439: * of the match, so that a subsequent call to this method will continue
440: * searching where the last call left off. You should remember that the
441: * region between the begin and end offsets of the PatternMatcherInput are
442: * considered the input to be searched, and that the current offset
443: * of the PatternMatcherInput reflects where a search will start from.
444: * Matches extending beyond the end offset of the PatternMatcherInput
445: * will not be matched. In other words, a match must occur entirely
446: * between the begin and end offsets of the input. See
447: * {@link org.apache.oro.text.regex.PatternMatcherInput PatternMatcherInput}
448: * for more details.
449: * <p>
450: * As a side effect, if a match is found, the PatternMatcherInput match
451: * offset information is updated. See the PatternMatcherInput
452: * {@link org.apache.oro.text.regex.PatternMatcherInput#setMatchOffsets
453: * setMatchOffsets(int, int)} method for more details.
454: * <p>
455: * The pattern must be an AwkPattern instance, otherwise a
456: * ClassCastException will be thrown. You are not required to, and
457: * indeed should NOT try to (for performance reasons), catch a
458: * ClassCastException because it will never be thrown as long as you use
459: * an AwkPattern as the pattern parameter.
460: * <p>
461: * This method is usually used in a loop as follows:
462: * <blockquote><pre>
463: * PatternMatcher matcher;
464: * PatternCompiler compiler;
465: * Pattern pattern;
466: * PatternMatcherInput input;
467: * MatchResult result;
468: *
469: * compiler = new AwkCompiler();
470: * matcher = new AwkMatcher();
471: *
472: * try {
473: * pattern = compiler.compile(somePatternString);
474: * } catch(MalformedPatternException e) {
475: * System.err.println("Bad pattern.");
476: * System.err.println(e.getMessage());
477: * return;
478: * }
479: *
480: * input = new PatternMatcherInput(someStringInput);
481: *
482: * while(matcher.contains(input, pattern)) {
483: * result = matcher.getMatch();
484: * // Perform whatever processing on the result you want.
485: * }
486: *
487: * </pre></blockquote>
488: * <p>
489: * @param input The PatternMatcherInput to test for a match.
490: * @param pattern The Pattern to be matched.
491: * @return True if the input contains a pattern match, false otherwise.
492: * @exception ClassCastException If a Pattern instance other than an
493: * AwkPattern is passed as the pattern parameter.
494: */
495: public boolean contains(PatternMatcherInput input, Pattern pattern) {
496: __awkPattern = (AwkPattern) pattern;
497: __scratchBuffer._buffer = input.getBuffer();
498: __scratchBuffer._bufferOffset = __beginOffset = input
499: .getBeginOffset();
500: __lastMatchedBufferOffset = input.getCurrentOffset();
501:
502: // Begin anchor requires match occur at beginning of input
503: // No need to adjust current offset if no match found.
504: if (__awkPattern._hasBeginAnchor) {
505: if (__beginOffset != __lastMatchedBufferOffset
506: || !__awkPattern._fastMap[__scratchBuffer._buffer[__beginOffset]]) {
507: __lastMatchResult = null;
508: return false;
509: }
510: }
511:
512: __scratchBuffer._bufferSize = input.length();
513: __scratchBuffer._endOfStreamReached = true;
514: __streamSearchBuffer = __scratchBuffer;
515: try {
516: _search();
517: } catch (IOException e) {
518: // do nothing
519: }
520: input.setCurrentOffset(__lastMatchedBufferOffset);
521:
522: if (__lastMatchResult == null)
523: return false;
524:
525: input.setMatchOffsets(__lastMatchResult.beginOffset(0),
526: __lastMatchResult.endOffset(0));
527:
528: return true;
529: }
530:
531: /**
532: * Determines if the contents of an AwkStreamInput, starting from the
533: * current offset of the input contains a pattern.
534: * If a pattern match is found, a MatchResult
535: * instance representing the <b>first</b> such match is made acessible via
536: * {@link #getMatch()}. The current offset of the
537: * input stream is advanced to the end offset corresponding to the end
538: * of the match. Consequently a subsequent call to this method will continue
539: * searching where the last call left off.
540: * See {@link AwkStreamInput} for more details.
541: * <p>
542: * Note, patterns matching the null string do NOT match at end of input
543: * stream. This is different from the behavior you get from the other
544: * contains() methods.
545: * <p>
546: * The pattern must be an AwkPattern instance, otherwise a
547: * ClassCastException will be thrown. You are not required to, and
548: * indeed should NOT try to (for performance reasons), catch a
549: * ClassCastException because it will never be thrown as long as you use
550: * an AwkPattern as the pattern parameter.
551: * <p>
552: * This method is usually used in a loop as follows:
553: * <blockquote><pre>
554: * PatternMatcher matcher;
555: * PatternCompiler compiler;
556: * Pattern pattern;
557: * AwkStreamInput input;
558: * MatchResult result;
559: *
560: * compiler = new AwkCompiler();
561: * matcher = new AwkMatcher();
562: *
563: * try {
564: * pattern = compiler.compile(somePatternString);
565: * } catch(MalformedPatternException e) {
566: * System.err.println("Bad pattern.");
567: * System.err.println(e.getMessage());
568: * return;
569: * }
570: *
571: * input = new AwkStreamInput(
572: * new BufferedInputStream(new FileInputStream(someFileName)));
573: *
574: * while(matcher.contains(input, pattern)) {
575: * result = matcher.getMatch();
576: * // Perform whatever processing on the result you want.
577: * }
578: *
579: * </pre></blockquote>
580: * <p>
581: * @param input The PatternStreamInput to test for a match.
582: * @param pattern The Pattern to be matched.
583: * @return True if the input contains a pattern match, false otherwise.
584: * @exception ClassCastException If a Pattern instance other than an
585: * AwkPattern is passed as the pattern parameter.
586: */
587: public boolean contains(AwkStreamInput input, Pattern pattern)
588: throws IOException {
589: __awkPattern = (AwkPattern) pattern;
590:
591: // Begin anchor requires match occur at beginning of input
592: if (__awkPattern._hasBeginAnchor) {
593: // Do read here instead of in _search() so we can test first char
594: if (input._bufferOffset == 0) {
595: if (input.read()
596: && !__awkPattern._fastMap[input._buffer[0]]) {
597: __lastMatchResult = null;
598: return false;
599: }
600: } else {
601: __lastMatchResult = null;
602: return false;
603: }
604: }
605:
606: __lastMatchedBufferOffset = input._currentOffset;
607: __streamSearchBuffer = input;
608: __beginOffset = 0;
609: _search();
610: input._currentOffset = __lastMatchedBufferOffset;
611:
612: if (__lastMatchResult != null) {
613: // Adjust match begin offset to be relative to beginning of stream.
614: __lastMatchResult
615: ._incrementMatchBeginOffset(input._bufferOffset);
616: return true;
617: }
618:
619: return false;
620: }
621:
622: private int __streamMatchPrefix() throws IOException {
623: int token, current = AwkPattern._START_STATE, lastState;
624: int offset, initialOffset, maxOffset;
625: int lastMatchedOffset = -1;
626: int[] tstateArray;
627:
628: offset = initialOffset = __offsets[0];
629: maxOffset = __streamSearchBuffer._bufferSize + __beginOffset;
630:
631: test: while (offset < maxOffset) {
632: token = __streamSearchBuffer._buffer[offset++];
633:
634: if (current < __awkPattern._numStates) {
635: lastState = current;
636: tstateArray = __awkPattern._getStateArray(current);
637: current = tstateArray[token];
638:
639: if (current == 0) {
640: __awkPattern._createNewState(lastState, token,
641: tstateArray);
642: current = tstateArray[token];
643: }
644:
645: if (current == AwkPattern._INVALID_STATE) {
646: break test;
647: } else if (__awkPattern._endStates.get(current)) {
648: lastMatchedOffset = offset;
649: }
650:
651: if (offset == maxOffset) {
652: offset = __streamSearchBuffer
653: ._reallocate(initialOffset)
654: + __beginOffset;
655:
656: maxOffset = __streamSearchBuffer._bufferSize
657: + __beginOffset;
658:
659: // If we're at the end of the stream, don't reset values
660: if (offset != maxOffset) {
661: if (lastMatchedOffset != -1)
662: lastMatchedOffset -= initialOffset;
663: initialOffset = 0;
664: }
665: }
666: } else
667: break;
668: }
669:
670: __offsets[0] = initialOffset;
671: __offsets[1] = lastMatchedOffset - 1;
672:
673: if (lastMatchedOffset == -1 && __awkPattern._matchesNullString)
674: return 0;
675:
676: // End anchor requires match occur at end of input
677: if (__awkPattern._hasEndAnchor
678: && (!__streamSearchBuffer._endOfStreamReached || lastMatchedOffset < __streamSearchBuffer._bufferSize
679: + __beginOffset))
680: return -1;
681:
682: return (lastMatchedOffset - initialOffset);
683: }
684:
685: void _search() throws IOException {
686: int position, tokensMatched;
687:
688: __lastMatchResult = null;
689:
690: while (true) {
691: if (__lastMatchedBufferOffset >= __streamSearchBuffer._bufferSize
692: + __beginOffset) {
693: if (__streamSearchBuffer._endOfStreamReached) {
694: // Get rid of reference now that it should no longer be used.
695: __streamSearchBuffer = null;
696: return;
697: } else {
698: if (!__streamSearchBuffer.read())
699: return;
700: __lastMatchedBufferOffset = 0;
701: }
702: }
703:
704: for (position = __lastMatchedBufferOffset; position < __streamSearchBuffer._bufferSize
705: + __beginOffset; position = __offsets[0] + 1) {
706:
707: __offsets[0] = position;
708: if (__awkPattern._fastMap[__streamSearchBuffer._buffer[position]]
709: && (tokensMatched = __streamMatchPrefix()) > -1) {
710:
711: __lastMatchResult = new AwkMatchResult(new String(
712: __streamSearchBuffer._buffer, __offsets[0],
713: tokensMatched), __offsets[0]);
714:
715: __lastMatchedBufferOffset = (tokensMatched > 0 ? __offsets[1] + 1
716: : __offsets[0] + 1);
717:
718: return;
719: } else if (__awkPattern._matchesNullString) {
720: __lastMatchResult = new AwkMatchResult(
721: new String(), position);
722:
723: __lastMatchedBufferOffset = position + 1;
724:
725: return;
726: }
727: }
728:
729: __lastMatchedBufferOffset = position;
730: }
731: }
732:
733: /**
734: * Fetches the last match found by a call to a matches() or contains()
735: * method.
736: * <p>
737: * @return A MatchResult instance containing the pattern match found
738: * by the last call to any one of the matches() or contains()
739: * methods. If no match was found by the last call, returns
740: * null.
741: */
742: public MatchResult getMatch() {
743: return __lastMatchResult;
744: }
745:
746: }
|