001: /*
002: * $Id: PatternMatcher.java,v 1.7 2003/11/07 20:16:25 dfs Exp $
003: *
004: * ====================================================================
005: * The Apache Software License, Version 1.1
006: *
007: * Copyright (c) 2000 The Apache Software Foundation. All rights
008: * reserved.
009: *
010: * Redistribution and use in source and binary forms, with or without
011: * modification, are permitted provided that the following conditions
012: * are met:
013: *
014: * 1. Redistributions of source code must retain the above copyright
015: * notice, this list of conditions and the following disclaimer.
016: *
017: * 2. Redistributions in binary form must reproduce the above copyright
018: * notice, this list of conditions and the following disclaimer in
019: * the documentation and/or other materials provided with the
020: * distribution.
021: *
022: * 3. The end-user documentation included with the redistribution,
023: * if any, must include the following acknowledgment:
024: * "This product includes software developed by the
025: * Apache Software Foundation (http://www.apache.org/)."
026: * Alternately, this acknowledgment may appear in the software itself,
027: * if and wherever such third-party acknowledgments normally appear.
028: *
029: * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro"
030: * must not be used to endorse or promote products derived from this
031: * software without prior written permission. For written
032: * permission, please contact apache@apache.org.
033: *
034: * 5. Products derived from this software may not be called "Apache"
035: * or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their
036: * name, without prior written permission of the Apache Software Foundation.
037: *
038: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
039: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
040: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
041: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
042: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
043: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
044: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
045: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
046: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
047: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
048: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
049: * SUCH DAMAGE.
050: * ====================================================================
051: *
052: * This software consists of voluntary contributions made by many
053: * individuals on behalf of the Apache Software Foundation. For more
054: * information on the Apache Software Foundation, please see
055: * <http://www.apache.org/>.
056: */
057:
058: package org.apache.oro.text.regex;
059:
060: /**
061: * The PatternMatcher interface defines the operations a regular
062: * expression matcher must implement. However, the types of the Pattern
063: * implementations recognized by a matcher are not restricted. Typically
064: * PatternMatcher instances will only recognize a specific type of Pattern.
065: * For example, the Perl5Matcher only recognizes Perl5Pattern instances.
066: * However, none of the PatternMatcher methods are required to throw an
067: * exception in case of the use of an invalid pattern. This is done for
068: * efficiency reasons, although usually a ClassCastException will be
069: * thrown by the Java runtime system if you use the wrong Pattern
070: * implementation. It is the responsibility of the programmer to make
071: * sure the correct Pattern instance is used with a given PatternMatcher
072: * instance. The current version of this package only contains the Perl5
073: * suite of pattern matching classes, but future ones for other regular
074: * expression grammars may be added and users may also create their own
075: * implementations of the provided interfaces. Therefore the programmer
076: * should be careful not to mismatch classes.
077: *
078: * @version @version@
079: * @since 1.0
080: * @see Pattern
081: * @see PatternCompiler
082: * @see MatchResult
083: */
084: public interface PatternMatcher {
085:
086: /**
087: * Determines if a prefix of a string (represented as a char[])
088: * matches a given pattern, starting from a given offset into the string.
089: * If a prefix of the string matches the pattern, a MatchResult instance
090: * representing the match is made accessible via
091: * {@link #getMatch()}.
092: * <p>
093: * This method is useful for certain common token identification tasks
094: * that are made more difficult without this functionality.
095: *
096: * @param input The char[] to test for a prefix match.
097: * @param pattern The Pattern to be matched.
098: * @param offset The offset at which to start searching for the prefix.
099: * @return True if a prefix of input matches pattern, false otherwise.
100: */
101: public boolean matchesPrefix(char[] input, Pattern pattern,
102: int offset);
103:
104: /**
105: * Determines if a prefix of a string matches a given pattern.
106: * If a prefix of the string matches the pattern, a MatchResult instance
107: * representing the match is made accessible via
108: * {@link #getMatch()}.
109: * <p>
110: * This method is useful for certain common token identification tasks
111: * that are made more difficult without this functionality.
112: *
113: * @param input The String to test for a prefix match.
114: * @param pattern The Pattern to be matched.
115: * @return True if a prefix of input matches pattern, false otherwise.
116: */
117: public boolean matchesPrefix(String input, Pattern pattern);
118:
119: /**
120: * Determines if a prefix of a string (represented as a char[])
121: * matches a given pattern.
122: * If a prefix of the string matches the pattern, a MatchResult instance
123: * representing the match is made accessible via
124: * {@link #getMatch()}.
125: * <p>
126: * This method is useful for certain common token identification tasks
127: * that are made more difficult without this functionality.
128: *
129: * @param input The char[] to test for a prefix match.
130: * @param pattern The Pattern to be matched.
131: * @return True if a prefix of input matches pattern, false otherwise.
132: */
133: public boolean matchesPrefix(char[] input, Pattern pattern);
134:
135: /**
136: * Determines if a prefix of a PatternMatcherInput instance
137: * matches a given pattern. If there is a match, a MatchResult instance
138: * representing the match is made accessible via
139: * {@link #getMatch()}. Unlike the
140: * {@link #contains(PatternMatcherInput, Pattern)}
141: * method, the current offset of the PatternMatcherInput argument
142: * is not updated. You should remember that the region starting
143: * from the begin offset of the PatternMatcherInput will be
144: * tested for a prefix match.
145: * <p>
146: * This method is useful for certain common token identification tasks
147: * that are made more difficult without this functionality.
148: *
149: * @param input The PatternMatcherInput to test for a prefix match.
150: * @param pattern The Pattern to be matched.
151: * @return True if a prefix of input matches pattern, false otherwise.
152: */
153: public boolean matchesPrefix(PatternMatcherInput input,
154: Pattern pattern);
155:
156: /**
157: * Determines if a string exactly matches a given pattern. If
158: * there is an exact match, a MatchResult instance
159: * representing the match is made accessible via
160: * {@link #getMatch()}.
161: *
162: * @param input The String to test for an exact match.
163: * @param pattern The Pattern to be matched.
164: * @return True if input matches pattern, false otherwise.
165: */
166: public boolean matches(String input, Pattern pattern);
167:
168: /**
169: * Determines if a string (represented as a char[]) exactly matches
170: * a given pattern. If there is an exact match, a MatchResult
171: * instance representing the match is made accessible via
172: * {@link #getMatch()}.
173: *
174: * @param input The char[] to test for a match.
175: * @param pattern The Pattern to be matched.
176: * @return True if input matches pattern, false otherwise.
177: */
178: public boolean matches(char[] input, Pattern pattern);
179:
180: /**
181: * Determines if the contents of a PatternMatcherInput instance
182: * exactly matches a given pattern. If
183: * there is an exact match, a MatchResult instance
184: * representing the match is made accessible via
185: * {@link #getMatch()}. Unlike the
186: * {@link #contains(PatternMatcherInput, Pattern)}
187: * method, the current offset of the PatternMatcherInput argument
188: * is not updated. You should remember that the region between
189: * the begin and end offsets of the PatternMatcherInput will be
190: * tested for an exact match.
191: *
192: * @param input The PatternMatcherInput to test for a match.
193: * @param pattern The Pattern to be matched.
194: * @return True if input matches pattern, false otherwise.
195: */
196: public boolean matches(PatternMatcherInput input, Pattern pattern);
197:
198: /**
199: * Determines if a string contains a pattern. If the pattern is
200: * matched by some substring of the input, a MatchResult instance
201: * representing the <b>first</b> such match is made accessible via
202: * {@link #getMatch()}. If you want to access
203: * subsequent matches you should either use a PatternMatcherInput object
204: * or use the offset information in the MatchResult to create a substring
205: * representing the remaining input. Using the MatchResult offset
206: * information is the recommended method of obtaining the parts of the
207: * string preceding the match and following the match.
208: *
209: * @param input The String to test for a match.
210: * @param pattern The Pattern to be matched.
211: * @return True if the input contains a pattern match, false otherwise.
212: */
213: public boolean contains(String input, Pattern pattern);
214:
215: /**
216: * Determines if a string (represented as a char[]) contains a pattern.
217: * If the pattern is matched by some substring of the input, a MatchResult
218: * instance representing the <b>first</b> such match is made accessible via
219: * {@link #getMatch()}. If you want to access
220: * subsequent matches you should either use a PatternMatcherInput object
221: * or use the offset information in the MatchResult to create a substring
222: * representing the remaining input. Using the MatchResult offset
223: * information is the recommended method of obtaining the parts of the
224: * string preceding the match and following the match.
225: *
226: * @param input The char[] to test for a match.
227: * @param pattern The Pattern to be matched.
228: * @return True if the input contains a pattern match, false otherwise.
229: */
230: public boolean contains(char[] input, Pattern pattern);
231:
232: /**
233: * Determines if the contents of a PatternMatcherInput, starting from the
234: * current offset of the input contains a pattern.
235: * If a pattern match is found, a MatchResult
236: * instance representing the <b>first</b> such match is made accessible via
237: * {@link #getMatch()}. The current offset of the
238: * PatternMatcherInput is set to the offset corresponding to the end
239: * of the match, so that a subsequent call to this method will continue
240: * searching where the last call left off. You should remember that the
241: * region between the begin and end offsets of the PatternMatcherInput is
242: * considered the input to be searched, and that the current offset
243: * of the PatternMatcherInput reflects where a search will start from.
244: * Matches extending beyond the end offset of the PatternMatcherInput
245: * will not be matched. In other words, a match must occur entirely
246: * between the begin and end offsets of the input. See
247: * {@link PatternMatcherInput} for more details.
248: * <p>
249: * This method is usually used in a loop as follows:
250: * <blockquote><pre>
251: * PatternMatcher matcher;
252: * PatternCompiler compiler;
253: * Pattern pattern;
254: * PatternMatcherInput input;
255: * MatchResult result;
256: *
257: * compiler = new Perl5Compiler();
258: * matcher = new Perl5Matcher();
259: *
260: * try {
261: * pattern = compiler.compile(somePatternString);
262: * } catch(MalformedPatternException e) {
263: * System.out.println("Bad pattern.");
264: * System.out.println(e.getMessage());
265: * return;
266: * }
267: *
268: * input = new PatternMatcherInput(someStringInput);
269: *
270: * while(matcher.contains(input, pattern)) {
271: * result = matcher.getMatch();
272: * // Perform whatever processing on the result you want.
273: * }
274: *
275: * </pre></blockquote>
276: *
277: * @param input The PatternMatcherInput to test for a match.
278: * @param pattern The Pattern to be matched.
279: * @return True if the input contains a pattern match, false otherwise.
280: */
281: public boolean contains(PatternMatcherInput input, Pattern pattern);
282:
283: /**
284: * Fetches the last match found by a call to a matches(), matchesPrefix()
285: * or contains() method.
286: *
287: * @return A MatchResult instance containing the pattern match found
288: * by the last call to any one of the matches(), matchesPrefix()
289: * or contains() methods. If no match was found by the last call,
290: * returns null.
291: */
292: public MatchResult getMatch();
293: }
|