001: /*
002: * $Id: Perl5MatchResult.java,v 1.8 2003/11/07 20:16:25 dfs Exp $
003: *
004: * ====================================================================
005: * The Apache Software License, Version 1.1
006: *
007: * Copyright (c) 2000 The Apache Software Foundation. All rights
008: * reserved.
009: *
010: * Redistribution and use in source and binary forms, with or without
011: * modification, are permitted provided that the following conditions
012: * are met:
013: *
014: * 1. Redistributions of source code must retain the above copyright
015: * notice, this list of conditions and the following disclaimer.
016: *
017: * 2. Redistributions in binary form must reproduce the above copyright
018: * notice, this list of conditions and the following disclaimer in
019: * the documentation and/or other materials provided with the
020: * distribution.
021: *
022: * 3. The end-user documentation included with the redistribution,
023: * if any, must include the following acknowledgment:
024: * "This product includes software developed by the
025: * Apache Software Foundation (http://www.apache.org/)."
026: * Alternately, this acknowledgment may appear in the software itself,
027: * if and wherever such third-party acknowledgments normally appear.
028: *
029: * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro"
030: * must not be used to endorse or promote products derived from this
031: * software without prior written permission. For written
032: * permission, please contact apache@apache.org.
033: *
034: * 5. Products derived from this software may not be called "Apache"
035: * or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their
036: * name, without prior written permission of the Apache Software Foundation.
037: *
038: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
039: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
040: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
041: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
042: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
043: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
044: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
045: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
046: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
047: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
048: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
049: * SUCH DAMAGE.
050: * ====================================================================
051: *
052: * This software consists of voluntary contributions made by many
053: * individuals on behalf of the Apache Software Foundation. For more
054: * information on the Apache Software Foundation, please see
055: * <http://www.apache.org/>.
056: */
057:
058: package org.apache.oro.text.regex;
059:
060: /**
061: * A class used to store and access the results of a Perl5Pattern match.
062: *
063: * @version @version@
064: * @since 1.0
065: * @see PatternMatcher
066: * @see Perl5Matcher
067: */
068: final class Perl5MatchResult implements MatchResult {
069: /**
070: * The character offset into the line or stream where the match
071: * begins. Pattern matching methods that look for matches a line at
072: * a time should use this field as the offset into the line
073: * of the match. Methods that look for matches independent of line
074: * boundaries should use this field as the offset into the entire
075: * text stream.
076: */
077: int _matchBeginOffset;
078:
079: /**
080: * Arrays containing the beginning and end offsets of the pattern
081: * groups matched within the actual matched pattern contained in the
082: * variable <code>match</code>.
083: * Pattern matching methods that do not match subgroups, will only contain
084: * entries for group 0, which always refers to the entire pattern.
085: * <code>beginGroupOffset</code> contains the start offset of the groups,
086: * indexed by group number, which will always be 0 for group 0.
087: * <code>endGroupOffset</code> contains the ending offset + 1 of the groups.
088: * A group matching the null string will have <code>beginGroupOffset</code>
089: * and <code>endGroupOffset</code> entries of equal value. Following a
090: * convention established by the GNU regular expression library for the
091: * C language, groups that are not part of a match contain -1 as their
092: * begin and end offsets.
093: */
094: int[] _beginGroupOffset, _endGroupOffset;
095:
096: /**
097: * The entire string that matched the pattern.
098: */
099: String _match;
100:
101: /**
102: * Constructs a MatchResult able to store match information for
103: * a number of subpattern groups.
104: * <p>
105: * @param groups The number of groups this MatchResult can store.
106: * Only postitive values greater than or equal to 1 make any
107: * sense. At minimum, a MatchResult stores one group which
108: * represents the entire pattern matched including all subparts.
109: */
110: Perl5MatchResult(int groups) {
111: _beginGroupOffset = new int[groups];
112: _endGroupOffset = new int[groups];
113: }
114:
115: /**
116: * @return The length of the match.
117: */
118: public int length() {
119: int length;
120:
121: length = (_endGroupOffset[0] - _beginGroupOffset[0]);
122:
123: return (length > 0 ? length : 0);
124: }
125:
126: /**
127: * @return The number of groups contained in the result. This number
128: * includes the 0th group. In other words, the result refers
129: * to the number of parenthesized subgroups plus the entire match
130: * itself.
131: */
132: public int groups() {
133: return _beginGroupOffset.length;
134: }
135:
136: /**
137: * @param group The pattern subgroup to return.
138: * @return A string containing the indicated pattern subgroup. Group
139: * 0 always refers to the entire match. If a group was never
140: * matched, it returns null. This is not to be confused with
141: * a group matching the null string, which will return a String
142: * of length 0.
143: */
144: public String group(int group) {
145: int begin, end, length;
146:
147: if (group < _beginGroupOffset.length) {
148: begin = _beginGroupOffset[group];
149: end = _endGroupOffset[group];
150: length = _match.length();
151:
152: if (begin >= 0 && end >= 0) {
153: if (begin < length && end <= length && end > begin)
154: return _match.substring(begin, end);
155: else if (begin <= end)
156: return "";
157: }
158: }
159:
160: return null;
161: }
162:
163: /**
164: * @param group The pattern subgroup.
165: * @return The offset into group 0 of the first token in the indicated
166: * pattern subgroup. If a group was never matched or does
167: * not exist, returns -1.
168: */
169: public int begin(int group) {
170: int begin, end;//, length;
171: if (group < _beginGroupOffset.length) {
172: begin = _beginGroupOffset[group];
173: end = _endGroupOffset[group];
174: //length = _match.length();
175: if (begin >= 0 && end >= 0)// && begin < length && end <= length)
176: //return _beginGroupOffset[group];
177: return begin;
178: }
179:
180: return -1;
181: }
182:
183: /**
184: * @param group The pattern subgroup.
185: * @return Returns one plus the offset into group 0 of the last token in
186: * the indicated pattern subgroup. If a group was never matched
187: * or does not exist, returns -1. A group matching the null
188: * string will return its start offset.
189: */
190: public int end(int group) {
191: int begin, end; //, length;
192: if (group < _beginGroupOffset.length) {
193: begin = _beginGroupOffset[group];
194: end = _endGroupOffset[group];
195: //length = _match.length();
196: if (begin >= 0 && end >= 0)// && begin < length && end <= length)
197: //return _endGroupOffset[group];
198: return end;
199: }
200: return -1;
201: }
202:
203: /**
204: * Returns an offset marking the beginning of the pattern match
205: * relative to the beginning of the input.
206: * <p>
207: * @param group The pattern subgroup.
208: * @return The offset of the first token in the indicated
209: * pattern subgroup. If a group was never matched or does
210: * not exist, returns -1.
211: */
212: public int beginOffset(int group) {
213: int begin, end;//, length;
214: if (group < _beginGroupOffset.length) {
215: begin = _beginGroupOffset[group];
216: end = _endGroupOffset[group];
217: //length = _match.length();
218: if (begin >= 0 && end >= 0)// && begin < length && end <= length)
219: //return _matchBeginOffset + _beginGroupOffset[group];
220: return _matchBeginOffset + begin;
221: }
222: return -1;
223: }
224:
225: /**
226: * Returns an offset marking the end of the pattern match
227: * relative to the beginning of the input.
228: * <p>
229: * @param group The pattern subgroup.
230: * @return Returns one plus the offset of the last token in
231: * the indicated pattern subgroup. If a group was never matched
232: * or does not exist, returns -1. A group matching the null
233: * string will return its start offset.
234: */
235: public int endOffset(int group) {
236: int begin, end;//, length;
237: if (group < _endGroupOffset.length) {
238: begin = _beginGroupOffset[group];
239: end = _endGroupOffset[group];
240: //length = _match.length();
241: if (begin >= 0 && end >= 0)// && begin < length && end <= length)
242: //return _matchBeginOffset + _endGroupOffset[group];
243: return _matchBeginOffset + end;
244: }
245: return -1;
246: }
247:
248: /**
249: * The same as group(0).
250: *
251: * @return A string containing the entire match.
252: */
253: public String toString() {
254: return group(0);
255: }
256: }
|