001: /*
002: * gnu/regexp/RETokenRepeated.java
003: * Copyright (C) 1998-2001 Wes Biggs
004: *
005: * This library is free software; you can redistribute it and/or modify
006: * it under the terms of the GNU Lesser General Public License as published
007: * by the Free Software Foundation; either version 2.1 of the License, or
008: * (at your option) any later version.
009: *
010: * This library is distributed in the hope that it will be useful,
011: * but WITHOUT ANY WARRANTY; without even the implied warranty of
012: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
013: * GNU Lesser General Public License for more details.
014: *
015: * You should have received a copy of the GNU Lesser General Public License
016: * along with this program; if not, write to the Free Software
017: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
018: */
019:
020: package gnu.regexp;
021:
022: import java.util.Vector;
023:
024: final class RETokenRepeated extends REToken {
025: private REToken token;
026: private int min, max;
027: private boolean stingy;
028:
029: RETokenRepeated(int subIndex, REToken token, int min, int max) {
030: super (subIndex);
031: this .token = token;
032: this .min = min;
033: this .max = max;
034: }
035:
036: /** Sets the minimal matching mode to true. */
037: void makeStingy() {
038: stingy = true;
039: }
040:
041: /** Queries if this token has minimal matching enabled. */
042: boolean isStingy() {
043: return stingy;
044: }
045:
046: /**
047: * The minimum length of a repeated token is the minimum length
048: * of the token multiplied by the minimum number of times it must
049: * match.
050: */
051: int getMinimumLength() {
052: return (min * token.getMinimumLength());
053: }
054:
055: // We do need to save every possible point, but the number of clone()
056: // invocations here is really a killer for performance on non-stingy
057: // repeat operators. I'm open to suggestions...
058:
059: // Hypothetical question: can you have a RE that matches 1 times,
060: // 3 times, 5 times, but not 2 times or 4 times? Does having
061: // the subexpression back-reference operator allow that?
062:
063: boolean match(CharIndexed input, REMatch mymatch) {
064: // number of times we've matched so far
065: int numRepeats = 0;
066:
067: // Possible positions for the next repeat to match at
068: REMatch newMatch = mymatch;
069: REMatch last = null;
070: REMatch current;
071:
072: // Add the '0-repeats' index
073: // positions.elementAt(z) == position [] in input after <<z>> matches
074: Vector positions = new Vector();
075: positions.addElement(newMatch);
076:
077: // Declare variables used in loop
078: REMatch doables;
079: REMatch doablesLast;
080: REMatch recurrent;
081:
082: do {
083: // Check for stingy match for each possibility.
084: if (stingy && (numRepeats >= min)) {
085: REMatch result = matchRest(input, newMatch);
086: if (result != null) {
087: mymatch.assignFrom(result);
088: return true;
089: }
090: }
091:
092: doables = null;
093: doablesLast = null;
094:
095: // try next repeat at all possible positions
096: for (current = newMatch; current != null; current = current.next) {
097: recurrent = (REMatch) current.clone();
098: if (token.match(input, recurrent)) {
099: // add all items in current to doables array
100: if (doables == null) {
101: doables = recurrent;
102: doablesLast = recurrent;
103: } else {
104: // Order these from longest to shortest
105: // Start by assuming longest (more repeats)
106: doablesLast.next = recurrent;
107: }
108: // Find new doablesLast
109: while (doablesLast.next != null) {
110: doablesLast = doablesLast.next;
111: }
112: }
113: }
114: // if none of the possibilities worked out, break out of do/while
115: if (doables == null)
116: break;
117:
118: // reassign where the next repeat can match
119: newMatch = doables;
120:
121: // increment how many repeats we've successfully found
122: ++numRepeats;
123:
124: positions.addElement(newMatch);
125: } while (numRepeats < max);
126:
127: // If there aren't enough repeats, then fail
128: if (numRepeats < min)
129: return false;
130:
131: // We're greedy, but ease off until a true match is found
132: int posIndex = positions.size();
133:
134: // At this point we've either got too many or just the right amount.
135: // See if this numRepeats works with the rest of the regexp.
136: REMatch allResults = null;
137: REMatch allResultsLast = null;
138:
139: REMatch results = null;
140: while (--posIndex >= min) {
141: newMatch = (REMatch) positions.elementAt(posIndex);
142: results = matchRest(input, newMatch);
143: if (results != null) {
144: if (allResults == null) {
145: allResults = results;
146: allResultsLast = results;
147: } else {
148: // Order these from longest to shortest
149: // Start by assuming longest (more repeats)
150: allResultsLast.next = results;
151: }
152: // Find new doablesLast
153: while (allResultsLast.next != null) {
154: allResultsLast = allResultsLast.next;
155: }
156: }
157: // else did not match rest of the tokens, try again on smaller sample
158: }
159: if (allResults != null) {
160: mymatch.assignFrom(allResults); // does this get all?
161: return true;
162: }
163: // If we fall out, no matches.
164: return false;
165: }
166:
167: private REMatch matchRest(CharIndexed input, final REMatch newMatch) {
168: REMatch current, single;
169: REMatch doneIndex = null;
170: REMatch doneIndexLast = null;
171: // Test all possible matches for this number of repeats
172: for (current = newMatch; current != null; current = current.next) {
173: // clone() separates a single match from the chain
174: single = (REMatch) current.clone();
175: if (next(input, single)) {
176: // chain results to doneIndex
177: if (doneIndex == null) {
178: doneIndex = single;
179: doneIndexLast = single;
180: } else {
181: doneIndexLast.next = single;
182: }
183: // Find new doneIndexLast
184: while (doneIndexLast.next != null) {
185: doneIndexLast = doneIndexLast.next;
186: }
187: }
188: }
189: return doneIndex;
190: }
191:
192: void dump(StringBuffer os) {
193: os.append("(?:");
194: token.dumpAll(os);
195: os.append(')');
196: if ((max == Integer.MAX_VALUE) && (min <= 1))
197: os.append((min == 0) ? '*' : '+');
198: else if ((min == 0) && (max == 1))
199: os.append('?');
200: else {
201: os.append('{').append(min);
202: if (max > min) {
203: os.append(',');
204: if (max != Integer.MAX_VALUE)
205: os.append(max);
206: }
207: os.append('}');
208: }
209: if (stingy)
210: os.append('?');
211: }
212: }
|