001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: /**
019: * @author Nikolay A. Kuznetsov
020: * @version $Revision: 1.12.2.2 $
021: */package java.util.regex;
022:
023: /**
024: * Basic class for nodes, representing given regular expression.
025: * Note: All the classes representing nodes has set prefix;
026: *
027: * @author Nikolay A. Kuznetsov
028: * @version $Revision: 1.12.2.2 $
029: */
030: abstract class AbstractSet {
031:
032: public static final int TYPE_LEAF = 1 << 0;
033:
034: public static final int TYPE_FSET = 1 << 1;
035:
036: public static final int TYPE_QUANT = 1 << 3;
037:
038: public static final int TYPE_DOTSET = 0x80000000 | '.';
039:
040: /**
041: * Next node to visit
042: */
043: protected AbstractSet next;
044:
045: /**
046: * Counter for debugging purposes, represent unique node index;
047: */
048: static int counter = 1;
049:
050: protected boolean isSecondPassVisited = false;
051:
052: protected String index = new Integer(AbstractSet.counter++)
053: .toString();
054:
055: private int type = 0;
056:
057: public AbstractSet() {
058: }
059:
060: public AbstractSet(AbstractSet n) {
061: next = n;
062: }
063:
064: /**
065: * Checks if this node matches in given position and recursively call
066: * next node matches on positive self match. Returns positive integer if
067: * entire match succeed, negative otherwise
068: * @param stringIndex - string index to start from;
069: * @param testString - input string
070: * @param matchResult - MatchResult to sore result into
071: * @return -1 if match fails or n > 0;
072: */
073: public abstract int matches(int stringIndex,
074: CharSequence testString, MatchResultImpl matchResult);
075:
076: /**
077: * Attempts to apply pattern starting from this set/stringIndex; returns
078: * index this search was started from, if value is negative, this means that
079: * this search didn't succeed, additional information could be obtained via
080: * matchResult;
081: *
082: * Note: this is default implementation for find method, it's based on
083: * matches, subclasses do not have to override find method unless
084: * more effective find method exists for a particular node type
085: * (sequence, i.e. substring, for example). Same applies for find back
086: * method.
087: *
088: * @param stringIndex
089: * starting index
090: * @param testString
091: * string to search in
092: * @param matchResult
093: * result of the match
094: * @return last searched index
095: */
096: public int find(int stringIndex, CharSequence testString,
097: MatchResultImpl matchResult) {
098: int length = matchResult.getRightBound();
099: while (stringIndex <= length) {
100: if (matches(stringIndex, testString, matchResult) >= 0) {
101: return stringIndex;
102: } else {
103: stringIndex++;
104: }
105: }
106: return -1;
107: }
108:
109: /**
110: * @param stringIndex -
111: * an index, to finish search back (left limit)
112: * @param startSearch -
113: * an index to start search from (right limit)
114: * @param testString -
115: * test string;
116: * @param matchResult
117: * match result
118: * @return an index to start back search next time if this search fails(new
119: * left bound); if this search fails the value is negative;
120: */
121: public int findBack(int stringIndex, int startSearch,
122: CharSequence testString, MatchResultImpl matchResult) {
123: int shift;
124: while (startSearch >= stringIndex) {
125: if (matches(startSearch, testString, matchResult) >= 0) {
126: return startSearch;
127: } else {
128: startSearch--;
129: }
130: }
131: return -1;
132: }
133:
134: /**
135: * Returns true, if this node has consumed any characters during
136: * positive match attempt, for example node representing character always
137: * consumes one character if it matches. If particular node matches
138: * empty sting this method will return false;
139: *
140: * @param matchResult
141: * @return
142: */
143: public abstract boolean hasConsumed(MatchResultImpl matchResult);
144:
145: /**
146: * Returns name for the particular node type.
147: * Used for debugging purposes.
148: */
149: protected abstract String getName();
150:
151: protected void setType(int type) {
152: this .type = type;
153: }
154:
155: public int getType() {
156: return this .type;
157: }
158:
159: protected String getQualifiedName() {
160: return "<" + index + ":" + getName() + ">"; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
161: }
162:
163: public String toString() {
164: return getQualifiedName();
165: }
166:
167: /**
168: * Returns the next.
169: */
170: public AbstractSet getNext() {
171: return next;
172: }
173:
174: /**
175: * Sets next abstract set
176: * @param next
177: * The next to set.
178: */
179: public void setNext(AbstractSet next) {
180: this .next = next;
181: }
182:
183: /**
184: * Returns true if the given node intersects with this one,
185: * false otherwise.
186: * This method is being used for quantifiers construction,
187: * lets consider the following regular expression (a|b)*ccc.
188: *
189: * (a|b) does not intersects with "ccc" and thus can be quantified
190: * greedily (w/o kickbacks), like *+ instead of *.
191: *
192: * @param set - usually previous node
193: *
194: * @return true if the given node intersects with this one
195: */
196: public boolean first(AbstractSet set) {
197: return true;
198: }
199:
200: /**
201: * This method is used for replacement backreferenced
202: * sets.
203: *
204: * @param prev - node who references to this node
205: * @return null if current node need not to be replaced
206: * JointSet which is replacement of
207: * current node otherwise
208: */
209: public JointSet processBackRefReplacement() {
210: return null;
211: }
212:
213: /**
214: * This method is used for traversing nodes after the
215: * first stage of compilation.
216: */
217: public void processSecondPass() {
218: this .isSecondPassVisited = true;
219:
220: if (next != null) {
221:
222: if (!next.isSecondPassVisited) {
223:
224: /*
225: * Add here code to do during the pass
226: */
227: JointSet set = next.processBackRefReplacement();
228:
229: if (set != null) {
230: next.isSecondPassVisited = true;
231: next = (AbstractSet) set;
232: }
233:
234: /*
235: * End code to do during the pass
236: */
237: next.processSecondPass();
238: } else {
239:
240: /*
241: * We reach node through next but it is already traversed.
242: * You can see this situation for AltGroupQuantifierSet.next
243: * when we reach this node through
244: * AltGroupQuantifierSet.innerset. ... .next
245: */
246:
247: /*
248: * Add here code to do during the pass
249: */
250: if (next instanceof SingleSet
251: && ((FSet) ((JointSet) next).fSet).isBackReferenced) {
252: next = next.next;
253: }
254:
255: /*
256: * End code to do during the pass
257: */
258: }
259: }
260: }
261: }
|