001: /*
002: * 01/07/2003 - 15:19:32
003: *
004: * PatternPro.java -
005: * Copyright (C) 2003 Buero fuer Softwarearchitektur GbR
006: * ralf.meyer@karneim.com
007: * http://jrexx.sf.net
008: *
009: * This program is free software; you can redistribute it and/or
010: * modify it under the terms of the GNU Lesser General Public License
011: * as published by the Free Software Foundation; either version 2
012: * of the License, or (at your option) any later version.
013: *
014: * This program is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
017: * GNU Lesser General Public License for more details.
018: *
019: * You should have received a copy of the GNU Lesser General Public License
020: * along with this program; if not, write to the Free Software
021: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
022: */
023: package com.tc.jrexx.regex;
024:
025: import java.lang.ref.*;
026: import com.tc.jrexx.automaton.*;
027: import com.tc.jrexx.set.*;
028:
029: /**
030: * Regular expression based on a minimized deterministic automaton (FSA) and designed as a set of strings.
031: * <br>Use this class to manipulate a reular expression through set oprations or automaton methods
032: * <br>PatternPro differs from Pattern that the contributed set of strings is mutable through the methods
033: * addAll, removeAll and retainAll.
034: * <br>Further PaternPro provides access to its PAutomaton through the getAutomaton method.
035: * So it is possible to inspect the automaton's states through PAutomaton's methods.
036: * @author Ralf Meyer
037: * @version 1.0
038: */
039: public class PatternPro extends Pattern {
040:
041: WeakReference automatonWrapper = null;
042:
043: // WeakReference patternWrapper = null;
044:
045: protected PatternPro(ISet_char fullSet) {
046: super (fullSet);
047: }
048:
049: protected PatternPro(Automaton_Pattern automaton) {
050: super (automaton);
051: }
052:
053: protected AutomatonSet_String getInnerAutomaton() {
054: return this .automaton;
055: }
056:
057: public PatternPro() {
058: this (new Automaton_Pattern());
059: }
060:
061: /**
062: * creates a PatternPro with the given automaton. The automaton will not be cloned:
063: * two PatternPro can use the same automaton.
064: */
065: public PatternPro(PAutomaton automaton) {
066: super ((Automaton_Pattern) automaton.getAutomaton());
067: this .automatonWrapper = new WeakReference(automaton);
068: }
069:
070: /**
071: * copy constructor
072: */
073: public PatternPro(Pattern p) {
074: super ((Automaton_Pattern) p.automaton.clone());
075: }
076:
077: public PatternPro(String regEx) {
078: super (Pattern.get(regEx, false));
079: }
080:
081: public void setRegEx(String regEx) {
082: this .automaton = Pattern.get(regEx, false);
083: }
084:
085: /**
086: * if p is an instance of PatternPro
087: * use setAutomaton(p.getAutomaton());
088: * else setAutomaton(new PatternPro(p).getAutomaton())
089: *
090: * @deprecated
091: */
092: public void setPattern(Pattern p) {
093: this .automaton = (Automaton_Pattern) p.automaton.clone();
094: if (p.getClass() != Pattern.class)
095: this .automaton.minimize();
096: }
097:
098: public void setAutomaton(PAutomaton a) {
099: this .automatonWrapper = new WeakReference(a);
100: this .automaton = (Automaton_Pattern) a.getAutomaton();
101: }
102:
103: /**
104: * don't needed: you have a PatternPro which extends Pattern.
105: * (Pattern)this.clone() has the same effect
106: * @deprecated
107: */
108: public Pattern getPattern() {
109: return new Pattern((Automaton_Pattern) this .automaton.clone());
110: }
111:
112: public PAutomaton getAutomaton() {
113: if (this .automatonWrapper == null) {
114: PAutomaton answer = new PAutomaton(this .automaton);
115: this .automatonWrapper = new WeakReference(answer);
116: return answer;
117: }
118:
119: PAutomaton answer = (PAutomaton) this .automatonWrapper.get();
120: if (answer != null)
121: return answer;
122:
123: answer = new PAutomaton(this .automaton);
124: this .automatonWrapper = new WeakReference(answer);
125: return answer;
126: }
127:
128: public boolean contains(String s, int offset, int length) {
129: if (this .automaton.isDeterministic())
130: return super .contains(s, offset, length);
131:
132: Automaton.State state = ((Automaton_Pattern) this .automaton)
133: .getStartState();
134:
135: //int _offset = offset;
136: //int _length = length;
137: //long start = System.currentTimeMillis();
138: //try {
139: if (state == null)
140: return false;
141:
142: Automaton.IState istate = ((Automaton_Pattern.PState) state)
143: .getEClosure();
144: Automaton.LinkedSet_State states = (istate instanceof Automaton_Pattern.PState) ? this .automaton
145: .newLinkedSet_State((Automaton_Pattern.PState) istate)
146: : (Automaton.LinkedSet_State) istate;
147:
148: Automaton.LinkedSet_State newStates = this .automaton
149: .newLinkedSet_State();
150:
151: for (; length > 0; ++offset, --length) {
152: loop: for (Automaton.Wrapper_State w = states.elements; w != null; w = w.next) {
153: if (w.state.isDeterministic()) {
154: for (Automaton.State.Transition trans = w.state.transitions; trans != null; trans = trans.next) {
155: if (trans.charSet.contains(s.charAt(offset))) {
156: newStates.add(trans.toState);
157: continue loop;
158: }
159: }
160: } else {
161: for (Automaton.State.Transition trans = w.state.transitions; trans != null; trans = trans.next) {
162: if (trans.charSet.contains(s.charAt(offset))) {
163: newStates.add(trans.toState);
164: }
165: }
166: }
167: }
168:
169: for (Automaton.Wrapper_State w = newStates.elements; w != null; w = w.next) {
170: for (Automaton.State.Transition trans = w.state.eTransitions; trans != null; trans = trans.next) {
171: newStates.add(trans.toState);
172: }
173: }
174:
175: if (newStates.isEmpty())
176: return false;
177:
178: Automaton.LinkedSet_State tmp = states;
179: states = newStates;
180: newStates = tmp;
181: newStates.clear();
182: }
183: return ((Automaton_Pattern.LinkedSet_PState) states).isFinal();
184:
185: //} finally {
186: // long end = System.currentTimeMillis();
187: // System.out.println("Pattern.contains: "+(end-start));
188: // if (length>0) {
189: // System.out.println(this.automaton);
190: // s = s.substring(_offset,_offset+_length);
191: // offset = offset-_offset;
192: // if (offset<=100) System.out.println(" can start with: "+s.substring(0,offset)+"\"");
193: // else System.out.println(" can start with: \""+s.substring(0,100)+"...\""+s.length());
194: //
195: // if (s.length()-offset<=100) System.out.println(" stopped for : "+s.substring(offset)+"\"");
196: // else System.out.println(" stopped for : "+s.substring(offset,offset+100)+"...\""+(s.length()-offset));
197: //
198: // System.out.println("currentState: "+state);
199: // }
200: //}
201: }
202:
203: public boolean contains(char[] chars, int offset, int length) {
204: if (this .automaton.isDeterministic())
205: return super .contains(chars, offset, length);
206:
207: Automaton.State state = ((Automaton_Pattern) this .automaton)
208: .getStartState();
209: if (state == null)
210: return false;
211:
212: Automaton.IState istate = ((Automaton_Pattern.PState) state)
213: .getEClosure();
214: Automaton.LinkedSet_State states = (istate instanceof Automaton_Pattern.PState) ? this .automaton
215: .newLinkedSet_State((Automaton_Pattern.PState) istate)
216: : (Automaton.LinkedSet_State) istate;
217:
218: Automaton.LinkedSet_State newStates = this .automaton
219: .newLinkedSet_State();
220:
221: for (; length > 0; ++offset, --length) {
222: loop: for (Automaton.Wrapper_State w = states.elements; w != null; w = w.next) {
223: if (w.state.isDeterministic()) {
224: for (Automaton.State.Transition trans = w.state.transitions; trans != null; trans = trans.next) {
225: if (trans.charSet.contains(chars[offset])) {
226: newStates.add(trans.toState);
227: continue loop;
228: }
229: }
230: } else {
231: for (Automaton.State.Transition trans = w.state.transitions; trans != null; trans = trans.next) {
232: if (trans.charSet.contains(chars[offset])) {
233: newStates.add(trans.toState);
234: }
235: }
236: }
237: }
238:
239: for (Automaton.Wrapper_State w = newStates.elements; w != null; w = w.next) {
240: for (Automaton.State.Transition trans = w.state.eTransitions; trans != null; trans = trans.next) {
241: newStates.add(trans.toState);
242: }
243: }
244:
245: if (newStates.isEmpty())
246: return false;
247:
248: Automaton.LinkedSet_State tmp = states;
249: states = newStates;
250: newStates = tmp;
251: newStates.clear();
252: }
253: return ((Automaton_Pattern.LinkedSet_PState) states).isFinal();
254: }
255:
256: public boolean contains(java.io.Reader in)
257: throws java.io.IOException {
258: if (this .automaton.isDeterministic())
259: return super .contains(in);
260:
261: Automaton.State state = ((Automaton_Pattern) this .automaton)
262: .getStartState();
263: if (state == null)
264: return false;
265:
266: Automaton.IState istate = ((Automaton_Pattern.PState) state)
267: .getEClosure();
268: Automaton.LinkedSet_State states = (istate instanceof Automaton_Pattern.PState) ? this .automaton
269: .newLinkedSet_State((Automaton_Pattern.PState) istate)
270: : (Automaton.LinkedSet_State) istate;
271:
272: Automaton.LinkedSet_State newStates = this .automaton
273: .newLinkedSet_State();
274:
275: for (int ch = in.read(); ch != -1; ch = in.read()) {
276: loop: for (Automaton.Wrapper_State w = states.elements; w != null; w = w.next) {
277: if (w.state.isDeterministic()) {
278: for (Automaton.State.Transition trans = w.state.transitions; trans != null; trans = trans.next) {
279: if (trans.charSet.contains((char) ch)) {
280: newStates.add(trans.toState);
281: continue loop;
282: }
283: }
284: } else {
285: for (Automaton.State.Transition trans = w.state.transitions; trans != null; trans = trans.next) {
286: if (trans.charSet.contains((char) ch)) {
287: newStates.add(trans.toState);
288: }
289: }
290: }
291: }
292:
293: for (Automaton.Wrapper_State w = newStates.elements; w != null; w = w.next) {
294: for (Automaton.State.Transition trans = w.state.eTransitions; trans != null; trans = trans.next) {
295: newStates.add(trans.toState);
296: }
297: }
298:
299: if (newStates.isEmpty())
300: return false;
301:
302: Automaton.LinkedSet_State tmp = states;
303: states = newStates;
304: newStates = tmp;
305: newStates.clear();
306: }
307: return ((Automaton_Pattern.LinkedSet_PState) states).isFinal();
308: }
309:
310: public void complement() {
311: this .automaton.complement();
312: }
313:
314: public void addAll(String regEx) {
315: this .automaton.addAll(regEx);
316: this .automaton.minimize();
317: }
318:
319: public void retainAll(String regEx) {
320: this .automaton.retainAll(regEx);
321: this .automaton.minimize();
322: }
323:
324: public void removeAll(String regEx) {
325: this .automaton.removeAll(regEx);
326: this .automaton.minimize();
327: }
328:
329: public void addAll(Pattern pattern) {
330: this .automaton.addAll(pattern.automaton);
331: this .automaton.minimize();
332: }
333:
334: public void retainAll(Pattern pattern) {
335: this .automaton.retainAll(pattern.automaton);
336: this .automaton.minimize();
337: }
338:
339: public void removeAll(Pattern pattern) {
340: this .automaton.removeAll(pattern.automaton);
341: this .automaton.minimize();
342: }
343:
344: public void addAll(PAutomaton a) {
345: this .automaton.addAll((Automaton_Pattern) a.getAutomaton());
346: this .automaton.minimize();
347: }
348:
349: public void retainAll(PAutomaton a) {
350: this .automaton.retainAll((Automaton_Pattern) a.getAutomaton());
351: this .automaton.minimize();
352: }
353:
354: public void removeAll(PAutomaton a) {
355: this .automaton.removeAll((Automaton_Pattern) a.getAutomaton());
356: this .automaton.minimize();
357: }
358:
359: public void clear() {
360: this.automaton.clear();
361: }
362:
363: }
|