001: /*
002: * 01/07/2003 - 15:19:32
003: *
004: * Pattern.java -
005: * Copyright (C) 2003 Buero fuer Softwarearchitektur GbR
006: * ralf.meyer@karneim.com
007: * http://jrexx.sf.net
008: *
009: * This program is free software; you can redistribute it and/or
010: * modify it under the terms of the GNU Lesser General Public License
011: * as published by the Free Software Foundation; either version 2
012: * of the License, or (at your option) any later version.
013: *
014: * This program is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
017: * GNU Lesser General Public License for more details.
018: *
019: * You should have received a copy of the GNU Lesser General Public License
020: * along with this program; if not, write to the Free Software
021: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
022: */
023: package com.tc.jrexx.regex;
024:
025: import com.tc.jrexx.set.ISet_char;
026:
027: import java.util.*;
028:
029: import com.tc.jrexx.automaton.*;
030:
031: import java.lang.ref.SoftReference;
032:
033: /**
034: * Regular expression based on a minimized deterministic automaton (DFA) and designed as an immutable set of strings.
035: * <br>Use this class to create a regular expression and match strings against it.
036: * <br>for example:
037: * <br>to check whether a given string is a number try<br>
038: * <br><code>new Pattern("[0-9]+").contains(s)</code>
039: *
040: * @author Ralf Meyer
041: * @version 1.1
042: */
043: public class Pattern implements Cloneable {
044:
045: protected static final HashMap AUTOMATON_MAP = new HashMap();
046:
047: protected static final Automaton_Pattern get(String regEx,
048: boolean cache) {
049: if (cache) {
050: synchronized (AUTOMATON_MAP) {
051: final SoftReference reference = (SoftReference) AUTOMATON_MAP
052: .get(regEx);
053:
054: if (reference != null) {
055: Automaton_Pattern automaton = (Automaton_Pattern) reference
056: .get();
057: if (automaton != null)
058: return automaton;
059: }
060:
061: final Automaton_Pattern automaton = new Automaton_Pattern(
062: regEx);
063:
064: final Automaton.LinkedSet_State states = automaton
065: .getStates();
066: for (Automaton.Wrapper_State w = states.elements; w != null; w = w.next) {
067: for (Automaton.State.Transition trans = w.state.eTransitions; trans != null; trans = trans.next) {
068: trans.properties = null;
069: }
070: for (Automaton.State.Transition trans = w.state.transitions; trans != null; trans = trans.next) {
071: trans.properties = null;
072: }
073: }
074:
075: automaton.minimize();
076:
077: AUTOMATON_MAP.put(regEx, new SoftReference(automaton));
078: return automaton;
079: }
080: } else {
081: SoftReference reference = null;
082: synchronized (AUTOMATON_MAP) {
083: reference = (SoftReference) AUTOMATON_MAP.get(regEx);
084: }
085:
086: if (reference != null) {
087: Automaton_Pattern automaton = (Automaton_Pattern) reference
088: .get();
089: if (automaton != null)
090: return automaton;
091: }
092:
093: final Automaton_Pattern automaton = new Automaton_Pattern(
094: regEx);
095:
096: final Automaton.LinkedSet_State states = automaton
097: .getStates();
098: for (Automaton.Wrapper_State w = states.elements; w != null; w = w.next) {
099: for (Automaton.State.Transition trans = w.state.eTransitions; trans != null; trans = trans.next) {
100: trans.properties = null;
101: }
102: for (Automaton.State.Transition trans = w.state.transitions; trans != null; trans = trans.next) {
103: trans.properties = null;
104: }
105: }
106:
107: automaton.minimize();
108:
109: return automaton;
110: }
111: }
112:
113: protected Automaton_Pattern automaton;
114:
115: /*
116: protected Pattern() {
117: this(new Automaton_Pattern());
118: }
119: */
120: protected Pattern(Automaton_Pattern automaton) {
121: this .automaton = automaton;
122: }
123:
124: protected Pattern(ISet_char fullSet) {
125: this .automaton = new Automaton_Pattern(fullSet);
126: }
127:
128: /**
129: * creates a minimized deterministic automaton (DFA) from the given regEx pattern.
130: */
131: public Pattern(String regEx) {
132: this (Pattern.get(regEx, true));
133: }
134:
135: public boolean contains(String s) {
136: return this .contains(s, 0, s.length());
137: }
138:
139: public boolean contains(String s, int offset) {
140: return this .contains(s, offset, s.length() - offset);
141: }
142:
143: public boolean contains(String s, int offset, int length) {
144: Automaton.State state = ((Automaton_Pattern) this .automaton)
145: .getStartState();
146:
147: //int _offset = offset;
148: //int _length = length;
149: //long start = System.currentTimeMillis();
150: //try {
151: if (state == null)
152: return false;
153:
154: Automaton.LinkedSet_State states = this .automaton
155: .newLinkedSet_State(state);
156: Automaton.LinkedSet_State newStates = this .automaton
157: .newLinkedSet_State();
158:
159: loop: for (; length > 0; ++offset, --length) {
160: for (Automaton.State.Transition trans = state.transitions; trans != null; trans = trans.next) {
161: if (trans.charSet.contains(s.charAt(offset))) {
162: state = trans.toState;
163: continue loop;
164: }
165: }
166: return false;
167: }
168:
169: return ((Automaton_Pattern.PState) state).isFinal();
170: //} finally {
171: // long end = System.currentTimeMillis();
172: // System.out.println("Pattern.contains: "+(end-start));
173: // if (length>0) {
174: // System.out.println(this.automaton);
175: // s = s.substring(_offset,_offset+_length);
176: // offset = offset-_offset;
177: // if (offset<=100) System.out.println(" can start with: "+s.substring(0,offset)+"\"");
178: // else System.out.println(" can start with: \""+s.substring(0,100)+"...\""+s.length());
179:
180: // if (s.length()-offset<=100) System.out.println(" stopped for : "+s.substring(offset)+"\"");
181: // else System.out.println(" stopped for : "+s.substring(offset,offset+100)+"...\""+(s.length()-offset));
182:
183: // System.out.println("currentState: "+state);
184: // }
185: //}
186:
187: }
188:
189: public boolean contains(char[] chars) {
190: return this .contains(chars, 0, chars.length);
191: }
192:
193: public boolean contains(char[] chars, int offset) {
194: return this .contains(chars, offset, chars.length - offset);
195: }
196:
197: public boolean contains(char[] chars, int offset, int length) {
198: Automaton.State state = ((Automaton_Pattern) this .automaton)
199: .getStartState();
200: if (state == null)
201: return false;
202:
203: loop: for (; length > 0; ++offset, --length) {
204: for (Automaton.State.Transition trans = state.transitions; trans != null; trans = trans.next) {
205: if (trans.charSet.contains(chars[offset])) {
206: state = trans.toState;
207: continue loop;
208: }
209: }
210: return false;
211: }
212:
213: return ((Automaton_Pattern.PState) state).isFinal;
214: }
215:
216: public boolean contains(java.io.Reader in)
217: throws java.io.IOException {
218: Automaton.State state = ((Automaton_Pattern) this .automaton)
219: .getStartState();
220: if (state == null)
221: return false;
222:
223: loop: for (int ch = in.read(); ch != -1; ch = in.read()) {
224: for (Automaton.State.Transition trans = state.transitions; trans != null; trans = trans.next) {
225: if (trans.charSet.contains((char) ch)) {
226: state = trans.toState;
227: continue loop;
228: }
229: }
230: return false;
231: }
232: return ((Automaton_Pattern.PState) state).isFinal;
233: }
234:
235: public String getRegEx() {
236: return this .automaton.regEx;
237: }
238:
239: public String toString() {
240: return this .getRegEx();
241: }
242:
243: /*
244: private void writeObject(java.io.OutputStream out) throws IOException {
245: out.writeObject(this.toAutomatonData());
246: }
247: private void readObject(java.io.InputStream in) throws IOException,ClassNotFoundException {
248: SAutomatonData a = (SAutomatonData)in.readObject();
249: this.init(a);
250: }
251:
252:
253: public SAutomatonData toAutomatonData() {
254: return new PAutomaton(this.automaton).toData();
255: }
256:
257: public void toAutomatonData(OutputStream automatonDataStream) throws IOException {
258: new OutputStream(automatonDataStream).writeObject(this);
259: }
260:
261:
262: protected void init(SAutomatonData a) {
263: this.automaton = (Automaton_Pattern)new PAutomaton(a).getAutomaton();
264: }
265: */
266:
267: public Object clone() {
268: try {
269: Pattern clone = (Pattern) super .clone();
270: clone.automaton = (Automaton_Pattern) clone.automaton
271: .clone();
272: return clone;
273: } catch (CloneNotSupportedException e) {
274: throw new Error("should never happen");
275: }
276: }
277:
278: /*
279: public Iterator iterator() {
280: return new Iterator() {
281: HashMap fromStates = null;
282: HashMap toStates = new HashMap();
283: Iterator it_states = null;
284: ISet_char.Iterator it_charSet = null;
285: {
286: Object startState = Pattern.this.automaton.getStartState();
287: if (startState!=null) {
288: LinkedList lists = new LinkedList(); lists.add(new LinkedList());
289: toStates.put(startState,lists);
290: }
291:
292:
293: }
294:
295: public boolean hasNext() {
296: return false;
297: }
298: public Object next() {
299: it_charSet.next();
300: //if (this.it_states==null) it_states = fromStates.keySet().iterator();
301: if (it_states.hasNext()==false) {
302: this.fromStates = toStates;
303: this.toStates = new HashMap();
304: this.it_states
305: }
306:
307: IStatePro.ITransition[] trans = state.getTransitions();
308:
309: Iterator it_lists = lists.iterator();
310: while (it_lists.hasNext()) {
311: LinkedList elements = (LinkedList)it_lists.next();
312:
313: for (int i=0; i<trans.length; ++i) {
314: LinkedList toLists = (LinkedList)toStates.get(trans[i].getToState());
315: if (toLists==null) {
316: toLists = new LinkedList();
317: toStates.put(trans[i].getToState(),toLists);
318: }
319:
320: LinkedList toElements = new LinkedList(elements);
321: toElements.add(trans[i].getCharSet());
322: toLists.add(toElements);
323: }
324: }
325:
326: return null;
327: }
328:
329: public void remove() {
330: throw new UnsupportedOperationException();
331: }
332: };
333: }
334: */
335: }
|