001: /*
002: * @(#)RegexpPool.java 1.16 06/10/10
003: *
004: * Copyright 1990-2006 Sun Microsystems, Inc. All Rights Reserved.
005: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
006: *
007: * This program is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU General Public License version
009: * 2 only, as published by the Free Software Foundation.
010: *
011: * This program is distributed in the hope that it will be useful, but
012: * WITHOUT ANY WARRANTY; without even the implied warranty of
013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014: * General Public License version 2 for more details (a copy is
015: * included at /legal/license.txt).
016: *
017: * You should have received a copy of the GNU General Public License
018: * version 2 along with this work; if not, write to the Free Software
019: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020: * 02110-1301 USA
021: *
022: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
023: * Clara, CA 95054 or visit www.sun.com if you need additional
024: * information or have any questions.
025: *
026: */
027:
028: package sun.misc;
029:
030: import java.io.*;
031:
032: /**
033: * A class to represent a pool of regular expressions. A string
034: * can be matched against the whole pool all at once. It is much
035: * faster than doing individual regular expression matches one-by-one.
036: *
037: * @see java.misc.RegexpTarget
038: * @author James Gosling
039: */
040:
041: public class RegexpPool {
042: private RegexpNode prefixMachine = new RegexpNode();
043: private RegexpNode suffixMachine = new RegexpNode();
044: private static final int BIG = 0x7FFFFFFF;
045: private int lastDepth = BIG;
046:
047: public RegexpPool() {
048: }
049:
050: /**
051: * Add a regular expression to the pool of regular expressions.
052: * @param re The regular expression to add to the pool.
053: For now, only handles strings that either begin or end with
054: a '*'.
055: * @param ret The object to be returned when this regular expression is
056: matched. If ret is an instance of the RegexpTarget class, ret.found
057: is called with the string fragment that matched the '*' as its
058: parameter.
059: * @exception REException error
060: */
061: public void add(String re, Object ret) throws REException {
062: add(re, ret, false);
063: }
064:
065: /**
066: * Replace the target for the regular expression with a different
067: * target.
068: *
069: * @param re The regular expression to be replaced in the pool.
070: * For now, only handles strings that either begin or end with
071: * a '*'.
072: * @param ret The object to be returned when this regular expression is
073: * matched. If ret is an instance of the RegexpTarget class, ret.found
074: * is called with the string fragment that matched the '*' as its
075: * parameter.
076: */
077: public void replace(String re, Object ret) {
078: try {
079: add(re, ret, true);
080: } catch (Exception e) {
081: // should never occur if replace is true
082: }
083: }
084:
085: /**
086: * Delete the regular expression and its target.
087: * @param re The regular expression to be deleted from the pool.
088: * must begin or end with a '*'
089: * @return target - the old target.
090: */
091: public Object delete(String re) {
092: Object o = null;
093: RegexpNode p = prefixMachine;
094: RegexpNode best = p;
095: int len = re.length() - 1;
096: int i;
097: boolean prefix = true;
098:
099: if (!re.startsWith("*") || !re.endsWith("*"))
100: len++;
101:
102: if (len <= 0)
103: return null;
104:
105: /* March forward through the prefix machine */
106: for (i = 0; p != null; i++) {
107: if (p.result != null && p.depth < BIG
108: && (!p.exact || i == len)) {
109: best = p;
110: }
111: if (i >= len)
112: break;
113: p = p.find(re.charAt(i));
114: }
115:
116: /* march backward through the suffix machine */
117: p = suffixMachine;
118: for (i = len; --i >= 0 && p != null;) {
119: if (p.result != null && p.depth < BIG) {
120: prefix = false;
121: best = p;
122: }
123: p = p.find(re.charAt(i));
124: }
125:
126: // delete only if there is an exact match
127: if (prefix) {
128: if (re.equals(best.re)) {
129: p("Deleting " + best.re);
130: o = best.result;
131: best.result = null;
132:
133: }
134: } else {
135: if (re.equals(best.re)) {
136: o = best.result;
137: best.result = null;
138: }
139: }
140: return o;
141: }
142:
143: /** Search for a match to a string & return the object associated
144: with it with the match. When multiple regular expressions
145: would match the string, the best match is returned first.
146: The next best match is returned the next time matchNext is
147: called.
148: @param s The string to match against the regular expressions
149: in the pool.
150: @return null on failure, otherwise the object associated with
151: the regular expression when it was added to the pool.
152: If the object is an instance of RegexpTarget, then
153: the return value is the result from calling
154: return.found(string_that_matched_wildcard).
155: */
156: public Object match(String s) {
157: return matchAfter(s, BIG);
158: }
159:
160: /** Identical to match except that it will only find matches to
161: regular expressions that were added to the pool <i>after</i>
162: the last regular expression that matched in the last call
163: to match() or matchNext() */
164: public Object matchNext(String s) {
165: return matchAfter(s, lastDepth);
166: }
167:
168: private void add(String re, Object ret, boolean replace)
169: throws REException {
170: int len = re.length();
171: RegexpNode p;
172: if (re.charAt(0) == '*') {
173: p = suffixMachine;
174: while (len > 1)
175: p = p.add(re.charAt(--len));
176: } else {
177: boolean exact = false;
178: if (re.charAt(len - 1) == '*')
179: len--;
180: else
181: exact = true;
182: p = prefixMachine;
183: for (int i = 0; i < len; i++)
184: p = p.add(re.charAt(i));
185: p.exact = exact;
186: }
187:
188: if (p.result != null && !replace)
189: throw new REException(re + " is a duplicate");
190:
191: p.re = re;
192: p.result = ret;
193: }
194:
195: private Object matchAfter(String s, int lastMatchDepth) {
196: RegexpNode p = prefixMachine;
197: RegexpNode best = p;
198: int bst = 0;
199: int bend = 0;
200: int len = s.length();
201: int i;
202: if (len <= 0)
203: return null;
204: /* March forward through the prefix machine */
205: for (i = 0; p != null; i++) {
206: if (p.result != null && p.depth < lastMatchDepth
207: && (!p.exact || i == len)) {
208: lastDepth = p.depth;
209: best = p;
210: bst = i;
211: bend = len;
212: }
213: if (i >= len)
214: break;
215: p = p.find(s.charAt(i));
216: }
217: /* march backward through the suffix machine */
218: p = suffixMachine;
219: for (i = len; --i >= 0 && p != null;) {
220: if (p.result != null && p.depth < lastMatchDepth) {
221: lastDepth = p.depth;
222: best = p;
223: bst = 0;
224: bend = i + 1;
225: }
226: p = p.find(s.charAt(i));
227: }
228: Object o = best.result;
229: if (o != null && o instanceof RegexpTarget)
230: o = ((RegexpTarget) o).found(s.substring(bst, bend));
231: return o;
232: }
233:
234: /** Resets the pool so that the next call to matchNext looks
235: at all regular expressions in the pool. match(s); is equivalent
236: to reset(); matchNext(s);
237: <p><b>Multithreading note:</b> reset/nextMatch leave state in the
238: regular expression pool. If multiple threads could be using this
239: pool this way, they should be syncronized to avoid race hazards.
240: match() was done in such a way that there are no such race
241: hazards: multiple threads can be matching in the same pool
242: simultaneously. */
243: public void reset() {
244: lastDepth = BIG;
245: }
246:
247: static public void main(String argv[]) {
248: if (argv.length < 5) {
249: p("need 5 params");
250: System.exit(1);
251: }
252: RegexpPool rp = new RegexpPool();
253: try {
254: for (int i = 1; i < argv.length; i++)
255: rp.add(argv[i], argv[i]);
256:
257: // delete the 3rd addition
258: p("Trying to delete " + argv[3]);
259: rp.delete(argv[3]);
260: } catch (Exception e) {
261: }
262: p((String) rp.match(argv[0]));
263: }
264:
265: /** Print this pool to standard output */
266: public void print(PrintStream out) {
267: out.print("Regexp pool:\n");
268: if (suffixMachine.firstchild != null) {
269: out.print(" Suffix machine: ");
270: suffixMachine.firstchild.print(out);
271: out.print("\n");
272: }
273: if (prefixMachine.firstchild != null) {
274: out.print(" Prefix machine: ");
275: prefixMachine.firstchild.print(out);
276: out.print("\n");
277: }
278: }
279:
280: private static void p(String s) {
281: // System.out.println(s);
282: }
283: }
284:
285: /* A node in a regular expression finite state machine. */
286: class RegexpNode {
287: char c;
288: RegexpNode firstchild;
289: RegexpNode nextsibling;
290: int depth;
291: boolean exact;
292: Object result;
293: String re = null;
294:
295: RegexpNode() {
296: c = '#';
297: depth = 0;
298: }
299:
300: RegexpNode(char C, int depth) {
301: c = C;
302: this .depth = depth;
303: }
304:
305: RegexpNode add(char C) {
306: RegexpNode p = firstchild;
307: if (p == null)
308: p = new RegexpNode(C, depth + 1);
309: else {
310: while (p != null)
311: if (p.c == C)
312: return p;
313: else
314: p = p.nextsibling;
315: p = new RegexpNode(C, depth + 1);
316: p.nextsibling = firstchild;
317: }
318: firstchild = p;
319: return p;
320: }
321:
322: RegexpNode find(char C) {
323: for (RegexpNode p = firstchild; p != null; p = p.nextsibling)
324: if (p.c == C)
325: return p;
326: return null;
327: }
328:
329: void print(PrintStream out) {
330: if (nextsibling != null) {
331: RegexpNode p = this ;
332: out.print("(");
333: while (p != null) {
334: out.write(p.c);
335: if (p.firstchild != null)
336: p.firstchild.print(out);
337: p = p.nextsibling;
338: out.write(p != null ? '|' : ')');
339: }
340: } else {
341: out.write(c);
342: if (firstchild != null)
343: firstchild.print(out);
344: }
345: }
346: }
|