001: /*
002: * Resolver.java February 2001
003: *
004: * Copyright (C) 2001, Niall Gallagher <niallg@users.sf.net>
005: *
006: * This library is free software; you can redistribute it and/or
007: * modify it under the terms of the GNU Lesser General Public
008: * License as published by the Free Software Foundation.
009: *
010: * This library is distributed in the hope that it will be useful,
011: * but WITHOUT ANY WARRANTY; without even the implied warranty of
012: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
013: * GNU Lesser General Public License for more details.
014: *
015: * You should have received a copy of the GNU Lesser General
016: * Public License along with this library; if not, write to the
017: * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
018: * Boston, MA 02111-1307 USA
019: */
020:
021: package simple.util;
022:
023: import simple.util.cache.CacheList;
024: import java.io.ObjectInputStream;
025: import java.io.Serializable;
026: import java.io.IOException;
027: import java.util.Vector;
028:
029: /**
030: * This is used to match <code>String</code>'s with the first pattern
031: * that <code>String</code> matches. A pattern consists of characters
032: * with either the '*' or '?' characters as wild characters. The '*'
033: * character is completely wild meaning that is will match nothing or
034: * a long sequence of characters. The '?' character matches a single
035: * character.
036: * <p>
037: * If the '?' character immediately follows the '*' character then the
038: * match is made as any sequence of characters up to the first match
039: * of the next character. For example "/*?/index.jsp" will match all
040: * files preceeded by only a single path. So "/pub/index.jsp" will
041: * match, however "/pub/bin/index.jsp" will not, as it has two paths.
042: * So, in effect the '*?' sequence will match anything or nothing up
043: * to the first occurence of the next character in the pattern.
044: * <p>
045: * A design goal of the <code>Resolver</code> was to make it capable
046: * of high performance in a multithreaded environment. In order to
047: * achieve a high performance the <code>Resolver</code> can cache the
048: * resolutions it makes so that if the same text is given to the
049: * <code>Resolver.resolve</code> method a cached result can be retrived
050: * quickly which will decrease the length of time a thread occupies the
051: * synchronized method. The cache used is a <code>CacheList</code>.
052: * <p>
053: * The semantics of the resolver are such that the last pattern inserted
054: * with a wild string is the first one checked for a match. This means
055: * that if a sequence of insertions like <code>insert(a,b)</code>
056: * <code>insert(x,y)</code> is made and then a <code>resolve(z)</code>
057: * is attemped then z will be compared to x first and then a, if z
058: * matches x then y is given as the result and if z matches a then b
059: * will be returned, remember if z matches both a and x then y will be
060: * the result due to the fact that is was the last pattern inserted.
061: *
062: * @author Niall Gallagher
063: *
064: * @see simple.util.cache.CacheList
065: */
066: public class Resolver implements Serializable {
067:
068: /**
069: * This is the default size of the resolve cache.
070: */
071: private static final int DEFAULT_CACHE = 512;
072:
073: /**
074: * The is used to store recent resolves for speed.
075: */
076: private transient CacheList cache;
077:
078: /**
079: * This is used to store the matches and patterns.
080: */
081: private Vector matches;
082:
083: /**
084: * This is the size of the cache for this instance.
085: */
086: private int size;
087:
088: /**
089: * The default constructor will create a <code>Resolver</code>
090: * without a large cache size. This is intended for use when
091: * the requests for <code>resolve</code> tend to use strings
092: * that are reasonably similar. If the strings issued to this
093: * instance are dramatically different then the cache tends
094: * to be an overhead rather than a bonus.
095: */
096: public Resolver() {
097: this (DEFAULT_CACHE);
098: }
099:
100: /**
101: * This constructor allows resolves to be cached for increased
102: * performance. When strings tend to be reused for pattern
103: * matching then this constructor should be used. The cache is
104: * a simple Least Recently Used List. So only patterns that
105: * have a high 'hit' rate will remain in the cache.
106: * <p>
107: * If a caching is done then an <code>CacheList</code> is used.
108: * This is a transient object so that when this instance of the
109: * <code>Resolver</code> is serialized list is cleared.
110: *
111: * @param size if this is true the caching of resolves is done
112: * using a <code>CacheList</code>
113: */
114: public Resolver(int size) {
115: this .cache = new CacheList(size);
116: this .matches = new Vector();
117: this .size = size;
118: }
119:
120: /**
121: * This will search the patterns in this <code>Resolver</code> to
122: * see if there is a pattern in it that matches the string given.
123: * This will search the patterns from the last entered pattern to
124: * the first entered. So that the last entered patterns are the
125: * most searched patterns and will resolve it first if it matches.
126: * <p>
127: * Although it is criticial that this perform well this method
128: * is synchronized. The reasnon for this is that if there was
129: * several threads modifing the <code>Resolver</code> at the same
130: * time <code>ConcurrentModificationException</code> exceptions
131: * would be thrown this would reduce the usefulness of this object.
132: *
133: * @param text this is the <code>String</code> to be resolved
134: *
135: * @return will return the <code>String</code> that pattern
136: * resolves to
137: */
138: public synchronized String resolve(String text) {
139: if (cache.contains(text)) {
140: Entry entry = (Entry) cache.lookup(text);
141: if (entry != null)
142: return entry.match;
143: }
144: char[] array = text.toCharArray();
145:
146: for (int pos = matches.size() - 1; pos >= 0; pos--) {
147: Entry entry = (Entry) matches.elementAt(pos);
148: if (match(array, entry.array)) {
149: cache.insert(text, entry);
150: return entry.match;
151: }
152: }
153: return null;
154: }
155:
156: /**
157: * This will add a new pattern with its resolution. This will
158: * be added to the end of the list and will be the first pattern
159: * checked for a match when the <code>resolve</code> method is
160: * invoked. This inserts the pair at the end of the list and so
161: * it will be the first pattern checked. If the match is an
162: * empty string then there is not insertion made.
163: *
164: * @param pattern this is the pattern that will resolve this
165: * @param match the <code>String</code> returned when a correct
166: * match is made
167: *
168: * @throws NullPointerException if either string is null
169: */
170: public synchronized void insert(String pattern, String match) {
171: if (match.length() > 0) {
172: Entry entry = new Entry(pattern, match);
173: matches.addElement(entry);
174: cache.clear();
175: }
176: }
177:
178: /**
179: * This will add a new pattern with its resolution. This will
180: * be added to the end of the list and will be the first pattern
181: * checked for a match when the <code>resolve</code> method is
182: * invoked. This will insert the pair into the position specified.
183: * This behaves like the <code>Vector.insertElementAt</code>
184: * method, by bumping all entrys up one position. If the match is
185: * an empty string then there is not insertion made.
186: *
187: * @param pattern this is the pattern that will resolve this
188: * @param match the <code>String</code> returned when a correct
189: * match is made
190: * @param pos this is the position to insert the pair into
191: *
192: * @throws NullPointerException if either string is null
193: */
194: public synchronized void insert(String pattern, String match,
195: int pos) {
196: if (match.length() > 0) {
197: Entry entry = new Entry(pattern, match);
198: matches.insertElementAt(entry, pos);
199: cache.clear();
200: }
201: }
202:
203: /**
204: * This will remove the entry in this <code>Resolver</code> so that
205: * the pattern will not be used to resolve <code>String</code>'s any
206: * more. This behaves like the <code>Vector.removeElementAt</code>.
207: *
208: * @param pos this is the position that is removed from this
209: */
210: public synchronized void remove(int pos) {
211: matches.removeElementAt(pos);
212: cache.clear();
213: }
214:
215: /**
216: * This will remove the entry in this <code>Resolver</code> so that
217: * the pattern will not be used to resolve <code>String</code>'s any
218: * more. If the wild string is parameter is <code>null</code> then
219: * there is a <code>NullPointerException</code>. This behaves like
220: * the <code>Vector.removeElementAt</code> method.
221: *
222: * @param pattern this is the pattern that is removed from this
223: */
224: public synchronized void remove(String pattern) {
225: matches.remove(new Entry(pattern, ""));
226: cache.clear();
227: }
228:
229: /**
230: * This will remove the entry in this <code>Resolver</code> so that
231: * the pattern will not be used to resolve <code>String</code>'s any
232: * more. If the wild string is parameter is <code>null</code> then
233: * there is a <code>NullPointerException</code>. This will only use
234: * the <code>Match.getPattern</code> to remove the match. This is a
235: * convienence method that it can be used with <code>getMatches</code>.
236: *
237: * @param match this is the match that is removed from this
238: */
239: public synchronized void remove(Match match) {
240: matches.remove(match);
241: cache.clear();
242: }
243:
244: /**
245: * Retrives an array of <code>Match</code>'s of each pair that
246: * was entered into this <code>Resolver</code>. The elements in
247: * the array are ordered to represent the order they were made.
248: *
249: * @return the array of <code>Match</code> objects that exist
250: */
251: public synchronized Match[] getMatches() {
252: Match[] list = new Match[matches.size()];
253: return (Match[]) matches.toArray(list);
254: }
255:
256: /**
257: * This will return the corrosponding <code>Match</code> at
258: * the specified position. This can be used in conjunction
259: * with the <code>indexOf</code> method to remove matches.
260: *
261: * @param pos the position <code>Match</code> to retrive
262: *
263: * @return this will retrun the match object at that position
264: */
265: public synchronized Match getMatch(int pos) {
266: return (Match) matches.elementAt(pos);
267: }
268:
269: /**
270: * This will return the corrosponding <code>Match</code> at
271: * the specified position. This can be used in conjunction
272: * with the <code>indexOf</code> method to remove matches.
273: * This is a convienence method that avoids having to use
274: * the <code>indexOf</code> method.
275: *
276: * @param pattern the position <code>Match</code> to retrive
277: *
278: * @return returns the match object at that position
279: */
280: public synchronized Match getMatch(String pattern) {
281: int index = indexOf(pattern);
282: return index >= 0 ? getMatch(index) : null;
283: }
284:
285: /**
286: * Used to find the position of the <code>Match</code>
287: * stored using the specified pattern. This operates in a
288: * similar way to the <code>Vector.indexOf</code> method.
289: *
290: * @param pattern the pattern that the <code>Match</code>
291: * is stored using
292: *
293: * @return this will return the position of that pattern
294: */
295: public synchronized int indexOf(String pattern) {
296: return matches.indexOf(new Entry(pattern, ""));
297: }
298:
299: /**
300: * Used to find the position of the <code>Match</code>
301: * stored using the specified pattern. This operates in a
302: * similar way to the <code>Vector.indexOf</code> method.
303: *
304: * @param pattern the pattern that the <code>Match</code>
305: * is stored using
306: * @param from this starts checking from this position
307: *
308: * @return this will return the position of that pattern
309: */
310: public synchronized int indexOf(String pattern, int from) {
311: return matches.indexOf(new Entry(pattern, ""));
312: }
313:
314: /**
315: * This will check this <code>Resolver</code> to see if the
316: * pattern given is used by this <code>Resolver</code>, if
317: * it is this will return <code>true</code>.
318: *
319: * @param pattern the pattern that is used resolving
320: * <code>String</code>'s
321: *
322: * @return this will return <code>true</code> if this pattern
323: * exists in this <code>Resolver</code>
324: */
325: public synchronized boolean contains(String pattern) {
326: return matches.contains(new Entry(pattern, ""));
327: }
328:
329: /**
330: * This will check this <code>Resolver</code> to see if the
331: * match given is used by this <code>Resolver</code>, if it
332: * is this will return <code>true</code>.
333: *
334: * @param match this is a specific match and pattern pair
335: *
336: * @return this will return <code>true</code> if this match
337: * exists in this <code>Resolver</code>
338: */
339: public synchronized boolean contains(Match match) {
340: return matches.contains(match);
341: }
342:
343: /**
344: * This can be used to determine wheather or not there is any
345: * entrys in the <code>Resolver</code>. If this <code>Resolver</code>
346: * is empty this will return </code>true</code>, there must be at
347: * least one entry in the <code>Resolver</code>.
348: *
349: * @return this returns <code>true</code> is there are no elements
350: */
351: public synchronized boolean isEmpty() {
352: return matches.isEmpty();
353: }
354:
355: /**
356: * This will return the number of entrys that have been inserted into
357: * this <code>Resolver</code>. This uses the <code>Vector</code>'s
358: * <code>size</code> implementation to determine the number of entrys.
359: *
360: * @return this returns the number of pattern/match pairs
361: */
362: public synchronized int size() {
363: return matches.size();
364: }
365:
366: /**
367: * This is used to clear all matches from the resolver. This ensures
368: * that the resolver contains no matches and that the resolution
369: * cache is cleared. This is used to that the resolver can be reused
370: * and have new pattern matches inserted into it for resolution.
371: */
372: public synchronized void clear() {
373: matches.clear();
374: cache.clear();
375: }
376:
377: /**
378: * The <code>Resolver</code> object is <code>Serializable</code>
379: * so this method is used to recover the contents of the object.
380: * Also because the <code>Resolver</code> uses a transient cache
381: * this enables the cache to be re-established if the instance
382: * is set to enable caching. The cache is instantiated regardless
383: * of wheather this uses caching or not.
384: *
385: * @param in this is the <code>ObjectInputStream</code> the
386: * <code>Resolver</code> is re-constituted from
387: */
388: private void readObject(ObjectInputStream in) throws IOException,
389: ClassNotFoundException {
390: in.defaultReadObject();
391: cache = new CacheList(size);
392: }
393:
394: /**
395: * This acts as a driver to the <code>match</code> method so
396: * that the offsets can be used as zeros for the start of
397: * matching for the <code>match(char[],int,char[],int)</code>.
398: * method.
399: *
400: * @param str this is the buffer that is to be resolved
401: * @param wild this is the pattern that will be used
402: */
403: private boolean match(char[] str, char[] wild) {
404: return match(str, 0, wild, 0);
405: }
406:
407: /**
408: * This will be used to check to see if a certain buffer
409: * matches the pattern if it does then it returns
410: * <code>true</code>. This is a recursive method that will
411: * attempt to match the buffers based on the wild characters
412: * '?' and '*'. This returns <code>true</code> if they match.
413: *
414: * @param str this is the buffer that is to be resolved
415: * @param off this is the read offset for the str buffer
416: * @param wild this is the pattern that will be used
417: * @param pos this is the read offset for the wild buffer
418: */
419: private boolean match(char[] str, int off, char[] wild, int pos) {
420: while (pos < wild.length && off < str.length) { /* examine chars */
421: if (wild[pos] == '*') {
422: while (wild[pos] == '*') { /* totally wild */
423: if (++pos >= wild.length) /* if finished */
424: return true;
425: }
426: if (wild[pos] == '?') { /* *? is special */
427: if (++pos >= wild.length)
428: return true;
429: }
430: for (; off < str.length; off++) { /* find next matching char */
431: if (str[off] == wild[pos] || wild[pos] == '?') { /* match */
432: if (wild[pos - 1] != '?') {
433: if (match(str, off, wild, pos))
434: return true;
435: } else {
436: break;
437: }
438: }
439: }
440: if (str.length == off)
441: return false;
442: }
443: if (str[off++] != wild[pos++]) {
444: if (wild[pos - 1] != '?')
445: return false; /* if not equal */
446: }
447: }
448: if (wild.length == pos) { /* if wild is finished */
449: return str.length == off; /* is str finished */
450: }
451: while (wild[pos] == '*') { /* ends in all stars */
452: if (++pos >= wild.length) /* if finished */
453: return true;
454: }
455: return false;
456: }
457:
458: /**
459: * The <code>Entry</code> is used to keep the pattern and the
460: * match togeather. It implementations the <code>Match</code>
461: * interface so that the pattern match pair can be retrived
462: * using the <code>getMatch</code> methods. This also has an
463: * <code>equals</code> method that allows it to be searched
464: * for in a list like, a <code>Vector</code>.
465: */
466: private class Entry implements Match {
467:
468: /**
469: * This is the pattern string as an array.
470: */
471: public char[] array;
472:
473: /**
474: * This is the pattern for this pair.
475: */
476: public String pattern;
477:
478: /**
479: * This is the match for the pattern.
480: */
481: public String match;
482:
483: /**
484: * This creates an <code>Entry</code> object using the given
485: * pattern and match pair. This will create a character array
486: * from the pattern to help the <code>Resolver</code> with
487: * performance in the <code>resolve</code> method.
488: *
489: * @param pattern this is the pattern string for this match
490: * @param match this is returned from a resolution
491: *
492: * @throws NullPointerException if either string is null
493: */
494: public Entry(String pattern, String match) {
495: this .array = pattern.toCharArray();
496: this .match = match.toString();
497: this .pattern = pattern;
498: }
499:
500: /**
501: * This is the pattern that this match was stored under
502: * in the <code>Resolver</code>. The pattern is typically
503: * a string that contains wild characters so that it can
504: * be matched with certain strings.
505: *
506: * @return this returns the wild pattern for the match
507: */
508: public String getPattern() {
509: return pattern;
510: }
511:
512: /**
513: * This is match that the <code>getPattern</code> string
514: * resolves for. The <code>Resolver</code> returns this
515: * string when the <code>resolve</code> method is given
516: * a string that can be matched using the pattern.
517: *
518: * @return this is the matched string for the pattern
519: */
520: public String getMatch() {
521: return match;
522: }
523:
524: /**
525: * This is used when two matches are compared for equality.
526: * This will compare the <code>Match</code> implementations
527: * by retriving the pattern and match and comparing both
528: * for equality.
529: *
530: * @param item this is a <code>Match</code> being compared
531: * for equality
532: */
533: public boolean equals(Match item) {
534: if (match.equals("") || item.getMatch().equals("")) {
535: return item.getPattern().equals(pattern);
536: }
537: return item.getMatch().equals(match)
538: && item.getPattern().equals(pattern);
539: }
540:
541: /**
542: * This implementation of the <code>equals</code> method will
543: * allow <code>Match</code> implementations to be compared.
544: * This enables the <code>Match.getMatch</code> to be an empty
545: * string and act as a wildcard that matches every thing. If
546: * the wildcard match is used a pattern comparison happens.
547: *
548: * @param item a <code>Match</code> that is to be compared
549: */
550: public boolean equals(Object item) {
551: if (item instanceof Match) {
552: return equals((Match) item);
553: }
554: return false;
555: }
556:
557: /**
558: * This returns the <code>String</code> representation of
559: * the <code>Match</code>. This is useful when the contents
560: * of the <code>Resolver</code> is examined using the
561: * <code>getMatch</code> method.
562: *
563: * @return this returns the representation of the pair
564: */
565: public String toString() {
566: return pattern + "=" + match;
567: }
568: }
569: }
|