001: /*
002: * Copyright 2004 Brian S O'Neill
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package org.cojen.util;
018:
019: import java.io.PrintStream;
020: import java.lang.reflect.Constructor;
021: import java.lang.reflect.InvocationTargetException;
022: import java.util.AbstractList;
023: import java.util.ArrayList;
024: import java.util.Arrays;
025: import java.util.Comparator;
026: import java.util.List;
027: import java.util.Map;
028: import java.util.Stack;
029: import org.cojen.classfile.ClassFile;
030: import org.cojen.classfile.CodeBuilder;
031: import org.cojen.classfile.Label;
032: import org.cojen.classfile.LocalVariable;
033: import org.cojen.classfile.MethodInfo;
034: import org.cojen.classfile.Modifiers;
035: import org.cojen.classfile.Opcode;
036: import org.cojen.classfile.TypeDesc;
037:
038: /**
039: * Provides fast matching of strings against patterns containing wildcards.
040: * An ordinary map must be supplied in order to create a PatternMatcher. The
041: * map keys must be strings. Asterisks (*) are treated as wildcard characters.
042: *
043: * @author Brian S O'Neill
044: */
045: public abstract class PatternMatcher {
046: private static final int[] NO_POSITIONS = new int[0];
047:
048: // Maps pattern sets to auto-generated classes.
049: private static Map cPatternMatcherClasses = new SoftValuedHashMap();
050:
051: public static synchronized PatternMatcher forPatterns(Map patternMap) {
052: Maker maker = new Maker(patternMap);
053: Class clazz = (Class) cPatternMatcherClasses
054: .get(maker.getKey());
055:
056: if (clazz == null) {
057: Class patternMatcherClass = PatternMatcher.class;
058:
059: ClassInjector ci = ClassInjector.create(patternMatcherClass
060: .getName(), patternMatcherClass.getClassLoader());
061:
062: ClassFile cf = maker.createClassFile(ci.getClassName());
063: clazz = ci.defineClass(cf);
064:
065: cPatternMatcherClasses.put(maker.getKey(), clazz);
066: }
067:
068: try {
069: Constructor ctor = clazz
070: .getConstructor(new Class[] { Object[].class });
071: return (PatternMatcher) ctor
072: .newInstance(new Object[] { maker.getMappedValues() });
073: } catch (NoSuchMethodException e) {
074: throw new InternalError(e.toString());
075: } catch (InstantiationException e) {
076: throw new InternalError(e.toString());
077: } catch (IllegalAccessException e) {
078: throw new InternalError(e.toString());
079: } catch (InvocationTargetException e) {
080: throw new InternalError(e.toString());
081: }
082: }
083:
084: protected final Object[] mValues;
085:
086: protected PatternMatcher(Object[] values) {
087: mValues = values;
088: }
089:
090: /**
091: * Returns null if no match.
092: */
093: public Result getMatch(String lookup) {
094: int strLen = lookup.length();
095: char[] chars = new char[strLen + 1];
096: lookup.getChars(0, strLen, chars, 0);
097: chars[strLen] = '\uffff';
098:
099: TinyList resultList = new TinyList();
100: fillMatchResults(chars, 1, resultList);
101:
102: return (Result) resultList.mElement;
103: }
104:
105: /**
106: * Returns an empty array if no matches.
107: *
108: * @param limit maximum number of results to return
109: */
110: public Result[] getMatches(String lookup, int limit) {
111: int strLen = lookup.length();
112: char[] chars = new char[strLen + 1];
113: lookup.getChars(0, strLen, chars, 0);
114: chars[strLen] = '\uffff';
115:
116: List resultList = new ArrayList();
117: fillMatchResults(chars, limit, resultList);
118:
119: return (Result[]) resultList.toArray(new Result[resultList
120: .size()]);
121: }
122:
123: protected abstract void fillMatchResults(char[] lookup, int limit,
124: List results);
125:
126: // Returns false if no more results should be added.
127: protected static boolean addMatchResult(int limit, List results,
128: String pattern, Object value, int[] positions, int len) {
129: int size = results.size();
130: if (size < limit) {
131: if (positions == null || len == 0) {
132: positions = NO_POSITIONS;
133: } else {
134: int[] original = positions;
135: positions = new int[len];
136: for (int i = 0; i < len; i++) {
137: positions[i] = original[i];
138: }
139: }
140: results.add(new Result(pattern, value, positions));
141: return size + 1 < limit;
142: } else {
143: return false;
144: }
145: }
146:
147: public static class Result {
148: private final String mPattern;
149: private final Object mValue;
150: private final int[] mPositions;
151:
152: Result(String pattern, Object value, int[] positions) {
153: mPattern = pattern;
154: mValue = value;
155: mPositions = positions;
156: }
157:
158: public String getPattern() {
159: return mPattern;
160: }
161:
162: /**
163: * Returns the value associated with the matched pattern.
164: */
165: public Object getValue() {
166: return mValue;
167: }
168:
169: /**
170: * Returns the indexes used to parse the lookup string at wildcard
171: * positions in order for it to match the pattern. Array length is
172: * always double the number of wildcards in the pattern. Every even
173: * element is the start index (inclusive) of a wildcard match, and
174: * every odd element is the end index (exclusive) of a wildcard match.
175: */
176: public int[] getWildcardPositions() {
177: return mPositions;
178: }
179: }
180:
181: private static class Maker {
182: private PatternNode mPatternRoot;
183: private Object mKey;
184: private Object[] mMappedValues;
185: private int mMaxWildPerKey;
186:
187: private TypeDesc mIntType;
188: private TypeDesc mBooleanType;
189: private TypeDesc mListType;
190: private TypeDesc mStringType;
191: private TypeDesc mObjectType;
192: private TypeDesc mIntArrayType;
193:
194: private CodeBuilder mBuilder;
195: private LocalVariable mLookupLocal;
196: private LocalVariable mLimitLocal;
197: private LocalVariable mResultsLocal;
198: private LocalVariable mPositionsLocal;
199: private LocalVariable mIndexLocal;
200: private Stack mTempLocals;
201: private Label mReturnLabel;
202:
203: private int mReferenceLine;
204:
205: Maker(Map patternMap) {
206: String[] keys = (String[]) patternMap.keySet().toArray(
207: new String[0]);
208:
209: for (int i = 0; i < keys.length; i++) {
210: String key = keys[i];
211: // Ensure terminating patterns end in the special
212: // terminator char.
213: if (!key.endsWith("*")) {
214: keys[i] = key.concat("\uffff");
215: }
216: }
217:
218: // Sort the keys in a special order that ensures correct
219: // "closest match" semantics.
220: Arrays.sort(keys, new PatternComparator());
221:
222: mMappedValues = new Object[keys.length];
223: for (int i = 0; i < keys.length; i++) {
224: String key = keys[i];
225: if (key.endsWith("\uffff")) {
226: key = key.substring(0, key.length() - 1);
227: }
228: mMappedValues[i] = patternMap.get(key);
229: }
230:
231: // Build tree structure for managing pattern matching.
232: mPatternRoot = new PatternNode();
233: for (int i = 0; i < keys.length; i++) {
234: String key = keys[i];
235: mPatternRoot.buildPathTo(key, i);
236: }
237:
238: mMaxWildPerKey = mPatternRoot.getMaxWildcardCount();
239:
240: mKey = KeyFactory.createKey(keys);
241: }
242:
243: public Object getKey() {
244: return mKey;
245: }
246:
247: public Object getMappedValues() {
248: return mMappedValues;
249: }
250:
251: public ClassFile createClassFile(String className) {
252: ClassFile cf = new ClassFile(className,
253: PatternMatcher.class);
254: cf.markSynthetic();
255: cf.setSourceFile(PatternMatcher.class.getName());
256:
257: // constructor
258: TypeDesc objectArrayType = TypeDesc.OBJECT.toArrayType();
259: TypeDesc[] params = { objectArrayType };
260: MethodInfo mi = cf.addConstructor(Modifiers.PUBLIC, params);
261: mBuilder = new CodeBuilder(mi);
262: mBuilder.loadThis();
263: mBuilder.loadLocal(mBuilder.getParameter(0));
264: mBuilder.invokeSuperConstructor(params);
265: mBuilder.returnVoid();
266:
267: mIntType = TypeDesc.INT;
268: mBooleanType = TypeDesc.BOOLEAN;
269: mListType = TypeDesc.forClass(List.class);
270: mStringType = TypeDesc.STRING;
271: mObjectType = TypeDesc.OBJECT;
272: mIntArrayType = TypeDesc.INT.toArrayType();
273:
274: // fillMatchResults method
275: TypeDesc charArrayType = TypeDesc.CHAR.toArrayType();
276: params = new TypeDesc[] { charArrayType, mIntType,
277: mListType };
278: mi = cf.addMethod(Modifiers.PUBLIC, "fillMatchResults",
279: null, params);
280: mBuilder = new CodeBuilder(mi);
281:
282: mLookupLocal = mBuilder.getParameter(0);
283: mLimitLocal = mBuilder.getParameter(1);
284: mResultsLocal = mBuilder.getParameter(2);
285: mPositionsLocal = mBuilder.createLocalVariable("positions",
286: mIntArrayType);
287: mIndexLocal = mBuilder.createLocalVariable("index",
288: mIntType);
289:
290: mBuilder.mapLineNumber(++mReferenceLine);
291:
292: mBuilder.loadConstant(mMaxWildPerKey * 2);
293: mBuilder.newObject(mIntArrayType);
294: mBuilder.storeLocal(mPositionsLocal);
295:
296: mBuilder.loadConstant(0);
297: mBuilder.storeLocal(mIndexLocal);
298: mTempLocals = new Stack();
299: mReturnLabel = mBuilder.createLabel();
300:
301: generateBranches(mPatternRoot, -1, 0);
302:
303: mReturnLabel.setLocation();
304: mBuilder.returnVoid();
305:
306: return cf;
307: }
308:
309: private void generateBranches(PatternNode node, int depth,
310: int posIndex) {
311: generateBranches(node, depth, posIndex, null);
312: }
313:
314: private void generateBranches(PatternNode node, int depth,
315: int posIndex, LocalVariable tempChar) {
316: int c = node.mChar;
317: List subNodes = node.mSubNodes;
318:
319: mBuilder.mapLineNumber(++mReferenceLine);
320:
321: if (c == '*') {
322: LocalVariable savedIndex;
323:
324: if (mTempLocals.isEmpty()) {
325: savedIndex = mBuilder.createLocalVariable("temp",
326: mIntType);
327: } else {
328: savedIndex = (LocalVariable) mTempLocals.pop();
329: }
330:
331: mBuilder.loadLocal(mIndexLocal);
332: mBuilder.storeLocal(savedIndex);
333:
334: // Save position of wildcard start.
335: mBuilder.loadLocal(mPositionsLocal);
336: mBuilder.loadConstant(posIndex);
337: mBuilder.loadLocal(mIndexLocal);
338: if (depth > 0) {
339: mBuilder.loadConstant(depth);
340: mBuilder.math(Opcode.IADD);
341: }
342: mBuilder.storeToArray(TypeDesc.INT);
343:
344: if (subNodes == null) {
345: generateWildcard(null, depth, posIndex + 2);
346: } else {
347: int size = subNodes.size();
348: for (int i = 0; i < size; i++) {
349: generateWildcard((PatternNode) subNodes.get(i),
350: depth, posIndex + 2);
351: mBuilder.loadLocal(savedIndex);
352: mBuilder.storeLocal(mIndexLocal);
353: }
354: }
355:
356: mTempLocals.push(savedIndex);
357:
358: if (node.mPattern != null) {
359: generateAddMatchResult(node);
360: }
361:
362: return;
363: }
364:
365: Label noMatch = mBuilder.createLabel();
366:
367: if (c >= 0) {
368: if (tempChar != null) {
369: mBuilder.loadLocal(tempChar);
370: mTempLocals.push(tempChar);
371: } else {
372: mBuilder.loadLocal(mLookupLocal);
373: mBuilder.loadLocal(mIndexLocal);
374: if (depth > 0) {
375: mBuilder.loadConstant(depth);
376: mBuilder.math(Opcode.IADD);
377: }
378: mBuilder.loadFromArray(TypeDesc.CHAR);
379: }
380:
381: mBuilder.loadConstant((char) c);
382: mBuilder.ifComparisonBranch(noMatch, "!=");
383: }
384:
385: if (subNodes != null) {
386: int size = subNodes.size();
387: for (int i = 0; i < size; i++) {
388: generateBranches((PatternNode) subNodes.get(i),
389: depth + 1, posIndex);
390: }
391: }
392:
393: if (node.mPattern != null) {
394: // Matched pattern; save results.
395: generateAddMatchResult(node);
396: }
397:
398: noMatch.setLocation();
399: }
400:
401: private void generateWildcard(PatternNode node, int depth,
402: int posIndex) {
403: Label loopStart = mBuilder.createLabel().setLocation();
404: Label loopEnd = mBuilder.createLabel();
405: Label loopContinue = mBuilder.createLabel();
406:
407: // Save position of wildcard end.
408: mBuilder.loadLocal(mPositionsLocal);
409: mBuilder.loadConstant(posIndex - 1);
410: mBuilder.loadLocal(mIndexLocal);
411: if (depth > 0) {
412: mBuilder.loadConstant(depth);
413: mBuilder.math(Opcode.IADD);
414: }
415: mBuilder.storeToArray(TypeDesc.INT);
416:
417: mBuilder.loadLocal(mLookupLocal);
418: mBuilder.loadLocal(mIndexLocal);
419: if (depth > 0) {
420: mBuilder.loadConstant(depth);
421: mBuilder.math(Opcode.IADD);
422: }
423: mBuilder.loadFromArray(TypeDesc.CHAR);
424:
425: if (node == null) {
426: mBuilder.loadConstant('\uffff');
427: mBuilder.ifComparisonBranch(loopEnd, "==");
428: } else {
429: LocalVariable tempChar;
430: if (mTempLocals.isEmpty()) {
431: tempChar = mBuilder.createLocalVariable("temp",
432: mIntType);
433: } else {
434: tempChar = (LocalVariable) mTempLocals.pop();
435: }
436: mBuilder.storeLocal(tempChar);
437: mBuilder.loadLocal(tempChar);
438:
439: mBuilder.loadConstant('\uffff');
440: mBuilder.ifComparisonBranch(loopEnd, "==");
441:
442: generateBranches(node, depth, posIndex, tempChar);
443: }
444:
445: loopContinue.setLocation();
446: mBuilder.integerIncrement(mIndexLocal, 1);
447: mBuilder.branch(loopStart);
448: loopEnd.setLocation();
449: }
450:
451: private void generateAddMatchResult(PatternNode node) {
452: mBuilder.mapLineNumber(++mReferenceLine);
453:
454: mBuilder.loadLocal(mLimitLocal);
455: mBuilder.loadLocal(mResultsLocal);
456: mBuilder.loadConstant(node.mPattern);
457: mBuilder.loadThis();
458: mBuilder
459: .loadField("mValues", TypeDesc.OBJECT.toArrayType());
460: mBuilder.loadConstant(node.mOrder);
461: mBuilder.loadFromArray(TypeDesc.OBJECT);
462: mBuilder.loadLocal(mPositionsLocal);
463: mBuilder.loadConstant(node.getWildcardCount() * 2);
464:
465: TypeDesc[] params = { mIntType, mListType, mStringType,
466: mObjectType, mIntArrayType, mIntType };
467:
468: mBuilder.invokeStatic(PatternMatcher.class.getName(),
469: "addMatchResult", mBooleanType, params);
470: mBuilder.ifZeroComparisonBranch(mReturnLabel, "==");
471: }
472: }
473:
474: private static class PatternNode {
475: public final int mChar;
476: public String mPattern;
477: public int mOrder;
478: public List mSubNodes;
479:
480: public PatternNode() {
481: mChar = -1;
482: }
483:
484: public PatternNode(char c) {
485: mChar = c;
486: }
487:
488: public void buildPathTo(String pattern, int order) {
489: buildPathTo(pattern, order, 0);
490: }
491:
492: public int getHeight() {
493: int height = 1;
494: if (mSubNodes != null) {
495: int size = mSubNodes.size();
496: for (int i = 0; i < size; i++) {
497: int subH = ((PatternNode) mSubNodes.get(i))
498: .getHeight();
499: if (subH > height) {
500: height = subH;
501: }
502: }
503: }
504: return height;
505: }
506:
507: public int getWildcardCount() {
508: int wildCount = 0;
509: String pattern = mPattern;
510: if (pattern != null) {
511: int len = pattern.length();
512: for (int i = 0; i < len; i++) {
513: if (pattern.charAt(i) == '*') {
514: wildCount++;
515: }
516: }
517: }
518: return wildCount;
519: }
520:
521: public int getMaxWildcardCount() {
522: int wildCount = getWildcardCount();
523:
524: if (mSubNodes != null) {
525: for (int i = 0; i < mSubNodes.size(); i++) {
526: int count = ((PatternNode) mSubNodes.get(i))
527: .getMaxWildcardCount();
528: if (count > wildCount) {
529: wildCount = count;
530: }
531: }
532: }
533:
534: return wildCount;
535: }
536:
537: private void buildPathTo(String pattern, int order, int index) {
538: if (index >= pattern.length()) {
539: if (pattern.endsWith("\uffff")) {
540: // Trim off the '\uffff'.
541: pattern = pattern
542: .substring(0, pattern.length() - 1);
543: }
544: mPattern = pattern;
545: mOrder = order;
546: return;
547: }
548:
549: char c = pattern.charAt(index);
550:
551: if (mSubNodes == null) {
552: mSubNodes = new ArrayList(10);
553: }
554:
555: int size = mSubNodes.size();
556: for (int i = 0; i < size; i++) {
557: PatternNode node = (PatternNode) mSubNodes.get(i);
558: if (node.mChar == c) {
559: node.buildPathTo(pattern, order, index + 1);
560: return;
561: }
562: }
563:
564: PatternNode node = new PatternNode(c);
565: mSubNodes.add(node);
566: node.buildPathTo(pattern, order, index + 1);
567:
568: return;
569: }
570:
571: public void dump(PrintStream out, String indent) {
572: if (mSubNodes != null) {
573: String subIndent = indent.concat(" ");
574: for (int i = 0; i < mSubNodes.size(); i++) {
575: ((PatternNode) mSubNodes.get(i)).dump(out,
576: subIndent);
577: }
578: }
579: out.print(indent);
580: out.print('\'');
581: out.print((char) mChar);
582: out.print('\'');
583: if (mPattern != null) {
584: out.print(" -> ");
585: out.print(mPattern);
586: }
587: out.println();
588: }
589: }
590:
591: private static class PatternComparator implements Comparator {
592: public int compare(Object a, Object b) {
593: String sa = (String) a;
594: String sb = (String) b;
595:
596: int alen = sa.length();
597: int blen = sb.length();
598: int mlen = Math.min(alen, blen);
599:
600: for (int i = 0; i < mlen; i++) {
601: char ca = sa.charAt(i);
602: char cb = sb.charAt(i);
603: if (ca == '*') {
604: if (cb != '*') {
605: // Wildcard sorted high.
606: return 1;
607: }
608: } else if (cb == '*') {
609: // Wildcard sorted high.
610: return -1;
611: } else if (ca < cb) {
612: return -1;
613: } else if (ca > cb) {
614: return 1;
615: }
616: }
617:
618: // The shorter string is sorted high.
619: if (alen < blen) {
620: return 1;
621: } else if (alen > blen) {
622: return -1;
623: }
624:
625: return 0;
626: }
627: }
628:
629: private static class TinyList extends AbstractList {
630: public Object mElement;
631:
632: public int size() {
633: return mElement == null ? 0 : 1;
634: }
635:
636: public boolean add(Object obj) {
637: if (mElement == null) {
638: mElement = obj;
639: return true;
640: } else {
641: throw new UnsupportedOperationException();
642: }
643: }
644:
645: public Object get(int index) {
646: if (index == 0 && mElement != null) {
647: return mElement;
648: } else {
649: throw new IndexOutOfBoundsException();
650: }
651: }
652: }
653:
654: /* Sample auto-generated method.
655: protected void fillMatchResults(char[] lookup, int limit, List results) {
656: int[] positions = new int[2]; // At least as large as number of wildcards, times 2.
657: int i = 0;
658: if (lookup[i + 0] == '/') {
659: if (lookup[i + 1] == 'a') {
660: if (lookup[i + 2] == 'd') {
661: if (lookup[i + 3] == 'm') {
662: if (lookup[i + 4] == 'i') {
663: if (lookup[i + 5] == 'n') {
664: if (lookup[i + 6] == '2') {
665: if (lookup[i + 7] == '\uffff') {
666: addMatchResult(limit, results, "/admin2", mValues[0], null, 0);
667: }
668: } else if (lookup[i + 6] == '\uffff') {
669: addMatchResult(limit, results, "/admin", mValues[1], null, 0);
670: }
671: }
672: }
673: }
674: }
675: } else if (lookup[i + 1] == 't') {
676: if (lookup[i + 2] == 'e') {
677: if (lookup[i + 3] == 'a') {
678: if (lookup[i + 4] == '/') {
679: // Wildcard pattern. Consume characters until match found.
680: int saved_i = i;
681: positions[0] = i + 5;
682: while (true) {
683: positions[1] = i + 5;
684: char c = lookup[i + 5];
685: if (c == '\uffff') {
686: break;
687: } else if (c == '.') {
688: if (lookup[i + 6] == 'h') {
689: if (lookup[i + 7] == 't') {
690: if (lookup[i + 8] == 'm') {
691: if (lookup[i + 9] == 'l') {
692: if (lookup[i + 10] == '\uffff') {
693: addMatchResult(limit, results, "/tea/*.html", mValues[2], positions, 2);
694: }
695: }
696: }
697: }
698: }
699: }
700: i++;
701: }
702: i = saved_i;
703: addMatchResult(limit, results, "/tea/*", mValues[3], positions, 2);
704: }
705: }
706: }
707: }
708: }
709: // Wildcard pattern. Consume characters until match found.
710: int saved_i = i;
711: positions[0] = i;
712: while (true) {
713: positions[1] = i;
714: char c = lookup[i];
715: if (c == '\uffff') {
716: break;
717: } else if (c == '.') {
718: if (lookup[i + 1] == 'h') {
719: if (lookup[i + 2] == 't') {
720: if (lookup[i + 3] == 'm') {
721: if (lookup[i + 4] == 'l') {
722: if (lookup[i + 5] == '\uffff') {
723: addMatchResult(limit, results, "*.html", mValues[4], positions, 2);
724: }
725: }
726: }
727: }
728: }
729: }
730: i++;
731: }
732: i = saved_i;
733: addMatchResult(limit, results, "*", mValues[5], positions, 2);
734: }
735: */
736: }
|