001: /**
002: * Copyright (c) 2001, Sergey A. Samokhodkin
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms, with or without modification,
006: * are permitted provided that the following conditions are met:
007: *
008: * - Redistributions of source code must retain the above copyright notice,
009: * this list of conditions and the following disclaimer.
010: * - Redistributions in binary form
011: * must reproduce the above copyright notice, this list of conditions and the following
012: * disclaimer in the documentation and/or other materials provided with the distribution.
013: * - Neither the name of jregex nor the names of its contributors may be used
014: * to endorse or promote products derived from this software without specific prior
015: * written permission.
016: *
017: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
018: * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
019: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
020: * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
021: * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
022: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
023: * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
024: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
025: * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
026: *
027: * @version 1.2_01
028: */package jregex;
029:
030: import java.util.*;
031:
032: public class Optimizer {
033: public static final int THRESHOLD = 20;
034:
035: static Optimizer find(Term entry) {
036: return find(entry, 0);
037: }
038:
039: private static Optimizer find(Term term, int dist) {
040: //System.out.println("term="+term+", dist="+dist);
041: if (term == null)
042: return null;
043: Term next = term.next;
044: int type = term.type;
045: switch (type) {
046: case Term.CHAR:
047: case Term.REG:
048: case Term.REG_I:
049: return new Optimizer(term, dist);
050: case Term.BITSET:
051: case Term.BITSET2:
052: if (term.weight <= THRESHOLD)
053: return new Optimizer(term, dist);
054: else
055: return find(term.next, dist + 1);
056: case Term.ANY_CHAR:
057: case Term.ANY_CHAR_NE:
058: return find(next, dist + 1);
059: case Term.REPEAT_MIN_INF:
060: case Term.REPEAT_MIN_MAX:
061: if (term.minCount > 0) {
062: return find(term.target, dist);
063: } else
064: return null;
065: }
066: if (type >= Term.FIRST_TRANSPARENT
067: && type <= Term.LAST_TRANSPARENT) {
068: return find(next, dist);
069: }
070: return null;
071: }
072:
073: private Term atom;
074: private int distance;
075:
076: private Optimizer(Term atom, int distance) {
077: this .atom = atom;
078: this .distance = distance;
079: }
080:
081: Term makeFirst(Term theFirst) {
082: return new Find(atom, distance, theFirst);
083: }
084:
085: Term makeBacktrack(Term back) {
086: int min = back.minCount;
087: switch (back.type) {
088: case Term.BACKTRACK_0:
089: min = 0;
090: case Term.BACKTRACK_MIN:
091: return new FindBack(atom, distance, min, back);
092:
093: case Term.BACKTRACK_REG_MIN:
094: return back;
095:
096: default:
097: throw new Error("unexpected iterator's backtracker:" + back);
098: //return back;
099: }
100: }
101: }
102:
103: class Find extends Term {
104: Find(Term target, int distance, Term theFirst) {
105: switch (target.type) {
106: case Term.CHAR:
107: case Term.BITSET:
108: case Term.BITSET2:
109: type = Term.FIND;
110: break;
111: case Term.REG:
112: case Term.REG_I:
113: type = Term.FINDREG;
114: break;
115: default:
116: throw new IllegalArgumentException("wrong target type: "
117: + target.type);
118: }
119: this .target = target;
120: this .distance = distance;
121: if (target == theFirst) {
122: next = target.next;
123: eat = true; //eat the next
124: } else {
125: next = theFirst;
126: eat = false;
127: }
128: }
129: }
130:
131: class FindBack extends Term {
132: FindBack(Term target, int distance, int minCount, Term backtrack) {
133: this .minCount = minCount;
134: switch (target.type) {
135: case Term.CHAR:
136: case Term.BITSET:
137: case Term.BITSET2:
138: type = Term.BACKTRACK_FIND_MIN;
139: break;
140: case Term.REG:
141: case Term.REG_I:
142: type = Term.BACKTRACK_FINDREG_MIN;
143: break;
144: default:
145: throw new IllegalArgumentException("wrong target type: "
146: + target.type);
147: }
148:
149: this .target = target;
150: this .distance = distance;
151: Term next = backtrack.next;
152: if (target == next) {
153: this .next = next.next;
154: this .eat = true;
155: } else {
156: this .next = next;
157: this .eat = false;
158: }
159: }
160: }
|