001: package org.apache.lucene.search;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import java.io.IOException;
021:
022: import org.apache.lucene.index.IndexReader;
023: import org.apache.lucene.index.Term;
024:
025: /**
026: * Subclass of FilteredTermEnum for enumerating all terms that match the
027: * specified wildcard filter term.
028: * <p>
029: * Term enumerations are always ordered by Term.compareTo(). Each term in
030: * the enumeration is greater than all that precede it.
031: *
032: * @version $Id: WildcardTermEnum.java 472959 2006-11-09 16:21:50Z yonik $
033: */
034: public class WildcardTermEnum extends FilteredTermEnum {
035: Term searchTerm;
036: String field = "";
037: String text = "";
038: String pre = "";
039: int preLen = 0;
040: boolean endEnum = false;
041:
042: /**
043: * Creates a new <code>WildcardTermEnum</code>. Passing in a
044: * {@link org.apache.lucene.index.Term Term} that does not contain a
045: * <code>WILDCARD_CHAR</code> will cause an exception to be thrown.
046: * <p>
047: * After calling the constructor the enumeration is already pointing to the first
048: * valid term if such a term exists.
049: */
050: public WildcardTermEnum(IndexReader reader, Term term)
051: throws IOException {
052: super ();
053: searchTerm = term;
054: field = searchTerm.field();
055: text = searchTerm.text();
056:
057: int sidx = text.indexOf(WILDCARD_STRING);
058: int cidx = text.indexOf(WILDCARD_CHAR);
059: int idx = sidx;
060: if (idx == -1) {
061: idx = cidx;
062: } else if (cidx >= 0) {
063: idx = Math.min(idx, cidx);
064: }
065:
066: pre = searchTerm.text().substring(0, idx);
067: preLen = pre.length();
068: text = text.substring(preLen);
069: setEnum(reader.terms(new Term(searchTerm.field(), pre)));
070: }
071:
072: protected final boolean termCompare(Term term) {
073: if (field == term.field()) {
074: String searchText = term.text();
075: if (searchText.startsWith(pre)) {
076: return wildcardEquals(text, 0, searchText, preLen);
077: }
078: }
079: endEnum = true;
080: return false;
081: }
082:
083: public final float difference() {
084: return 1.0f;
085: }
086:
087: public final boolean endEnum() {
088: return endEnum;
089: }
090:
091: /********************************************
092: * String equality with support for wildcards
093: ********************************************/
094:
095: public static final char WILDCARD_STRING = '*';
096: public static final char WILDCARD_CHAR = '?';
097:
098: /**
099: * Determines if a word matches a wildcard pattern.
100: * <small>Work released by Granta Design Ltd after originally being done on
101: * company time.</small>
102: */
103: public static final boolean wildcardEquals(String pattern,
104: int patternIdx, String string, int stringIdx) {
105: int p = patternIdx;
106:
107: for (int s = stringIdx;; ++p, ++s) {
108: // End of string yet?
109: boolean sEnd = (s >= string.length());
110: // End of pattern yet?
111: boolean pEnd = (p >= pattern.length());
112:
113: // If we're looking at the end of the string...
114: if (sEnd) {
115: // Assume the only thing left on the pattern is/are wildcards
116: boolean justWildcardsLeft = true;
117:
118: // Current wildcard position
119: int wildcardSearchPos = p;
120: // While we haven't found the end of the pattern,
121: // and haven't encountered any non-wildcard characters
122: while (wildcardSearchPos < pattern.length()
123: && justWildcardsLeft) {
124: // Check the character at the current position
125: char wildchar = pattern.charAt(wildcardSearchPos);
126:
127: // If it's not a wildcard character, then there is more
128: // pattern information after this/these wildcards.
129: if (wildchar != WILDCARD_CHAR
130: && wildchar != WILDCARD_STRING) {
131: justWildcardsLeft = false;
132: } else {
133: // to prevent "cat" matches "ca??"
134: if (wildchar == WILDCARD_CHAR) {
135: return false;
136: }
137:
138: // Look at the next character
139: wildcardSearchPos++;
140: }
141: }
142:
143: // This was a prefix wildcard search, and we've matched, so
144: // return true.
145: if (justWildcardsLeft) {
146: return true;
147: }
148: }
149:
150: // If we've gone past the end of the string, or the pattern,
151: // return false.
152: if (sEnd || pEnd) {
153: break;
154: }
155:
156: // Match a single character, so continue.
157: if (pattern.charAt(p) == WILDCARD_CHAR) {
158: continue;
159: }
160:
161: //
162: if (pattern.charAt(p) == WILDCARD_STRING) {
163: // Look at the character beyond the '*'.
164: ++p;
165: // Examine the string, starting at the last character.
166: for (int i = string.length(); i >= s; --i) {
167: if (wildcardEquals(pattern, p, string, i)) {
168: return true;
169: }
170: }
171: break;
172: }
173: if (pattern.charAt(p) != string.charAt(s)) {
174: break;
175: }
176: }
177: return false;
178: }
179:
180: public void close() throws IOException {
181: super.close();
182: searchTerm = null;
183: field = null;
184: text = null;
185: }
186: }
|