001: package org.apache.lucene.queryParser.surround.query;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import org.apache.lucene.index.Term;
021: import org.apache.lucene.index.TermEnum;
022: import org.apache.lucene.index.IndexReader;
023:
024: import java.io.IOException;
025:
026: import java.util.regex.Pattern;
027: import java.util.regex.Matcher;
028:
029: public class SrndTruncQuery extends SimpleTerm {
030: public SrndTruncQuery(String truncated, char unlimited, char mask) {
031: super (false); /* not quoted */
032: this .truncated = truncated;
033: this .unlimited = unlimited;
034: this .mask = mask;
035: truncatedToPrefixAndPattern();
036: }
037:
038: private final String truncated;
039: private final char unlimited;
040: private final char mask;
041:
042: private String prefix;
043: private Pattern pattern;
044:
045: public String getTruncated() {
046: return truncated;
047: }
048:
049: public String toStringUnquoted() {
050: return getTruncated();
051: }
052:
053: protected boolean matchingChar(char c) {
054: return (c != unlimited) && (c != mask);
055: }
056:
057: protected void appendRegExpForChar(char c, StringBuffer re) {
058: if (c == unlimited)
059: re.append(".*");
060: else if (c == mask)
061: re.append(".");
062: else
063: re.append(c);
064: }
065:
066: protected void truncatedToPrefixAndPattern() {
067: int i = 0;
068: while ((i < truncated.length())
069: && matchingChar(truncated.charAt(i))) {
070: i++;
071: }
072: prefix = truncated.substring(0, i);
073:
074: StringBuffer re = new StringBuffer();
075: while (i < truncated.length()) {
076: appendRegExpForChar(truncated.charAt(i), re);
077: i++;
078: }
079: pattern = Pattern.compile(re.toString());
080: }
081:
082: public void visitMatchingTerms(IndexReader reader,
083: String fieldName, MatchingTermVisitor mtv)
084: throws IOException {
085: boolean expanded = false;
086: int prefixLength = prefix.length();
087: TermEnum enumerator = reader.terms(new Term(fieldName, prefix));
088: Matcher matcher = pattern.matcher("");
089: try {
090: do {
091: Term term = enumerator.term();
092: if (term != null) {
093: String text = term.text();
094: if ((!text.startsWith(prefix))
095: || (!term.field().equals(fieldName))) {
096: break;
097: } else {
098: matcher.reset(text.substring(prefixLength));
099: if (matcher.matches()) {
100: mtv.visitMatchingTerm(term);
101: expanded = true;
102: }
103: }
104: }
105: } while (enumerator.next());
106: } finally {
107: enumerator.close();
108: matcher.reset();
109: }
110: if (!expanded) {
111: System.out.println("No terms in " + fieldName
112: + " field for: " + toString());
113: }
114: }
115: }
|