001: /**
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
 */
package org.apache.solr.analysis;
017:
018: import org.apache.lucene.analysis.Token;
019:
020: import java.util.*;
021:
022: /** Mapping rules for use with {@link org.apache.solr.analysis.SynonymFilter}
023: *
024: * @author yonik
025: * @version $Id: SynonymMap.java 472574 2006-11-08 18:25:52Z yonik $
026: */
027: public class SynonymMap {
028: Map submap; // recursive: Map<String, SynonymMap>
029: Token[] synonyms;
030: int flags;
031:
032: static final int INCLUDE_ORIG = 0x01;
033:
034: public boolean includeOrig() {
035: return (flags & INCLUDE_ORIG) != 0;
036: }
037:
038: /**
039: * @param singleMatch List<String>, the sequence of strings to match
040: * @param replacement List<Token> the list of tokens to use on a match
041: * @param includeOrig sets a flag on this mapping signaling the generation of matched tokens in addition to the replacement tokens
042: * @param mergeExisting merge the replacement tokens with any other mappings that exist
043: */
044: public void add(List singleMatch, List replacement,
045: boolean includeOrig, boolean mergeExisting) {
046: SynonymMap currMap = this ;
047: for (Iterator iter = singleMatch.iterator(); iter.hasNext();) {
048: String str = (String) iter.next();
049: if (currMap.submap == null) {
050: currMap.submap = new HashMap(1);
051: }
052:
053: SynonymMap map = (SynonymMap) currMap.submap.get(str);
054: if (map == null) {
055: map = new SynonymMap();
056: currMap.submap.put(str, map);
057: }
058:
059: currMap = map;
060: }
061:
062: if (currMap.synonyms != null && !mergeExisting) {
063: throw new RuntimeException(
064: "SynonymFilter: there is already a mapping for "
065: + singleMatch);
066: }
067: List super set = currMap.synonyms == null ? replacement
068: : mergeTokens(Arrays.asList(currMap.synonyms),
069: replacement);
070: currMap.synonyms = (Token[]) super set
071: .toArray(new Token[super set.size()]);
072: if (includeOrig)
073: currMap.flags |= INCLUDE_ORIG;
074: }
075:
076: public String toString() {
077: StringBuffer sb = new StringBuffer("<");
078: if (synonyms != null) {
079: sb.append("[");
080: for (int i = 0; i < synonyms.length; i++) {
081: if (i != 0)
082: sb.append(',');
083: sb.append(synonyms[i]);
084: }
085: if ((flags & INCLUDE_ORIG) != 0) {
086: sb.append(",ORIG");
087: }
088: sb.append("],");
089: }
090: sb.append(submap);
091: sb.append(">");
092: return sb.toString();
093: }
094:
095: /** Produces a List<Token> from a List<String> */
096: public static List makeTokens(List strings) {
097: List ret = new ArrayList(strings.size());
098: for (Iterator iter = strings.iterator(); iter.hasNext();) {
099: Token newTok = new Token((String) iter.next(), 0, 0,
100: "SYNONYM");
101: ret.add(newTok);
102: }
103: return ret;
104: }
105:
106: /**
107: * Merge two lists of tokens, producing a single list with manipulated positionIncrements so that
108: * the tokens end up at the same position.
109: *
110: * Example: [a b] merged with [c d] produces [a/b c/d] ('/' denotes tokens in the same position)
111: * Example: [a,5 b,2] merged with [c d,4 e,4] produces [c a,5/d b,2 e,2] (a,n means a has posInc=n)
112: *
113: */
114: public static List mergeTokens(List lst1, List lst2) {
115: ArrayList result = new ArrayList();
116: if (lst1 == null || lst2 == null) {
117: if (lst2 != null)
118: result.addAll(lst2);
119: if (lst1 != null)
120: result.addAll(lst1);
121: return result;
122: }
123:
124: int pos = 0;
125: Iterator iter1 = lst1.iterator();
126: Iterator iter2 = lst2.iterator();
127: Token tok1 = iter1.hasNext() ? (Token) iter1.next() : null;
128: Token tok2 = iter2.hasNext() ? (Token) iter2.next() : null;
129: int pos1 = tok1 != null ? tok1.getPositionIncrement() : 0;
130: int pos2 = tok2 != null ? tok2.getPositionIncrement() : 0;
131: while (tok1 != null || tok2 != null) {
132: while (tok1 != null && (pos1 <= pos2 || tok2 == null)) {
133: Token tok = new Token(tok1.termText(), tok1
134: .startOffset(), tok1.endOffset(), tok1.type());
135: tok.setPositionIncrement(pos1 - pos);
136: result.add(tok);
137: pos = pos1;
138: tok1 = iter1.hasNext() ? (Token) iter1.next() : null;
139: pos1 += tok1 != null ? tok1.getPositionIncrement() : 0;
140: }
141: while (tok2 != null && (pos2 <= pos1 || tok1 == null)) {
142: Token tok = new Token(tok2.termText(), tok2
143: .startOffset(), tok2.endOffset(), tok2.type());
144: tok.setPositionIncrement(pos2 - pos);
145: result.add(tok);
146: pos = pos2;
147: tok2 = iter2.hasNext() ? (Token) iter2.next() : null;
148: pos2 += tok2 != null ? tok2.getPositionIncrement() : 0;
149: }
150: }
151: return result;
152: }
153:
154: }
|