001: /**
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */package org.apache.solr.analysis;
017:
018: import java.io.IOException;
019: import java.util.ArrayList;
020: import java.util.Arrays;
021: import java.util.Iterator;
022: import java.util.List;
023:
024: import org.apache.lucene.analysis.Token;
025: import org.apache.lucene.analysis.TokenStream;
026:
027: import junit.framework.TestCase;
028:
029: /**
030: * General token testing helper functions
031: */
032: public abstract class BaseTokenTestCase extends TestCase {
033: public static String tsToString(TokenStream in) throws IOException {
034: StringBuffer out = new StringBuffer();
035: Token t = in.next();
036: if (null != t)
037: out.append(t.termText());
038:
039: for (t = in.next(); null != t; t = in.next()) {
040: out.append(" ").append(t.termText());
041: }
042: in.close();
043: return out.toString();
044: }
045:
046: public List<String> tok2str(Iterable<Token> tokLst) {
047: ArrayList<String> lst = new ArrayList<String>();
048: for (Token t : tokLst) {
049: lst.add(t.termText());
050: }
051: return lst;
052: }
053:
054: public void assertTokEqual(List<Token> a, List<Token> b) {
055: assertTokEq(a, b, false);
056: assertTokEq(b, a, false);
057: }
058:
059: public void assertTokEqualOff(List<Token> a, List<Token> b) {
060: assertTokEq(a, b, true);
061: assertTokEq(b, a, true);
062: }
063:
064: private void assertTokEq(List<Token> a, List<Token> b,
065: boolean checkOff) {
066: int pos = 0;
067: for (Iterator iter = a.iterator(); iter.hasNext();) {
068: Token tok = (Token) iter.next();
069: pos += tok.getPositionIncrement();
070: if (!tokAt(b, tok.termText(), pos, checkOff ? tok
071: .startOffset() : -1, checkOff ? tok.endOffset()
072: : -1)) {
073: fail(a + "!=" + b);
074: }
075: }
076: }
077:
078: public boolean tokAt(List<Token> lst, String val, int tokPos,
079: int startOff, int endOff) {
080: int pos = 0;
081: for (Iterator iter = lst.iterator(); iter.hasNext();) {
082: Token tok = (Token) iter.next();
083: pos += tok.getPositionIncrement();
084: if (pos == tokPos
085: && tok.termText().equals(val)
086: && (startOff == -1 || tok.startOffset() == startOff)
087: && (endOff == -1 || tok.endOffset() == endOff)) {
088: return true;
089: }
090: }
091: return false;
092: }
093:
094: /***
095: * Return a list of tokens according to a test string format:
096: * a b c => returns List<Token> [a,b,c]
097: * a/b => tokens a and b share the same spot (b.positionIncrement=0)
098: * a,3/b/c => a,b,c all share same position (a.positionIncrement=3, b.positionIncrement=0, c.positionIncrement=0)
099: * a,1,10,11 => "a" with positionIncrement=1, startOffset=10, endOffset=11
100: */
101: public List<Token> tokens(String str) {
102: String[] arr = str.split(" ");
103: List<Token> result = new ArrayList<Token>();
104: for (int i = 0; i < arr.length; i++) {
105: String[] toks = arr[i].split("/");
106: String[] params = toks[0].split(",");
107:
108: int posInc;
109: int start;
110: int end;
111:
112: if (params.length > 1) {
113: posInc = Integer.parseInt(params[1]);
114: } else {
115: posInc = 1;
116: }
117:
118: if (params.length > 2) {
119: start = Integer.parseInt(params[2]);
120: } else {
121: start = 0;
122: }
123:
124: if (params.length > 3) {
125: end = Integer.parseInt(params[3]);
126: } else {
127: end = start + params[0].length();
128: }
129:
130: Token t = new Token(params[0], start, end, "TEST");
131: t.setPositionIncrement(posInc);
132:
133: result.add(t);
134: for (int j = 1; j < toks.length; j++) {
135: t = new Token(toks[j], 0, 0, "TEST");
136: t.setPositionIncrement(0);
137: result.add(t);
138: }
139: }
140: return result;
141: }
142:
143: //------------------------------------------------------------------------
144: // These may be useful beyond test cases...
145: //------------------------------------------------------------------------
146:
147: // This could probably be put in a utility class
148: static List<Token> getTokens(TokenStream tstream)
149: throws IOException {
150: List<Token> tokens = new ArrayList<Token>();
151: while (true) {
152: Token t = tstream.next();
153: if (t == null)
154: break;
155: tokens.add(t);
156: }
157: return tokens;
158: }
159:
160: // This could probably be put in a utility class
161: public static class IterTokenStream extends TokenStream {
162: Iterator<Token> toks;
163:
164: public IterTokenStream(Token... toks) {
165: this .toks = Arrays.asList(toks).iterator();
166: }
167:
168: public IterTokenStream(Iterator<Token> toks) {
169: this .toks = toks;
170: }
171:
172: @Override
173: public Token next() {
174: if (toks.hasNext()) {
175: return toks.next();
176: }
177: return null;
178: }
179: }
180: }
|