001: package org.apache.lucene.analysis;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import java.io.*;
021: import java.util.List;
022: import java.util.LinkedList;
023:
024: import org.apache.lucene.util.LuceneTestCase;
025: import org.apache.lucene.index.Payload;
026:
027: public class TestAnalyzers extends LuceneTestCase {
028:
029: public TestAnalyzers(String name) {
030: super (name);
031: }
032:
033: public void assertAnalyzesTo(Analyzer a, String input,
034: String[] output) throws Exception {
035: TokenStream ts = a
036: .tokenStream("dummy", new StringReader(input));
037: for (int i = 0; i < output.length; i++) {
038: Token t = ts.next();
039: assertNotNull(t);
040: assertEquals(t.termText(), output[i]);
041: }
042: assertNull(ts.next());
043: ts.close();
044: }
045:
046: public void testSimple() throws Exception {
047: Analyzer a = new SimpleAnalyzer();
048: assertAnalyzesTo(a, "foo bar FOO BAR", new String[] { "foo",
049: "bar", "foo", "bar" });
050: assertAnalyzesTo(a, "foo bar . FOO <> BAR", new String[] {
051: "foo", "bar", "foo", "bar" });
052: assertAnalyzesTo(a, "foo.bar.FOO.BAR", new String[] { "foo",
053: "bar", "foo", "bar" });
054: assertAnalyzesTo(a, "U.S.A.", new String[] { "u", "s", "a" });
055: assertAnalyzesTo(a, "C++", new String[] { "c" });
056: assertAnalyzesTo(a, "B2B", new String[] { "b", "b" });
057: assertAnalyzesTo(a, "2B", new String[] { "b" });
058: assertAnalyzesTo(a, "\"QUOTED\" word", new String[] { "quoted",
059: "word" });
060: }
061:
062: public void testNull() throws Exception {
063: Analyzer a = new WhitespaceAnalyzer();
064: assertAnalyzesTo(a, "foo bar FOO BAR", new String[] { "foo",
065: "bar", "FOO", "BAR" });
066: assertAnalyzesTo(a, "foo bar . FOO <> BAR", new String[] {
067: "foo", "bar", ".", "FOO", "<>", "BAR" });
068: assertAnalyzesTo(a, "foo.bar.FOO.BAR",
069: new String[] { "foo.bar.FOO.BAR" });
070: assertAnalyzesTo(a, "U.S.A.", new String[] { "U.S.A." });
071: assertAnalyzesTo(a, "C++", new String[] { "C++" });
072: assertAnalyzesTo(a, "B2B", new String[] { "B2B" });
073: assertAnalyzesTo(a, "2B", new String[] { "2B" });
074: assertAnalyzesTo(a, "\"QUOTED\" word", new String[] {
075: "\"QUOTED\"", "word" });
076: }
077:
078: public void testStop() throws Exception {
079: Analyzer a = new StopAnalyzer();
080: assertAnalyzesTo(a, "foo bar FOO BAR", new String[] { "foo",
081: "bar", "foo", "bar" });
082: assertAnalyzesTo(a, "foo a bar such FOO THESE BAR",
083: new String[] { "foo", "bar", "foo", "bar" });
084: }
085:
086: void verifyPayload(TokenStream ts) throws IOException {
087: Token t = new Token();
088: for (byte b = 1;; b++) {
089: t.clear();
090: t = ts.next(t);
091: if (t == null)
092: break;
093: // System.out.println("id="+System.identityHashCode(t) + " " + t);
094: // System.out.println("payload=" + (int)t.getPayload().toByteArray()[0]);
095: assertEquals(b, t.getPayload().toByteArray()[0]);
096: }
097: }
098:
099: // Make sure old style next() calls result in a new copy of payloads
100: public void testPayloadCopy() throws IOException {
101: String s = "how now brown cow";
102: TokenStream ts;
103: ts = new WhitespaceTokenizer(new StringReader(s));
104: ts = new BuffTokenFilter(ts);
105: ts = new PayloadSetter(ts);
106: verifyPayload(ts);
107:
108: ts = new WhitespaceTokenizer(new StringReader(s));
109: ts = new PayloadSetter(ts);
110: ts = new BuffTokenFilter(ts);
111: verifyPayload(ts);
112: }
113:
114: }
115:
116: class BuffTokenFilter extends TokenFilter {
117: List lst;
118:
119: public BuffTokenFilter(TokenStream input) {
120: super (input);
121: }
122:
123: public Token next() throws IOException {
124: if (lst == null) {
125: lst = new LinkedList();
126: for (;;) {
127: Token t = input.next();
128: if (t == null)
129: break;
130: lst.add(t);
131: }
132: }
133: return lst.size() == 0 ? null : (Token) lst.remove(0);
134: }
135: }
136:
137: class PayloadSetter extends TokenFilter {
138: public PayloadSetter(TokenStream input) {
139: super (input);
140: }
141:
142: byte[] data = new byte[1];
143: Payload p = new Payload(data, 0, 1);
144:
145: public Token next(Token target) throws IOException {
146: target = input.next(target);
147: if (target == null)
148: return null;
149: target.setPayload(p); // reuse the payload / byte[]
150: data[0]++;
151: return target;
152: }
153: }
|