01: /**
02: * Licensed to the Apache Software Foundation (ASF) under one or more
03: * contributor license agreements. See the NOTICE file distributed with
04: * this work for additional information regarding copyright ownership.
05: * The ASF licenses this file to You under the Apache License, Version 2.0
06: * (the "License"); you may not use this file except in compliance with
07: * the License. You may obtain a copy of the License at
08: *
09: * http://www.apache.org/licenses/LICENSE-2.0
10: *
11: * Unless required by applicable law or agreed to in writing, software
12: * distributed under the License is distributed on an "AS IS" BASIS,
13: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14: * See the License for the specific language governing permissions and
15: * limitations under the License.
16: */package org.apache.solr.analysis;
17:
18: import java.io.StringReader;
19: import java.util.HashMap;
20: import java.util.Map;
21:
22: import junit.framework.TestCase;
23:
24: import org.apache.lucene.analysis.Token;
25: import org.apache.lucene.analysis.TokenStream;
26:
27: public class TestPatternTokenizerFactory extends TestCase {
28: public void testSplitting() throws Exception {
29: String qpattern = "\\'([^\\']+)\\'"; // get stuff between "'"
30: String[][] tests = {
31: // group pattern input output
32: { "-1", "--", "aaa--bbb--ccc", "aaa bbb ccc" },
33: { "-1", ":", "aaa:bbb:ccc", "aaa bbb ccc" },
34: { "-1", "\\p{Space}", "aaa bbb \t\tccc ",
35: "aaa bbb ccc" },
36: { "-1", ":", "boo:and:foo", "boo and foo" },
37: { "-1", "o", "boo:and:foo", "b :and:f" },
38: { "0", ":", "boo:and:foo", ": :" },
39: { "0", qpattern, "aaa 'bbb' 'ccc'", "'bbb' 'ccc'" },
40: { "1", qpattern, "aaa 'bbb' 'ccc'", "bbb ccc" } };
41:
42: Map<String, String> args = new HashMap<String, String>();
43: for (String[] test : tests) {
44: args.put(PatternTokenizerFactory.GROUP, test[0]);
45: args.put(PatternTokenizerFactory.PATTERN, test[1]);
46:
47: PatternTokenizerFactory tokenizer = new PatternTokenizerFactory();
48: tokenizer.init(args);
49:
50: TokenStream stream = tokenizer.create(new StringReader(
51: test[2]));
52: String out = TestHyphenatedWordsFilter.tsToString(stream);
53: System.out.println(test[2] + " ==> " + out);
54:
55: assertEquals("pattern: " + test[2], test[3], out);
56:
57: // Make sure it is the same as if we called 'split'
58: if ("-1".equals(test[0])) {
59: String[] split = test[2].split(test[1]);
60: stream = tokenizer.create(new StringReader(test[2]));
61: int i = 0;
62: for (Token t = stream.next(); null != t; t = stream
63: .next()) {
64: assertEquals("split: " + test[1] + " " + i,
65: split[i++], t.termText());
66: }
67: }
68: }
69: }
70: }
|