001: /**
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */package org.apache.solr.handler;
017:
018: import org.apache.solr.util.AbstractSolrTestCase;
019: import org.apache.solr.util.ContentStream;
020: import org.apache.solr.util.ContentStreamBase;
021: import org.apache.solr.request.SolrQueryRequest;
022: import org.apache.solr.request.LocalSolrQueryRequest;
023: import org.apache.solr.core.SolrException;
024:
025: import java.io.*;
026: import java.util.List;
027: import java.util.ArrayList;
028:
029: public class TestCSVLoader extends AbstractSolrTestCase {
030:
031: public String getSchemaFile() {
032: return "schema.xml";
033: }
034:
035: public String getSolrConfigFile() {
036: return "solrconfig.xml";
037: }
038:
039: String filename = "solr_tmp.csv";
040: String def_charset = "UTF-8";
041: File file = new File(filename);
042:
043: public void setUp() throws Exception {
044: // if you override setUp or tearDown, you better call
045: // the super classes version
046: super .setUp();
047: }
048:
049: public void tearDown() throws Exception {
050: // if you override setUp or tearDown, you better call
051: // the super classes version
052: super .tearDown();
053: deleteFile();
054: }
055:
056: void makeFile(String contents) {
057: makeFile(contents, def_charset);
058: }
059:
060: void makeFile(String contents, String charset) {
061: try {
062: Writer out = new OutputStreamWriter(new FileOutputStream(
063: filename), charset);
064: out.write(contents);
065: out.close();
066: } catch (Exception e) {
067: throw new RuntimeException(e);
068: }
069: }
070:
071: void deleteFile() {
072: file.delete();
073: }
074:
075: void cleanup() {
076: assertU(delQ("id:[100 TO 110]"));
077: assertU(commit());
078: }
079:
080: void loadLocal(String... args) throws Exception {
081: LocalSolrQueryRequest req = (LocalSolrQueryRequest) req(args);
082:
083: // TODO: stop using locally defined streams once stream.file and
084: // stream.body work everywhere
085: List<ContentStream> cs = new ArrayList<ContentStream>();
086: cs.add(new ContentStreamBase.FileStream(new File(filename)));
087: req.setContentStreams(cs);
088: h.query("/update/csv", req);
089: }
090:
091: public void testCSVLoad() throws Exception {
092: makeFile("id\n100\n101\n102");
093: loadLocal("stream.file", filename);
094: // check default commit of false
095: assertQ(req("id:[100 TO 110]"), "//*[@numFound='0']");
096: assertU(commit());
097: assertQ(req("id:[100 TO 110]"), "//*[@numFound='3']");
098: }
099:
100: public void testCommitFalse() throws Exception {
101: makeFile("id\n100\n101\n102");
102: loadLocal("stream.file", filename, "commit", "false");
103: assertQ(req("id:[100 TO 110]"), "//*[@numFound='0']");
104: assertU(commit());
105: assertQ(req("id:[100 TO 110]"), "//*[@numFound='3']");
106: }
107:
108: public void testCommitTrue() throws Exception {
109: makeFile("id\n100\n101\n102");
110: loadLocal("stream.file", filename, "commit", "true");
111: assertQ(req("id:[100 TO 110]"), "//*[@numFound='3']");
112: }
113:
114: public void testCSV() throws Exception {
115: lrf.args.put("version", "2.0");
116:
117: makeFile("id,str_s\n100,\"quoted\"\n101,\n102,\"\"\n103,");
118: loadLocal("stream.file", filename, "commit", "true");
119: assertQ(req("id:[100 TO 110]"), "//*[@numFound='4']");
120: assertQ(req("id:100"), "//str[@name='str_s'][.='quoted']");
121: assertQ(req("id:101"), "count(//str[@name='str_s'])=0");
122: // 102 is a quoted zero length field ,"", as opposed to ,,
123: // but we can't distinguish this case (and it's debateable
124: // if we should). Does CSV have a way to specify missing
125: // from zero-length?
126: assertQ(req("id:102"), "count(//str[@name='str_s'])=0");
127: assertQ(req("id:103"), "count(//str[@name='str_s'])=0");
128:
129: // test overwrite by default
130: loadLocal("stream.file", filename, "commit", "true");
131: assertQ(req("id:[100 TO 110]"), "//*[@numFound='4']");
132:
133: // test no overwrites
134: loadLocal("stream.file", filename, "commit", "true",
135: "overwrite", "false");
136: assertQ(req("id:[100 TO 110]"), "//*[@numFound='8']");
137:
138: // test overwrite
139: loadLocal("stream.file", filename, "commit", "true");
140: assertQ(req("id:[100 TO 110]"), "//*[@numFound='4']");
141:
142: // test global value mapping
143: loadLocal("stream.file", filename, "commit", "true", "map",
144: "quoted:QUOTED");
145: assertQ(req("id:[100 TO 110]"), "//*[@numFound='4']");
146: assertQ(req("id:100"), "//str[@name='str_s'][.='QUOTED']");
147: assertQ(req("id:101"), "count(//str[@name='str_s'])=0");
148: assertQ(req("id:102"), "count(//str[@name='str_s'])=0");
149: assertQ(req("id:103"), "count(//str[@name='str_s'])=0");
150:
151: // test value mapping to empty (remove)
152: loadLocal("stream.file", filename, "commit", "true", "map",
153: "quoted:");
154: assertQ(req("id:[100 TO 110]"), "//*[@numFound='4']");
155: assertQ(req("id:100"), "count(//str[@name='str_s'])=0");
156:
157: // test value mapping from empty
158: loadLocal("stream.file", filename, "commit", "true", "map",
159: ":EMPTY");
160: assertQ(req("id:[100 TO 110]"), "//*[@numFound='4']");
161: assertQ(req("id:100"), "//str[@name='str_s'][.='quoted']");
162: assertQ(req("id:101"), "//str[@name='str_s'][.='EMPTY']");
163: assertQ(req("id:102"), "//str[@name='str_s'][.='EMPTY']");
164: assertQ(req("id:103"), "//str[@name='str_s'][.='EMPTY']");
165:
166: // test multiple map rules
167: loadLocal("stream.file", filename, "commit", "true", "map",
168: ":EMPTY", "map", "quoted:QUOTED");
169: assertQ(req("id:[100 TO 110]"), "//*[@numFound='4']");
170: assertQ(req("id:100"), "//str[@name='str_s'][.='QUOTED']");
171: assertQ(req("id:101"), "//str[@name='str_s'][.='EMPTY']");
172: assertQ(req("id:102"), "//str[@name='str_s'][.='EMPTY']");
173: assertQ(req("id:103"), "//str[@name='str_s'][.='EMPTY']");
174:
175: // test indexing empty fields
176: loadLocal("stream.file", filename, "commit", "true",
177: "f.str_s.keepEmpty", "true");
178: assertQ(req("id:[100 TO 110]"), "//*[@numFound='4']");
179: assertQ(req("id:100"), "//str[@name='str_s'][.='quoted']");
180: assertQ(req("id:101"), "//str[@name='str_s'][.='']");
181: assertQ(req("id:102"), "//str[@name='str_s'][.='']");
182: assertQ(req("id:103"), "//str[@name='str_s'][.='']");
183:
184: // test overriding the name of fields
185: loadLocal("stream.file", filename, "commit", "true",
186: "fieldnames", "id,my_s", "header", "true",
187: "f.my_s.map", ":EMPTY");
188: assertQ(req("id:[100 TO 110]"), "//*[@numFound='4']");
189: assertQ(req("id:100"), "//str[@name='my_s'][.='quoted']");
190: assertQ(req("id:101"), "count(//str[@name='str_s'])=0");
191: assertQ(req("id:102"), "count(//str[@name='str_s'])=0");
192: assertQ(req("id:103"), "count(//str[@name='str_s'])=0");
193: assertQ(req("id:101"), "//str[@name='my_s'][.='EMPTY']");
194: assertQ(req("id:102"), "//str[@name='my_s'][.='EMPTY']");
195: assertQ(req("id:103"), "//str[@name='my_s'][.='EMPTY']");
196:
197: // test that header in file was skipped
198: assertQ(req("id:id"), "//*[@numFound='0']");
199:
200: // test skipping a field via the "skip" parameter
201: loadLocal("stream.file", filename, "commit", "true",
202: "keepEmpty", "true", "skip", "str_s");
203: assertQ(req("id:[100 TO 110]"), "//*[@numFound='4']");
204: assertQ(req("id:[100 TO 110]"), "count(//str[@name='str_s'])=0");
205:
206: // test skipping a field by specifying an empty name
207: loadLocal("stream.file", filename, "commit", "true",
208: "keepEmpty", "true", "fieldnames", "id,");
209: assertQ(req("id:[100 TO 110]"), "//*[@numFound='4']");
210: assertQ(req("id:[100 TO 110]"), "count(//str[@name='str_s'])=0");
211:
212: // test loading file as if it didn't have a header
213: loadLocal("stream.file", filename, "commit", "true",
214: "fieldnames", "id,my_s", "header", "false");
215: assertQ(req("id:id"), "//*[@numFound='1']");
216: assertQ(req("id:100"), "//str[@name='my_s'][.='quoted']");
217:
218: // test skipLines
219: loadLocal("stream.file", filename, "commit", "true",
220: "fieldnames", "id,my_s", "header", "false",
221: "skipLines", "1");
222: assertQ(req("id:id"), "//*[@numFound='1']");
223: assertQ(req("id:100"), "//str[@name='my_s'][.='quoted']");
224:
225: // test multi-valued fields via field splitting w/ mapping of subvalues
226: makeFile("id,str_s\n" + "100,\"quoted\"\n" + "101,\"a,b,c\"\n"
227: + "102,\"a,,b\"\n" + "103,\n");
228: loadLocal("stream.file", filename, "commit", "true",
229: "f.str_s.map", ":EMPTY", "f.str_s.split", "true");
230: assertQ(req("id:[100 TO 110]"), "//*[@numFound='4']");
231: assertQ(req("id:100"), "//str[@name='str_s'][.='quoted']");
232: assertQ(req("id:101"), "//arr[@name='str_s']/str[1][.='a']");
233: assertQ(req("id:101"), "//arr[@name='str_s']/str[2][.='b']");
234: assertQ(req("id:101"), "//arr[@name='str_s']/str[3][.='c']");
235: assertQ(req("id:102"), "//arr[@name='str_s']/str[2][.='EMPTY']");
236: assertQ(req("id:103"), "//str[@name='str_s'][.='EMPTY']");
237:
238: // test alternate values for delimiters
239: makeFile("id|str_s\n" + "100|^quoted^\n" + "101|a;'b';c\n"
240: + "102|a;;b\n" + "103|\n");
241:
242: loadLocal("stream.file", filename, "commit", "true",
243: "separator", "|", "encapsulator", "^", "f.str_s.map",
244: ":EMPTY", "f.str_s.split", "true", "f.str_s.separator",
245: ";", "f.str_s.encapsulator", "'");
246: assertQ(req("id:[100 TO 110]"), "//*[@numFound='4']");
247: assertQ(req("id:100"), "//str[@name='str_s'][.='quoted']");
248: assertQ(req("id:101"), "//arr[@name='str_s']/str[1][.='a']");
249: assertQ(req("id:101"), "//arr[@name='str_s']/str[2][.='b']");
250: assertQ(req("id:101"), "//arr[@name='str_s']/str[3][.='c']");
251: assertQ(req("id:102"), "//arr[@name='str_s']/str[2][.='EMPTY']");
252: assertQ(req("id:103"), "//str[@name='str_s'][.='EMPTY']");
253:
254: }
255:
256: }
|