Source Code Cross Referenced for TestTermVectorsReader.java in » Net » lucene-connector » org » apache » lucene » index » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Net » lucene connector » org.apache.lucene.index 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        package org.apache.lucene.index;
002:
003:        /**
004:         * Licensed to the Apache Software Foundation (ASF) under one or more
005:         * contributor license agreements.  See the NOTICE file distributed with
006:         * this work for additional information regarding copyright ownership.
007:         * The ASF licenses this file to You under the Apache License, Version 2.0
008:         * (the "License"); you may not use this file except in compliance with
009:         * the License.  You may obtain a copy of the License at
010:         *
011:         *     http://www.apache.org/licenses/LICENSE-2.0
012:         *
013:         * Unless required by applicable law or agreed to in writing, software
014:         * distributed under the License is distributed on an "AS IS" BASIS,
015:         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016:         * See the License for the specific language governing permissions and
017:         * limitations under the License.
018:         */
019:
020:        import org.apache.lucene.analysis.Analyzer;
021:        import org.apache.lucene.analysis.Token;
022:        import org.apache.lucene.analysis.TokenStream;
023:        import org.apache.lucene.document.Document;
024:        import org.apache.lucene.document.Field;
025:        import org.apache.lucene.store.MockRAMDirectory;
026:        import org.apache.lucene.util.LuceneTestCase;
027:
028:        import java.io.IOException;
029:        import java.io.Reader;
030:        import java.util.Arrays;
031:        import java.util.Iterator;
032:        import java.util.Map;
033:        import java.util.SortedSet;
034:
035:        public class TestTermVectorsReader extends LuceneTestCase {
036:            //Must be lexicographically sorted, will do in setup, versus trying to maintain here
037:            private String[] testFields = { "f1", "f2", "f3", "f4" };
038:            private boolean[] testFieldsStorePos = { true, false, true, false };
039:            private boolean[] testFieldsStoreOff = { true, false, false, true };
040:            private String[] testTerms = { "this", "is", "a", "test" };
041:            private int[][] positions = new int[testTerms.length][];
042:            private TermVectorOffsetInfo[][] offsets = new TermVectorOffsetInfo[testTerms.length][];
043:            private MockRAMDirectory dir = new MockRAMDirectory();
044:            private String seg;
045:            private FieldInfos fieldInfos = new FieldInfos();
046:            private static int TERM_FREQ = 3;
047:
048:            public TestTermVectorsReader(String s) {
049:                super (s);
050:            }
051:
052:            private class TestToken implements  Comparable {
053:                String text;
054:                int pos;
055:                int startOffset;
056:                int endOffset;
057:
058:                public int compareTo(Object other) {
059:                    return pos - ((TestToken) other).pos;
060:                }
061:            }
062:
063:            TestToken[] tokens = new TestToken[testTerms.length * TERM_FREQ];
064:
065:            protected void setUp() throws Exception {
066:                super .setUp();
067:                /*
068:                for (int i = 0; i < testFields.length; i++) {
069:                  fieldInfos.add(testFields[i], true, true, testFieldsStorePos[i], testFieldsStoreOff[i]);
070:                }
071:                 */
072:
073:                Arrays.sort(testTerms);
074:                int tokenUpto = 0;
075:                for (int i = 0; i < testTerms.length; i++) {
076:                    positions[i] = new int[TERM_FREQ];
077:                    offsets[i] = new TermVectorOffsetInfo[TERM_FREQ];
078:                    // first position must be 0
079:                    for (int j = 0; j < TERM_FREQ; j++) {
080:                        // positions are always sorted in increasing order
081:                        positions[i][j] = (int) (j * 10 + Math.random() * 10);
082:                        // offsets are always sorted in increasing order
083:                        offsets[i][j] = new TermVectorOffsetInfo(j * 10, j * 10
084:                                + testTerms[i].length());
085:                        TestToken token = tokens[tokenUpto++] = new TestToken();
086:                        token.text = testTerms[i];
087:                        token.pos = positions[i][j];
088:                        token.startOffset = offsets[i][j].getStartOffset();
089:                        token.endOffset = offsets[i][j].getEndOffset();
090:                    }
091:                }
092:                Arrays.sort(tokens);
093:
094:                IndexWriter writer = new IndexWriter(dir, new MyAnalyzer(),
095:                        true);
096:                writer.setUseCompoundFile(false);
097:                Document doc = new Document();
098:                for (int i = 0; i < testFields.length; i++) {
099:                    final Field.TermVector tv;
100:                    if (testFieldsStorePos[i] && testFieldsStoreOff[i])
101:                        tv = Field.TermVector.WITH_POSITIONS_OFFSETS;
102:                    else if (testFieldsStorePos[i] && !testFieldsStoreOff[i])
103:                        tv = Field.TermVector.WITH_POSITIONS;
104:                    else if (!testFieldsStorePos[i] && testFieldsStoreOff[i])
105:                        tv = Field.TermVector.WITH_OFFSETS;
106:                    else
107:                        tv = Field.TermVector.YES;
108:                    doc.add(new Field(testFields[i], "", Field.Store.NO,
109:                            Field.Index.TOKENIZED, tv));
110:                }
111:
112:                //Create 5 documents for testing, they all have the same
113:                //terms
114:                for (int j = 0; j < 5; j++)
115:                    writer.addDocument(doc);
116:                writer.flush();
117:                seg = writer.newestSegment().name;
118:                writer.close();
119:
120:                fieldInfos = new FieldInfos(dir, seg + "."
121:                        + IndexFileNames.FIELD_INFOS_EXTENSION);
122:            }
123:
124:            private class MyTokenStream extends TokenStream {
125:                int tokenUpto;
126:
127:                public Token next() {
128:                    if (tokenUpto >= tokens.length)
129:                        return null;
130:                    else {
131:                        final Token t = new Token();
132:                        final TestToken testToken = tokens[tokenUpto++];
133:                        t.setTermText(testToken.text);
134:                        if (tokenUpto > 1)
135:                            t.setPositionIncrement(testToken.pos
136:                                    - tokens[tokenUpto - 2].pos);
137:                        else
138:                            t.setPositionIncrement(testToken.pos + 1);
139:                        t.setStartOffset(testToken.startOffset);
140:                        t.setEndOffset(testToken.endOffset);
141:                        return t;
142:                    }
143:                }
144:            }
145:
146:            private class MyAnalyzer extends Analyzer {
147:                public TokenStream tokenStream(String fieldName, Reader reader) {
148:                    return new MyTokenStream();
149:                }
150:            }
151:
152:            public void test() {
153:                //Check to see the files were created properly in setup
154:                assertTrue(dir.fileExists(seg + "."
155:                        + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
156:                assertTrue(dir.fileExists(seg + "."
157:                        + IndexFileNames.VECTORS_INDEX_EXTENSION));
158:            }
159:
160:            public void testReader() throws IOException {
161:                TermVectorsReader reader = new TermVectorsReader(dir, seg,
162:                        fieldInfos);
163:                assertTrue(reader != null);
164:                for (int j = 0; j < 5; j++) {
165:                    TermFreqVector vector = reader.get(j, testFields[0]);
166:                    assertTrue(vector != null);
167:                    String[] terms = vector.getTerms();
168:                    assertTrue(terms != null);
169:                    assertTrue(terms.length == testTerms.length);
170:                    for (int i = 0; i < terms.length; i++) {
171:                        String term = terms[i];
172:                        //System.out.println("Term: " + term);
173:                        assertTrue(term.equals(testTerms[i]));
174:                    }
175:                }
176:            }
177:
178:            public void testPositionReader() throws IOException {
179:                TermVectorsReader reader = new TermVectorsReader(dir, seg,
180:                        fieldInfos);
181:                assertTrue(reader != null);
182:                TermPositionVector vector;
183:                String[] terms;
184:                vector = (TermPositionVector) reader.get(0, testFields[0]);
185:                assertTrue(vector != null);
186:                terms = vector.getTerms();
187:                assertTrue(terms != null);
188:                assertTrue(terms.length == testTerms.length);
189:                for (int i = 0; i < terms.length; i++) {
190:                    String term = terms[i];
191:                    //System.out.println("Term: " + term);
192:                    assertTrue(term.equals(testTerms[i]));
193:                    int[] positions = vector.getTermPositions(i);
194:                    assertTrue(positions != null);
195:                    assertTrue(positions.length == this .positions[i].length);
196:                    for (int j = 0; j < positions.length; j++) {
197:                        int position = positions[j];
198:                        assertTrue(position == this .positions[i][j]);
199:                    }
200:                    TermVectorOffsetInfo[] offset = vector.getOffsets(i);
201:                    assertTrue(offset != null);
202:                    assertTrue(offset.length == this .offsets[i].length);
203:                    for (int j = 0; j < offset.length; j++) {
204:                        TermVectorOffsetInfo termVectorOffsetInfo = offset[j];
205:                        assertTrue(termVectorOffsetInfo.equals(offsets[i][j]));
206:                    }
207:                }
208:
209:                TermFreqVector freqVector = reader.get(0, testFields[1]); //no pos, no offset
210:                assertTrue(freqVector != null);
211:                assertTrue(freqVector instanceof  TermPositionVector == false);
212:                terms = freqVector.getTerms();
213:                assertTrue(terms != null);
214:                assertTrue(terms.length == testTerms.length);
215:                for (int i = 0; i < terms.length; i++) {
216:                    String term = terms[i];
217:                    //System.out.println("Term: " + term);
218:                    assertTrue(term.equals(testTerms[i]));
219:                }
220:            }
221:
222:            public void testOffsetReader() throws IOException {
223:                TermVectorsReader reader = new TermVectorsReader(dir, seg,
224:                        fieldInfos);
225:                assertTrue(reader != null);
226:                TermPositionVector vector = (TermPositionVector) reader.get(0,
227:                        testFields[0]);
228:                assertTrue(vector != null);
229:                String[] terms = vector.getTerms();
230:                assertTrue(terms != null);
231:                assertTrue(terms.length == testTerms.length);
232:                for (int i = 0; i < terms.length; i++) {
233:                    String term = terms[i];
234:                    //System.out.println("Term: " + term);
235:                    assertTrue(term.equals(testTerms[i]));
236:                    int[] positions = vector.getTermPositions(i);
237:                    assertTrue(positions != null);
238:                    assertTrue(positions.length == this .positions[i].length);
239:                    for (int j = 0; j < positions.length; j++) {
240:                        int position = positions[j];
241:                        assertTrue(position == this .positions[i][j]);
242:                    }
243:                    TermVectorOffsetInfo[] offset = vector.getOffsets(i);
244:                    assertTrue(offset != null);
245:                    assertTrue(offset.length == this .offsets[i].length);
246:                    for (int j = 0; j < offset.length; j++) {
247:                        TermVectorOffsetInfo termVectorOffsetInfo = offset[j];
248:                        assertTrue(termVectorOffsetInfo.equals(offsets[i][j]));
249:                    }
250:                }
251:            }
252:
253:            public void testMapper() throws IOException {
254:                TermVectorsReader reader = new TermVectorsReader(dir, seg,
255:                        fieldInfos);
256:                assertTrue(reader != null);
257:                SortedTermVectorMapper mapper = new SortedTermVectorMapper(
258:                        new TermVectorEntryFreqSortedComparator());
259:                reader.get(0, mapper);
260:                SortedSet set = mapper.getTermVectorEntrySet();
261:                assertTrue("set is null and it shouldn't be", set != null);
262:                //three fields, 4 terms, all terms are the same
263:                assertTrue("set Size: " + set.size() + " is not: " + 4, set
264:                        .size() == 4);
265:                //Check offsets and positions
266:                for (Iterator iterator = set.iterator(); iterator.hasNext();) {
267:                    TermVectorEntry tve = (TermVectorEntry) iterator.next();
268:                    assertTrue("tve is null and it shouldn't be", tve != null);
269:                    assertTrue("tve.getOffsets() is null and it shouldn't be",
270:                            tve.getOffsets() != null);
271:                    assertTrue(
272:                            "tve.getPositions() is null and it shouldn't be",
273:                            tve.getPositions() != null);
274:
275:                }
276:
277:                mapper = new SortedTermVectorMapper(
278:                        new TermVectorEntryFreqSortedComparator());
279:                reader.get(1, mapper);
280:                set = mapper.getTermVectorEntrySet();
281:                assertTrue("set is null and it shouldn't be", set != null);
282:                //three fields, 4 terms, all terms are the same
283:                assertTrue("set Size: " + set.size() + " is not: " + 4, set
284:                        .size() == 4);
285:                //Should have offsets and positions b/c we are munging all the fields together
286:                for (Iterator iterator = set.iterator(); iterator.hasNext();) {
287:                    TermVectorEntry tve = (TermVectorEntry) iterator.next();
288:                    assertTrue("tve is null and it shouldn't be", tve != null);
289:                    assertTrue("tve.getOffsets() is null and it shouldn't be",
290:                            tve.getOffsets() != null);
291:                    assertTrue(
292:                            "tve.getPositions() is null and it shouldn't be",
293:                            tve.getPositions() != null);
294:
295:                }
296:
297:                FieldSortedTermVectorMapper fsMapper = new FieldSortedTermVectorMapper(
298:                        new TermVectorEntryFreqSortedComparator());
299:                reader.get(0, fsMapper);
300:                Map map = fsMapper.getFieldToTerms();
301:                assertTrue("map Size: " + map.size() + " is not: "
302:                        + testFields.length, map.size() == testFields.length);
303:                for (Iterator iterator = map.entrySet().iterator(); iterator
304:                        .hasNext();) {
305:                    Map.Entry entry = (Map.Entry) iterator.next();
306:                    SortedSet sortedSet = (SortedSet) entry.getValue();
307:                    assertTrue("sortedSet Size: " + sortedSet.size()
308:                            + " is not: " + 4, sortedSet.size() == 4);
309:                    for (Iterator inner = sortedSet.iterator(); inner.hasNext();) {
310:                        TermVectorEntry tve = (TermVectorEntry) inner.next();
311:                        assertTrue("tve is null and it shouldn't be",
312:                                tve != null);
313:                        //Check offsets and positions.
314:                        assertTrue("tve is null and it shouldn't be",
315:                                tve != null);
316:                        String field = tve.getField();
317:                        if (field.equals(testFields[0])) {
318:                            //should have offsets
319:
320:                            assertTrue(
321:                                    "tve.getOffsets() is null and it shouldn't be",
322:                                    tve.getOffsets() != null);
323:                            assertTrue(
324:                                    "tve.getPositions() is null and it shouldn't be",
325:                                    tve.getPositions() != null);
326:                        } else if (field.equals(testFields[1])) {
327:                            //should not have offsets
328:
329:                            assertTrue(
330:                                    "tve.getOffsets() is not null and it shouldn't be",
331:                                    tve.getOffsets() == null);
332:                            assertTrue(
333:                                    "tve.getPositions() is not null and it shouldn't be",
334:                                    tve.getPositions() == null);
335:                        }
336:                    }
337:                }
338:                //Try mapper that ignores offs and positions
339:                fsMapper = new FieldSortedTermVectorMapper(true, true,
340:                        new TermVectorEntryFreqSortedComparator());
341:                reader.get(0, fsMapper);
342:                map = fsMapper.getFieldToTerms();
343:                assertTrue("map Size: " + map.size() + " is not: "
344:                        + testFields.length, map.size() == testFields.length);
345:                for (Iterator iterator = map.entrySet().iterator(); iterator
346:                        .hasNext();) {
347:                    Map.Entry entry = (Map.Entry) iterator.next();
348:                    SortedSet sortedSet = (SortedSet) entry.getValue();
349:                    assertTrue("sortedSet Size: " + sortedSet.size()
350:                            + " is not: " + 4, sortedSet.size() == 4);
351:                    for (Iterator inner = sortedSet.iterator(); inner.hasNext();) {
352:                        TermVectorEntry tve = (TermVectorEntry) inner.next();
353:                        assertTrue("tve is null and it shouldn't be",
354:                                tve != null);
355:                        //Check offsets and positions.
356:                        assertTrue("tve is null and it shouldn't be",
357:                                tve != null);
358:                        String field = tve.getField();
359:                        if (field.equals(testFields[0])) {
360:                            //should have offsets
361:
362:                            assertTrue(
363:                                    "tve.getOffsets() is null and it shouldn't be",
364:                                    tve.getOffsets() == null);
365:                            assertTrue(
366:                                    "tve.getPositions() is null and it shouldn't be",
367:                                    tve.getPositions() == null);
368:                        } else if (field.equals(testFields[1])) {
369:                            //should not have offsets
370:
371:                            assertTrue(
372:                                    "tve.getOffsets() is not null and it shouldn't be",
373:                                    tve.getOffsets() == null);
374:                            assertTrue(
375:                                    "tve.getPositions() is not null and it shouldn't be",
376:                                    tve.getPositions() == null);
377:                        }
378:                    }
379:                }
380:
381:                // test setDocumentNumber()
382:                IndexReader ir = IndexReader.open(dir);
383:                DocNumAwareMapper docNumAwareMapper = new DocNumAwareMapper();
384:                assertEquals(-1, docNumAwareMapper.getDocumentNumber());
385:
386:                ir.getTermFreqVector(0, docNumAwareMapper);
387:                assertEquals(0, docNumAwareMapper.getDocumentNumber());
388:                docNumAwareMapper.setDocumentNumber(-1);
389:
390:                ir.getTermFreqVector(1, docNumAwareMapper);
391:                assertEquals(1, docNumAwareMapper.getDocumentNumber());
392:                docNumAwareMapper.setDocumentNumber(-1);
393:
394:                ir.getTermFreqVector(0, "f1", docNumAwareMapper);
395:                assertEquals(0, docNumAwareMapper.getDocumentNumber());
396:                docNumAwareMapper.setDocumentNumber(-1);
397:
398:                ir.getTermFreqVector(1, "f2", docNumAwareMapper);
399:                assertEquals(1, docNumAwareMapper.getDocumentNumber());
400:                docNumAwareMapper.setDocumentNumber(-1);
401:
402:                ir.getTermFreqVector(0, "f1", docNumAwareMapper);
403:                assertEquals(0, docNumAwareMapper.getDocumentNumber());
404:
405:                ir.close();
406:
407:            }
408:
409:            /**
410:             * Make sure exceptions and bad params are handled appropriately
411:             */
412:            public void testBadParams() {
413:                try {
414:                    TermVectorsReader reader = new TermVectorsReader(dir, seg,
415:                            fieldInfos);
416:                    assertTrue(reader != null);
417:                    //Bad document number, good field number
418:                    reader.get(50, testFields[0]);
419:                    fail();
420:                } catch (IOException e) {
421:                    // expected exception
422:                }
423:                try {
424:                    TermVectorsReader reader = new TermVectorsReader(dir, seg,
425:                            fieldInfos);
426:                    assertTrue(reader != null);
427:                    //Bad document number, no field
428:                    reader.get(50);
429:                    fail();
430:                } catch (IOException e) {
431:                    // expected exception
432:                }
433:                try {
434:                    TermVectorsReader reader = new TermVectorsReader(dir, seg,
435:                            fieldInfos);
436:                    assertTrue(reader != null);
437:                    //good document number, bad field number
438:                    TermFreqVector vector = reader.get(0, "f50");
439:                    assertTrue(vector == null);
440:                } catch (IOException e) {
441:                    fail();
442:                }
443:            }
444:
445:            public static class DocNumAwareMapper extends TermVectorMapper {
446:
447:                public DocNumAwareMapper() {
448:                }
449:
450:                private int documentNumber = -1;
451:
452:                public void setExpectations(String field, int numTerms,
453:                        boolean storeOffsets, boolean storePositions) {
454:                    if (documentNumber == -1) {
455:                        throw new RuntimeException(
456:                                "Documentnumber should be set at this point!");
457:                    }
458:                }
459:
460:                public void map(String term, int frequency,
461:                        TermVectorOffsetInfo[] offsets, int[] positions) {
462:                    if (documentNumber == -1) {
463:                        throw new RuntimeException(
464:                                "Documentnumber should be set at this point!");
465:                    }
466:                }
467:
468:                public int getDocumentNumber() {
469:                    return documentNumber;
470:                }
471:
472:                public void setDocumentNumber(int documentNumber) {
473:                    this.documentNumber = documentNumber;
474:                }
475:            }
476:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.