Source Code Cross Referenced for NearSpansOrdered.java in  » Net » lucene-connector » org » apache » lucene » search » spans » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Sevlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Net » lucene connector » org.apache.lucene.search.spans 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        package org.apache.lucene.search.spans;
002:
003:        /**
004:         * Licensed to the Apache Software Foundation (ASF) under one or more
005:         * contributor license agreements.  See the NOTICE file distributed with
006:         * this work for additional information regarding copyright ownership.
007:         * The ASF licenses this file to You under the Apache License, Version 2.0
008:         * (the "License"); you may not use this file except in compliance with
009:         * the License.  You may obtain a copy of the License at
010:         *
011:         *     http://www.apache.org/licenses/LICENSE-2.0
012:         *
013:         * Unless required by applicable law or agreed to in writing, software
014:         * distributed under the License is distributed on an "AS IS" BASIS,
015:         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016:         * See the License for the specific language governing permissions and
017:         * limitations under the License.
018:         */
019:
020:        import java.io.IOException;
021:
022:        import java.util.Arrays;
023:        import java.util.Comparator;
024:
025:        import org.apache.lucene.index.IndexReader;
026:
027:        /** A Spans that is formed from the ordered subspans of a SpanNearQuery
028:         * where the subspans do not overlap and have a maximum slop between them.
029:         * <p>
030:         * The formed spans only contains minimum slop matches.<br>
031:         * The matching slop is computed from the distance(s) between
032:         * the non overlapping matching Spans.<br>
033:         * Successive matches are always formed from the successive Spans
034:         * of the SpanNearQuery.
035:         * <p>
036:         * The formed spans may contain overlaps when the slop is at least 1.
037:         * For example, when querying using
038:         * <pre>t1 t2 t3</pre>
039:         * with slop at least 1, the fragment:
040:         * <pre>t1 t2 t1 t3 t2 t3</pre>
041:         * matches twice:
042:         * <pre>t1 t2 .. t3      </pre>
043:         * <pre>      t1 .. t2 t3</pre>
044:         */
045:        class NearSpansOrdered implements  Spans {
046:            private final int allowedSlop;
047:            private boolean firstTime = true;
048:            private boolean more = false;
049:
050:            /** The spans in the same order as the SpanNearQuery */
051:            private final Spans[] subSpans;
052:
053:            /** Indicates that all subSpans have same doc() */
054:            private boolean inSameDoc = false;
055:
056:            private int matchDoc = -1;
057:            private int matchStart = -1;
058:            private int matchEnd = -1;
059:
060:            private final Spans[] subSpansByDoc;
061:            private final Comparator spanDocComparator = new Comparator() {
062:                public int compare(Object o1, Object o2) {
063:                    return ((Spans) o1).doc() - ((Spans) o2).doc();
064:                }
065:            };
066:
067:            private SpanNearQuery query;
068:
069:            public NearSpansOrdered(SpanNearQuery spanNearQuery,
070:                    IndexReader reader) throws IOException {
071:                if (spanNearQuery.getClauses().length < 2) {
072:                    throw new IllegalArgumentException("Less than 2 clauses: "
073:                            + spanNearQuery);
074:                }
075:                allowedSlop = spanNearQuery.getSlop();
076:                SpanQuery[] clauses = spanNearQuery.getClauses();
077:                subSpans = new Spans[clauses.length];
078:                subSpansByDoc = new Spans[clauses.length];
079:                for (int i = 0; i < clauses.length; i++) {
080:                    subSpans[i] = clauses[i].getSpans(reader);
081:                    subSpansByDoc[i] = subSpans[i]; // used in toSameDoc()
082:                }
083:                query = spanNearQuery; // kept for toString() only.
084:            }
085:
086:            // inherit javadocs
087:            public int doc() {
088:                return matchDoc;
089:            }
090:
091:            // inherit javadocs
092:            public int start() {
093:                return matchStart;
094:            }
095:
096:            // inherit javadocs
097:            public int end() {
098:                return matchEnd;
099:            }
100:
101:            // inherit javadocs
102:            public boolean next() throws IOException {
103:                if (firstTime) {
104:                    firstTime = false;
105:                    for (int i = 0; i < subSpans.length; i++) {
106:                        if (!subSpans[i].next()) {
107:                            more = false;
108:                            return false;
109:                        }
110:                    }
111:                    more = true;
112:                }
113:                return advanceAfterOrdered();
114:            }
115:
116:            // inherit javadocs
117:            public boolean skipTo(int target) throws IOException {
118:                if (firstTime) {
119:                    firstTime = false;
120:                    for (int i = 0; i < subSpans.length; i++) {
121:                        if (!subSpans[i].skipTo(target)) {
122:                            more = false;
123:                            return false;
124:                        }
125:                    }
126:                    more = true;
127:                } else if (more && (subSpans[0].doc() < target)) {
128:                    if (subSpans[0].skipTo(target)) {
129:                        inSameDoc = false;
130:                    } else {
131:                        more = false;
132:                        return false;
133:                    }
134:                }
135:                return advanceAfterOrdered();
136:            }
137:
138:            /** Advances the subSpans to just after an ordered match with a minimum slop
139:             * that is smaller than the slop allowed by the SpanNearQuery.
140:             * @return true iff there is such a match.
141:             */
142:            private boolean advanceAfterOrdered() throws IOException {
143:                while (more && (inSameDoc || toSameDoc())) {
144:                    if (stretchToOrder() && shrinkToAfterShortestMatch()) {
145:                        return true;
146:                    }
147:                }
148:                return false; // no more matches
149:            }
150:
151:            /** Advance the subSpans to the same document */
152:            private boolean toSameDoc() throws IOException {
153:                Arrays.sort(subSpansByDoc, spanDocComparator);
154:                int firstIndex = 0;
155:                int maxDoc = subSpansByDoc[subSpansByDoc.length - 1].doc();
156:                while (subSpansByDoc[firstIndex].doc() != maxDoc) {
157:                    if (!subSpansByDoc[firstIndex].skipTo(maxDoc)) {
158:                        more = false;
159:                        inSameDoc = false;
160:                        return false;
161:                    }
162:                    maxDoc = subSpansByDoc[firstIndex].doc();
163:                    if (++firstIndex == subSpansByDoc.length) {
164:                        firstIndex = 0;
165:                    }
166:                }
167:                for (int i = 0; i < subSpansByDoc.length; i++) {
168:                    assert (subSpansByDoc[i].doc() == maxDoc) : " NearSpansOrdered.toSameDoc() spans "
169:                            + subSpansByDoc[0]
170:                            + "\n at doc "
171:                            + subSpansByDoc[i].doc()
172:                            + ", but should be at "
173:                            + maxDoc;
174:                }
175:                inSameDoc = true;
176:                return true;
177:            }
178:
179:            /** Check whether two Spans in the same document are ordered.
180:             * @param spans1 
181:             * @param spans2 
182:             * @return true iff spans1 starts before spans2
183:             *              or the spans start at the same position,
184:             *              and spans1 ends before spans2.
185:             */
186:            static final boolean docSpansOrdered(Spans spans1, Spans spans2) {
187:                assert spans1.doc() == spans2.doc() : "doc1 " + spans1.doc()
188:                        + " != doc2 " + spans2.doc();
189:                int start1 = spans1.start();
190:                int start2 = spans2.start();
191:                /* Do not call docSpansOrdered(int,int,int,int) to avoid invoking .end() : */
192:                return (start1 == start2) ? (spans1.end() < spans2.end())
193:                        : (start1 < start2);
194:            }
195:
196:            /** Like {@link #docSpansOrdered(Spans,Spans)}, but use the spans
197:             * starts and ends as parameters.
198:             */
199:            private static final boolean docSpansOrdered(int start1, int end1,
200:                    int start2, int end2) {
201:                return (start1 == start2) ? (end1 < end2) : (start1 < start2);
202:            }
203:
204:            /** Order the subSpans within the same document by advancing all later spans
205:             * after the previous one.
206:             */
207:            private boolean stretchToOrder() throws IOException {
208:                matchDoc = subSpans[0].doc();
209:                for (int i = 1; inSameDoc && (i < subSpans.length); i++) {
210:                    while (!docSpansOrdered(subSpans[i - 1], subSpans[i])) {
211:                        if (!subSpans[i].next()) {
212:                            inSameDoc = false;
213:                            more = false;
214:                            break;
215:                        } else if (matchDoc != subSpans[i].doc()) {
216:                            inSameDoc = false;
217:                            break;
218:                        }
219:                    }
220:                }
221:                return inSameDoc;
222:            }
223:
224:            /** The subSpans are ordered in the same doc, so there is a possible match.
225:             * Compute the slop while making the match as short as possible by advancing
226:             * all subSpans except the last one in reverse order.
227:             */
228:            private boolean shrinkToAfterShortestMatch() throws IOException {
229:                matchStart = subSpans[subSpans.length - 1].start();
230:                matchEnd = subSpans[subSpans.length - 1].end();
231:                int matchSlop = 0;
232:                int lastStart = matchStart;
233:                int lastEnd = matchEnd;
234:                for (int i = subSpans.length - 2; i >= 0; i--) {
235:                    Spans prevSpans = subSpans[i];
236:                    int prevStart = prevSpans.start();
237:                    int prevEnd = prevSpans.end();
238:                    while (true) { // Advance prevSpans until after (lastStart, lastEnd)
239:                        if (!prevSpans.next()) {
240:                            inSameDoc = false;
241:                            more = false;
242:                            break; // Check remaining subSpans for final match.
243:                        } else if (matchDoc != prevSpans.doc()) {
244:                            inSameDoc = false; // The last subSpans is not advanced here.
245:                            break; // Check remaining subSpans for last match in this document.
246:                        } else {
247:                            int ppStart = prevSpans.start();
248:                            int ppEnd = prevSpans.end(); // Cannot avoid invoking .end()
249:                            if (!docSpansOrdered(ppStart, ppEnd, lastStart,
250:                                    lastEnd)) {
251:                                break; // Check remaining subSpans.
252:                            } else { // prevSpans still before (lastStart, lastEnd)
253:                                prevStart = ppStart;
254:                                prevEnd = ppEnd;
255:                            }
256:                        }
257:                    }
258:                    assert prevStart <= matchStart;
259:                    if (matchStart > prevEnd) { // Only non overlapping spans add to slop.
260:                        matchSlop += (matchStart - prevEnd);
261:                    }
262:                    /* Do not break on (matchSlop > allowedSlop) here to make sure
263:                     * that subSpans[0] is advanced after the match, if any.
264:                     */
265:                    matchStart = prevStart;
266:                    lastStart = prevStart;
267:                    lastEnd = prevEnd;
268:                }
269:                return matchSlop <= allowedSlop; // ordered and allowed slop
270:            }
271:
272:            public String toString() {
273:                return getClass().getName()
274:                        + "("
275:                        + query.toString()
276:                        + ")@"
277:                        + (firstTime ? "START" : (more ? (doc() + ":" + start()
278:                                + "-" + end()) : "END"));
279:            }
280:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.