# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
from unittest import main
from BaseTestRangeFilter import BaseTestRangeFilter
from lucene import *
#
# A basic 'positive' Unit test class for the TermRangeFilter class.
#
# NOTE: at the moment, this class only tests for 'positive' results,
# it does not verify the results to ensure there are no 'false positives',
# nor does it adequately test 'negative' results. It also does not test
# that garbage in results in an Exception.
#
class TestTermRangeFilter(BaseTestRangeFilter):
    """Hit-count tests for TermRangeFilter, with and without a Collator.

    Every test searches with TermQuery(body:body) restricted by a
    TermRangeFilter and compares only the reported totalHits with the
    expected count.
    """

    def _buildIndex(self, contents):
        """Return a new RAMDirectory holding one document per content value.

        contents -- iterable of strings; each becomes the un-analyzed
                    "content" field of its own document.  Every document
                    also gets an un-analyzed "body" field set to "body" so
                    that the TermQuery used by the tests matches it.
        """
        directory = RAMDirectory()
        writer = IndexWriter(directory, SimpleAnalyzer(), True,
                             IndexWriter.MaxFieldLength.LIMITED)
        try:
            for content in contents:
                doc = Document()
                doc.add(Field("content", content,
                              Field.Store.YES, Field.Index.NOT_ANALYZED))
                doc.add(Field("body", "body",
                              Field.Store.YES, Field.Index.NOT_ANALYZED))
                writer.addDocument(doc)
            writer.optimize()
        finally:
            writer.close()
        return directory

    def _assertHits(self, reader, field, cases, collator=None, maxHits=50):
        """Run every range case against reader and assert its hit count.

        reader   -- open IndexReader to search; it is NOT closed here
        field    -- name of the field the range filter is applied to
        cases    -- sequence of (lower, upper, includeLower, includeUpper,
                    expectedHits, message) tuples; a None bound is passed
                    through to TermRangeFilter unchanged
        collator -- optional Collator; when given, the six-argument
                    TermRangeFilter constructor is used
        maxHits  -- hit limit handed to IndexSearcher.search()
        """
        search = IndexSearcher(reader)
        try:
            q = TermQuery(Term("body", "body"))
            for lower, upper, incLower, incUpper, expected, message in cases:
                if collator is None:
                    rangeFilter = TermRangeFilter(field, lower, upper,
                                                  incLower, incUpper)
                else:
                    rangeFilter = TermRangeFilter(field, lower, upper,
                                                  incLower, incUpper,
                                                  collator)
                numHits = search.search(q, rangeFilter, maxHits).totalHits
                self.assertEqual(expected, numHits, message)
        finally:
            search.close()

    def _idCases(self, numDocs, yes, no):
        """Build the range cases for the "id" field.

        numDocs -- number of documents in the index under test
        yes, no -- text used for True / False in the messages of the
                   "very small sets" cases (the plain tests spell them
                   "True"/"False", the collating tests "T"/"F")
        """
        # Floor division keeps medId an int on every Python version; it is
        # fed to self.pad() and to the expected hit counts below.
        medId = (self.maxId - self.minId) // 2

        minIP = self.pad(self.minId)
        maxIP = self.pad(self.maxId)
        medIP = self.pad(medId)

        tt = "%s, %s" % (yes, yes)
        tf = "%s, %s" % (yes, no)
        ft = "%s, %s" % (no, yes)
        ff = "%s, %s" % (no, no)

        return [
            # test id, bounded on both ends
            (minIP, maxIP, True, True, numDocs, "find all"),
            (minIP, maxIP, True, False, numDocs - 1, "all but last"),
            (minIP, maxIP, False, True, numDocs - 1, "all but first"),
            (minIP, maxIP, False, False, numDocs - 2, "all but ends"),
            (medIP, maxIP, True, True,
             1 + self.maxId - medId, "med and up"),
            (minIP, medIP, True, True,
             1 + medId - self.minId, "up to med"),

            # unbounded id
            (minIP, None, True, False, numDocs, "min and up"),
            (None, maxIP, False, True, numDocs, "max and down"),
            (minIP, None, False, False, numDocs - 1, "not min, but up"),
            (None, maxIP, False, False, numDocs - 1, "not max, but down"),
            (medIP, maxIP, True, False,
             self.maxId - medId, "med and up, not max"),
            (minIP, medIP, False, True,
             medId - self.minId, "not min, up to med"),

            # very small sets
            (minIP, minIP, False, False, 0, "min, min, " + ff),
            (medIP, medIP, False, False, 0, "med, med, " + ff),
            (maxIP, maxIP, False, False, 0, "max, max, " + ff),
            (minIP, minIP, True, True, 1, "min, min, " + tt),
            (None, minIP, False, True, 1, "nul, min, " + ft),
            (maxIP, maxIP, True, True, 1, "max, max, " + tt),
            (maxIP, None, True, False, 1, "max, nul, " + tf),
            (medIP, medIP, True, True, 1, "med, med, " + tt),
        ]

    def _randCases(self, index, numDocs, yes, no):
        """Build the range cases for the "rand" field of index.

        index   -- test index object providing minR and maxR
        numDocs -- number of documents in the index under test
        yes, no -- text used for True / False in the messages of the
                   "very small sets" cases
        """
        minRP = self.pad(index.minR)
        maxRP = self.pad(index.maxR)

        tt = "%s, %s" % (yes, yes)
        tf = "%s, %s" % (yes, no)
        ft = "%s, %s" % (no, yes)
        ff = "%s, %s" % (no, no)

        return [
            # test extremes, bounded on both ends
            (minRP, maxRP, True, True, numDocs, "find all"),
            (minRP, maxRP, True, False, numDocs - 1, "all but biggest"),
            (minRP, maxRP, False, True, numDocs - 1, "all but smallest"),
            (minRP, maxRP, False, False, numDocs - 2, "all but extremes"),

            # unbounded
            (minRP, None, True, False, numDocs, "smallest and up"),
            (None, maxRP, False, True, numDocs, "biggest and down"),
            (minRP, None, False, False,
             numDocs - 1, "not smallest, but up"),
            (None, maxRP, False, False,
             numDocs - 1, "not biggest, but down"),

            # very small sets
            (minRP, minRP, False, False, 0, "min, min, " + ff),
            (maxRP, maxRP, False, False, 0, "max, max, " + ff),
            (minRP, minRP, True, True, 1, "min, min, " + tt),
            (None, minRP, False, True, 1, "nul, min, " + ft),
            (maxRP, maxRP, True, True, 1, "max, max, " + tt),
            (maxRP, None, True, False, 1, "max, nul, " + tf),
        ]

    def testRangeFilterId(self):
        """Filter the signed index on "id" without a Collator."""
        reader = IndexReader.open(self.signedIndex.index, True)
        try:
            numDocs = reader.numDocs()
            self.assertEqual(numDocs, 1 + self.maxId - self.minId,
                             "num of docs")
            self._assertHits(reader, "id",
                             self._idCases(numDocs, "True", "False"))
        finally:
            reader.close()

    def testRangeFilterIdCollating(self):
        """Filter the signed index on "id" using an English Collator."""
        collator = Collator.getInstance(Locale.ENGLISH)
        reader = IndexReader.open(self.signedIndex.index, True)
        try:
            numDocs = reader.numDocs()
            self.assertEqual(numDocs, 1 + self.maxId - self.minId,
                             "num of docs")
            self._assertHits(reader, "id",
                             self._idCases(numDocs, "T", "F"),
                             collator, 1000)
        finally:
            reader.close()

    def testRangeFilterRand(self):
        """Filter the signed index on "rand" without a Collator."""
        index = self.signedIndex
        reader = IndexReader.open(index.index, True)
        try:
            numDocs = reader.numDocs()
            self.assertEqual(numDocs, 1 + self.maxId - self.minId,
                             "num of docs")
            self._assertHits(reader, "rand",
                             self._randCases(index, numDocs,
                                             "True", "False"))
        finally:
            reader.close()

    def testRangeFilterRandCollating(self):
        """Filter the unsigned index on "rand" using an English Collator."""
        # using the unsigned index because collation seems to ignore hyphens
        index = self.unsignedIndex
        collator = Collator.getInstance(Locale.ENGLISH)
        reader = IndexReader.open(index.index, True)
        try:
            numDocs = reader.numDocs()
            self.assertEqual(numDocs, 1 + self.maxId - self.minId,
                             "num of docs")
            self._assertHits(reader, "rand",
                             self._randCases(index, numDocs, "T", "F"),
                             collator, 1000)
        finally:
            reader.close()

    def testFarsi(self):
        """Check that an Arabic Collator orders a Farsi term as expected."""
        # build an index
        farsiIndex = self._buildIndex([u"\u0633\u0627\u0628"])

        # Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
        # RuleBasedCollator. However, the Arabic Locale seems to order the
        # Farsi characters properly.
        collator = Collator.getInstance(Locale("ar"))

        # Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
        # orders the U+0698 character before the U+0633 character, so the
        # single index Term below should NOT be returned by a
        # TermRangeFilter with a Farsi Collator (or an Arabic one for the
        # case when Farsi is not supported).
        cases = [
            (u"\u062F", u"\u0698", True, True,
             0, "The index Term should not be included."),
            (u"\u0633", u"\u0638", True, True,
             1, "The index Term should be included."),
        ]

        reader = IndexReader.open(farsiIndex, True)
        try:
            self._assertHits(reader, "content", cases, collator, 1000)
        finally:
            reader.close()

    def testDanish(self):
        """Check that a Danish Collator changes which terms are in range."""
        # build an index
        # Danish collation orders the words below in the given order
        # (example taken from TestSort.testInternationalSort() ).
        danishIndex = self._buildIndex([u"H\u00D8T", u"H\u00C5T", "MAND"])

        collator = Collator.getInstance(Locale("da", "dk"))

        # Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
        # but Danish collation does.
        cases = [
            (u"H\u00D8T", "MAND", False, False,
             1, "The index Term should be included."),
            (u"H\u00C5T", "MAND", False, False,
             0, "The index Term should not be included."),
        ]

        reader = IndexReader.open(danishIndex, True)
        try:
            self._assertHits(reader, "content", cases, collator, 1000)
        finally:
            reader.close()
if __name__ == "__main__":
    import sys
    import lucene

    # The Java VM must be started before any Lucene class is used.
    lucene.initVM()

    if '-loop' in sys.argv:
        # '-loop' is this script's own switch: strip it so unittest does not
        # see it, then run the test case over and over until interrupted.
        sys.argv.remove('-loop')
        while True:
            try:
                main(defaultTest='TestTermRangeFilter')
            except (Exception, SystemExit):
                # unittest's main() finishes by raising SystemExit; swallow
                # that (and ordinary errors) so the loop continues, but let
                # KeyboardInterrupt propagate so Ctrl-C can stop the loop.
                pass
    else:
        main(defaultTest='TestTermRangeFilter')
|