# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
from unittest import TestCase
from lucene import \
StandardAnalyzer, RAMDirectory, IndexWriter, Term, Document, Field, \
IndexSearcher, TermQuery, PhraseQuery, QueryParser, StringReader, \
TermAttribute, PositionIncrementAttribute, Version
from lia.analysis.AnalyzerUtils import AnalyzerUtils
from lia.analysis.synonym.SynonymAnalyzer import SynonymAnalyzer
from lia.analysis.synonym.MockSynonymEngine import MockSynonymEngine
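# SynonymAnalyzerTest exercises SynonymAnalyzer: synonyms returned by
# MockSynonymEngine are injected into the token stream at the same position
# as the original token, so term, phrase, and parsed queries can all match
# words that never literally appear in the indexed text.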
class SynonymAnalyzerTest(TestCase):
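    # A single analyzer instance is shared by all tests.  It is constructed
    # when the class body executes, so lucene.initVM() must already have been
    # called by whatever imports this module.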
synonymAnalyzer = SynonymAnalyzer(MockSynonymEngine())
def setUp(self):
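        # Index one document with the synonym-injecting analyzer into an
        # in-memory directory; the searches below run against this index.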
self.directory = RAMDirectory()
writer = IndexWriter(self.directory, self.synonymAnalyzer, True,
IndexWriter.MaxFieldLength.UNLIMITED)
doc = Document()
doc.add(Field("content",
"The quick brown fox jumps over the lazy dogs",
Field.Store.YES, Field.Index.ANALYZED))
writer.addDocument(doc)
writer.close()
self.searcher = IndexSearcher(self.directory, True)
def tearDown(self):
self.searcher.close()
def testJumps(self):
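        # Analyze the single word "jumps"; MockSynonymEngine supplies
        # "hops" and "leaps" as its synonyms.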
stream = self.synonymAnalyzer.tokenStream("contents",
StringReader("jumps"))
term = stream.addAttribute(TermAttribute.class_)
posIncr = stream.addAttribute(PositionIncrementAttribute.class_)
i = 0
expected = ["jumps", "hops", "leaps"]
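        # The original token carries a position increment of 1; each injected
        # synonym is stacked at the same position, so its increment is 0.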
while stream.incrementToken():
self.assertEqual(expected[i], term.term())
if i == 0:
expectedPos = 1
else:
expectedPos = 0
self.assertEqual(expectedPos, posIncr.getPositionIncrement())
i += 1
self.assertEqual(3, i)
def testSearchByAPI(self):
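        # The document never contains the literal term "hops", but the synonym
        # was injected at indexing time, so a plain TermQuery still matches.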
tq = TermQuery(Term("content", "hops"))
topDocs = self.searcher.search(tq, 50)
self.assertEqual(1, topDocs.totalHits)
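        # A phrase query for "fox hops" matches as well, because "hops"
        # occupies the same position as "jumps".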
pq = PhraseQuery()
pq.add(Term("content", "fox"))
pq.add(Term("content", "hops"))
topDocs = self.searcher.search(pq, 50)
        self.assertEqual(1, topDocs.totalHits)
def testWithQueryParser(self):
query = QueryParser(Version.LUCENE_CURRENT, "content",
self.synonymAnalyzer).parse('"fox jumps"')
topDocs = self.searcher.search(query, 50)
# in Lucene 1.9, position increments are no longer ignored
self.assertEqual(1, topDocs.totalHits, "!!!! what?!")
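        # The same phrase parsed with a plain StandardAnalyzer (no synonyms)
        # must also match, since the original words are indexed as well.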
query = QueryParser(Version.LUCENE_CURRENT, "content",
StandardAnalyzer(Version.LUCENE_CURRENT)).parse('"fox jumps"')
topDocs = self.searcher.search(query, 50)
self.assertEqual(1, topDocs.totalHits, "*whew*")
def main(cls):
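        # Show how the quoted phrase parses with the synonym analyzer and
        # which tokens that analyzer actually produces for it.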
query = QueryParser(Version.LUCENE_CURRENT, "content",
cls.synonymAnalyzer).parse('"fox jumps"')
print "\"fox jumps\" parses to ", query.toString("content")
print "From AnalyzerUtils.tokensFromAnalysis: "
AnalyzerUtils.displayTokens(cls.synonymAnalyzer, "\"fox jumps\"")
print ''
main = classmethod(main)