# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import os
from itertools import izip
from unittest import TestCase
from time import time
from datetime import timedelta
from lucene import \
IndexWriter, SimpleAnalyzer, Document, Field, System, File, \
Term, TermQuery, IndexSearcher, SimpleFSDirectory
class FieldLengthTest(TestCase):
    """Exercise IndexWriter.MaxFieldLength against a small on-disk index.

    The same two fixture documents are indexed twice into one directory,
    first with a field-length limit of 10 and then with a limit of 1.
    After each pass the "contents" field is searched for the term
    "bridges" and the number of hits is checked.
    """

    # Parallel fixture lists: the i-th entry of each list is used for the
    # i-th document built by addDocuments().
    keywords = ["1", "2"]
    unindexed = ["Netherlands", "Italy"]
    unstored = ["Amsterdam has lots of bridges",
                "Venice has lots of canals"]
    text = ["Amsterdam", "Venice"]

    def setUp(self):
        """Open the index directory shared by the helpers of this test.

        The index lives in "index-dir" below the JVM's java.io.tmpdir
        property; "tmp" is passed as the fallback value for that property.
        """
        indexDir = os.path.join(System.getProperty("java.io.tmpdir", "tmp"),
                                "index-dir")
        self.dir = SimpleFSDirectory(File(indexDir))

    def tearDown(self):
        """Close the directory opened by setUp so it is not leaked."""
        self.dir.close()

    def testFieldSize(self):
        """Index with limits of 10 and then 1, checking hits for "bridges"."""
        # "bridges" is the fifth word of its "contents" value, so with a
        # limit of 10 exactly one document is expected to match.
        self.addDocuments(self.dir, 10)
        self.assertEqual(1, self.getHitCount("contents", "bridges"))

        # With a limit of 1 no document is expected to match any more.
        self.addDocuments(self.dir, 1)
        self.assertEqual(0, self.getHitCount("contents", "bridges"))

    def getHitCount(self, fieldName, searchString):
        """Return the number of documents matching a single-term query.

        fieldName    -- name of the field to search
        searchString -- term to look up in that field

        At most 50 hits are requested from the searcher, so the returned
        count never exceeds 50.
        """
        # NOTE(review): True is presumably the readOnly flag of
        # IndexSearcher(Directory, boolean) -- confirm against the Lucene
        # version in use.
        searcher = IndexSearcher(self.dir, True)
        try:
            query = TermQuery(Term(fieldName, searchString))
            return len(searcher.search(query, 50).scoreDocs)
        finally:
            # Release the searcher even when the search itself raises.
            searcher.close()

    def addDocuments(self, dir, maxFieldLength):
        """Write the fixture documents into dir with a field-length limit.

        dir            -- directory the IndexWriter writes to
        maxFieldLength -- integer wrapped in IndexWriter.MaxFieldLength

        One document is added per fixture entry, with the fields "id",
        "country", "contents" and "city"; the index is then optimized.
        """
        # NOTE(review): True is presumably the "create" flag, i.e. the
        # index is rebuilt from scratch on every call; testFieldSize relies
        # on the first pass no longer matching after the second -- confirm
        # against the Lucene IndexWriter constructor documentation.
        writer = IndexWriter(dir, SimpleAnalyzer(), True,
                             IndexWriter.MaxFieldLength(maxFieldLength))
        try:
            for keyword, unindexed, unstored, text in \
                    izip(self.keywords, self.unindexed,
                         self.unstored, self.text):
                doc = Document()
                doc.add(Field("id", keyword,
                              Field.Store.YES, Field.Index.NOT_ANALYZED))
                doc.add(Field("country", unindexed,
                              Field.Store.YES, Field.Index.NO))
                doc.add(Field("contents", unstored,
                              Field.Store.NO, Field.Index.ANALYZED))
                doc.add(Field("city", text,
                              Field.Store.YES, Field.Index.ANALYZED))
                writer.addDocument(doc)

            writer.optimize()
        finally:
            # Always close the writer so a failure while indexing does not
            # leave it open on a directory that is reused afterwards.
            writer.close()
|