indexer_xapian.py : » Issue-Tracker » Roundup-Issue-Tracker » roundup-1.4.13 » roundup » backends » Python Open Source

1.	3.1.2 Python
2.	Ajax
3.	Aspect Oriented
4.	Blog
5.	Build
6.	Business Application
7.	Chart Report
8.	Content Management Systems
9.	Cryptographic
10.	Database
11.	Development
12.	Editor
13.	Email
14.	ERP
15.	Game 2D 3D
16.	GIS
17.	GUI
18.	IDE
19.	Installer
20.	IRC
21.	Issue Tracker
22.	Language Interface
23.	Log
24.	Math
25.	Media Sound Audio
26.	Mobile
27.	Network
28.	Parser
29.	PDF
30.	Project Management
31.	RSS
32.	Search
33.	Security
34.	Template Engines
35.	Test
36.	UML
37.	USB Serial
38.	Web Frameworks
39.	Web Server
40.	Web Services
41.	Web Unit
42.	Wiki
43.	Windows
44.	XML

Python Open Source » Issue Tracker » Roundup Issue Tracker

Roundup Issue Tracker » roundup 1.4.13 » roundup » backends » indexer_xapian.py

#$Id: indexer_xapian.py,v 1.6 2007-10-25 07:02:42 richard Exp $
''' This implements the full-text indexer using the Xapian indexer.
'''
import os
import re

import xapian

from roundup.backends.indexer_common import Indexer as IndexerBase

# TODO: we need to delete documents when a property is *reindexed*

class Indexer(IndexerBase):
    '''Full-text indexer that stores its index in a Xapian database.

    The index lives in the "text-index" directory below the tracker's
    database directory (db.config.DATABASE).  Every indexed property
    becomes one Xapian document whose data -- and one of whose terms --
    is the string "classname:itemid:property".
    '''
    def __init__(self, db):
        '''Record where the index lives; the database is not opened here.'''
        IndexerBase.__init__(self, db)
        self.db_path = db.config.DATABASE
        self.reindex = 0
        # Never set to True by this class: the begin_transaction() call
        # in add_text() is commented out.
        self.transaction_active = False

    def _get_database(self):
        '''Open (creating if necessary) the writable Xapian index.

        A new handle is opened on every call.
        '''
        index = os.path.join(self.db_path, 'text-index')
        return xapian.WritableDatabase(index, xapian.DB_CREATE_OR_OPEN)

    def save_index(self):
        '''Save the changes to the index.

        Does nothing unless a transaction is flagged as active, which
        this class currently never does (see add_text()).
        '''
        if not self.transaction_active:
            return
        # XXX: Xapian databases don't actually implement transactions yet
        database = self._get_database()
        database.commit_transaction()
        self.transaction_active = False

    def close(self):
        '''close the indexing database'''
        # Nothing to do: no database handle is kept on the instance.
        pass

    def rollback(self):
        '''Discard pending changes to the index.

        Does nothing unless a transaction is flagged as active, which
        this class currently never does (see add_text()).
        '''
        if not self.transaction_active:
            return
        # XXX: Xapian databases don't actually implement transactions yet
        database = self._get_database()
        database.cancel_transaction()
        self.transaction_active = False

    def force_reindex(self):
        '''Flag the index as needing a rebuild.

        Only sets the flag reported by should_reindex(); nothing is
        removed from the index here.
        '''
        self.reindex = 1

    def should_reindex(self):
        '''returns True if the indexes need to be rebuilt'''
        return self.reindex

    def add_text(self, identifier, text, mime_type='text/plain'):
        '''Index "text" under "identifier", replacing any earlier entry.

        "identifier" is (classname, itemid, property).  Only content of
        type text/plain is indexed; any other mime_type is ignored.  A
        false "text" (None, '') is indexed as the empty string, which
        leaves a document carrying just the identifier term.
        '''
        if mime_type != 'text/plain':
            return
        text = text or ''

        # open the database and start a transaction if needed
        database = self._get_database()
        # XXX: Xapian databases don't actually implement transactions yet
        #if not self.transaction_active:
            #database.begin_transaction()
            #self.transaction_active = True

        # TODO: allow configuration of other languages
        stemmer = xapian.Stem("english")

        # We use the identifier twice: once in the actual "text" being
        # indexed so we can search on it, and again as the "data" being
        # indexed so we know what we're matching when we get results
        identifier = '%s:%s:%s'%identifier

        # see if the id is in the database; one hit is enough to learn
        # the docid of the document to replace
        enquire = xapian.Enquire(database)
        enquire.set_query(xapian.Query(xapian.Query.OP_AND, [identifier]))
        matches = enquire.get_mset(0, 1)
        docid = None
        if matches.size():      # MSet has no __len__()
            docid = matches.begin().get_docid()

        # create the new document
        doc = xapian.Document()
        doc.set_data(identifier)
        doc.add_posting(identifier, 0)

        # index every word whose length is within the configured bounds,
        # upper-cased and stemmed, at its character offset in the text
        word_re = r'\b\w{%d,%d}\b' % (self.minlength, self.maxlength)
        for match in re.finditer(word_re, text.upper()):
            word = match.group(0)
            if self.is_stopword(word):
                continue
            doc.add_posting(stemmer(word), match.start(0))

        if docid is not None:
            database.replace_document(docid, doc)
        else:
            database.add_document(doc)

    def find(self, wordlist):
        '''Look up the documents that contain all the words in wordlist.

        Words are normalised as in add_text(): upper-cased and stemmed,
        with stopwords and words outside the minlength..maxlength range
        dropped.

        Returns a list of tuples obtained by splitting each matching
        document's data on ":" (i.e. (classname, itemid, property) as
        stored by add_text()).  An empty wordlist returns an empty
        dictionary, kept for backwards compatibility; if no usable
        search term remains after filtering, an empty list is returned.
        '''
        if not wordlist:
            return {}

        stemmer = xapian.Stem("english")
        terms = []
        for word in wordlist:
            if not self.minlength <= len(word) <= self.maxlength:
                continue
            term = word.upper()
            if not self.is_stopword(term):
                terms.append(stemmer(term))
        if not terms:
            # nothing left to search for; don't run an empty query
            return []

        # NOTE(review): this opens the index writable even though it is
        # only read here -- confirm whether a read-only handle would do.
        database = self._get_database()

        enquire = xapian.Enquire(database)
        enquire.set_query(xapian.Query(xapian.Query.OP_AND, terms))
        # Ask for as many matches as there are documents so that no hit
        # is silently dropped (a fixed limit of 10 used to truncate the
        # result set).
        matches = enquire.get_mset(0, database.get_doccount())

        return [tuple(m[xapian.MSET_DOCUMENT].get_data().split(':'))
            for m in matches]

www.java2java.com | Contact Us

All other trademarks are property of their respective owners.