indexer_common.py : » Issue-Tracker » Roundup-Issue-Tracker » roundup-1.4.13 » roundup » backends » Python Open Source

1.	3.1.2 Python
2.	Ajax
3.	Aspect Oriented
4.	Blog
5.	Build
6.	Business Application
7.	Chart Report
8.	Content Management Systems
9.	Cryptographic
10.	Database
11.	Development
12.	Editor
13.	Email
14.	ERP
15.	Game 2D 3D
16.	GIS
17.	GUI
18.	IDE
19.	Installer
20.	IRC
21.	Issue Tracker
22.	Language Interface
23.	Log
24.	Math
25.	Media Sound Audio
26.	Mobile
27.	Network
28.	Parser
29.	PDF
30.	Project Management
31.	RSS
32.	Search
33.	Security
34.	Template Engines
35.	Test
36.	UML
37.	USB Serial
38.	Web Frameworks
39.	Web Server
40.	Web Services
41.	Web Unit
42.	Wiki
43.	Windows
44.	XML

Python Open Source » Issue Tracker » Roundup Issue Tracker

Roundup Issue Tracker » roundup 1.4.13 » roundup » backends » indexer_common.py

#$Id: indexer_common.py,v 1.11 2008-09-11 19:41:07 schlatterbeck Exp $
import re
# Python 2.3 ... 2.6 compatibility:
from roundup.anypy.sets_ import set

from roundup import hyperdb

STOPWORDS = [
    "A", "AND", "ARE", "AS", "AT", "BE", "BUT", "BY",
    "FOR", "IF", "IN", "INTO", "IS", "IT",
    "NO", "NOT", "OF", "ON", "OR", "SUCH",
    "THAT", "THE", "THEIR", "THEN", "THERE", "THESE",
    "THEY", "THIS", "TO", "WAS", "WILL", "WITH"
]

def _isLink(propclass):
    return (isinstance(propclass, hyperdb.Link) or
            isinstance(propclass, hyperdb.Multilink))

class Indexer:
    def __init__(self, db):
        self.stopwords = set(STOPWORDS)
        for word in db.config[('main', 'indexer_stopwords')]:
            self.stopwords.add(word)
        # Do not index anything longer than 25 characters since that'll be
        # gibberish (encoded text or somesuch) or shorter than 2 characters
        self.minlength = 2
        self.maxlength = 25

    def is_stopword(self, word):
        return word in self.stopwords

    def getHits(self, search_terms, klass):
        return self.find(search_terms)

    def search(self, search_terms, klass, ignore={}):
        """Display search results looking for [search, terms] associated
        with the hyperdb Class "klass". Ignore hits on {class: property}.
        """
        # do the index lookup
        hits = self.getHits(search_terms, klass)
        if not hits:
            return {}

        designator_propname = {}
        for nm, propclass in klass.getprops().items():
            if _isLink(propclass):
                designator_propname.setdefault(propclass.classname,
                    []).append(nm)

        # build a dictionary of nodes and their associated messages
        # and files
        nodeids = {}      # this is the answer
        propspec = {}     # used to do the klass.find
        for l in designator_propname.values():
            for propname in l:
                propspec[propname] = {}  # used as a set (value doesn't matter)

        # don't unpack hits entries as sqlite3's Row can't be unpacked :(
        for entry in hits:
            # skip this result if we don't care about this class/property
            classname = entry[0]
            property = entry[2]
            if ignore.has_key((classname, property)):
                continue

            # if it's a property on klass, it's easy
            # (make sure the nodeid is str() not unicode() as returned by some
            # backends as that can cause problems down the track)
            nodeid = str(entry[1])
            if classname == klass.classname:
                if not nodeids.has_key(nodeid):
                    nodeids[nodeid] = {}
                continue

            # make sure the class is a linked one, otherwise ignore
            if not designator_propname.has_key(classname):
                continue

            # it's a linked class - set up to do the klass.find
            for linkprop in designator_propname[classname]:
                propspec[linkprop][nodeid] = 1

        # retain only the meaningful entries
        for propname, idset in propspec.items():
            if not idset:
                del propspec[propname]

        # klass.find tells me the klass nodeids the linked nodes relate to
        propdefs = klass.getprops()
        for resid in klass.find(**propspec):
            resid = str(resid)
            if resid in nodeids:
                continue # we ignore duplicate resids
            nodeids[resid] = {}
            node_dict = nodeids[resid]
            # now figure out where it came from
            for linkprop in propspec.keys():
                v = klass.get(resid, linkprop)
                # the link might be a Link so deal with a single result or None
                if isinstance(propdefs[linkprop], hyperdb.Link):
                    if v is None: continue
                    v = [v]
                for nodeid in v:
                    if propspec[linkprop].has_key(nodeid):
                        # OK, this node[propname] has a winner
                        if not node_dict.has_key(linkprop):
                            node_dict[linkprop] = [nodeid]
                        else:
                            node_dict[linkprop].append(nodeid)
        return nodeids

www.java2java.com | Contact Us

All other trademarks are property of their respective owners.