Fingerprint.py :  » Development » Frowns » frowns » Fingerprint » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » Development » Frowns 
Frowns » frowns » Fingerprint » Fingerprint.py
"""Fingerprint
 generate fingerprints from molecules.

 fp = Fingerprint(numIntegers)
   Generate a fingerprint object that stores fingerprints
   in an array of numIntegers.

 fp.addPath(path)
   add a path to the fingerprint.  path is any str'able
   value.

 fp in fp2
   returns 1 if the fingerprint fp is a contained in the fingerprint
   fp2.

   This might be a bastardazation of __contains__ but I sort
   of like it.
   
 The method for creating fingerprints is remarkably simple.

 a sequence of non branching paths is extracted from a molecule.
 A path is a string value named atoms and named bonds
 through the traversal.  For instance:
 'C-C-C-C-N' or even 'Carbon through single bond to Carbon'.
 Any string will do as long as the string is always the
 same for the same path through the same or other molecules.

 For each path
  1 convert the string to an integer value and use it to
    seed a random number generator
    (random.seed can use any hashable value as the seed!
     python, again, is cool!)
    random.seed(path)
  2 pull out two random integers from the seeded generator
    index = int(random.random() * NUM_INTS)
    bit   = int(random.random() * INT_SIZE)

  fingerprint[index] = fingerprint[index] | 1<<bit

 we store a fingerprint as an array of integers.  Each integer
 has a certain number of bits that can be flipped.  The process
 of adding a path to a fingerprint is simply choosing the index
 and bit position for a path.  The above procedure does this
 in a deterministic fashion.
"""
import random
import sys
if sys.version_info[0:2] == (2,3):
    random = random.WichmannHill()
del sys

# We also need a good way to cache these to disk.  I suppose
# cPickle will work just fine for now...
class Fingerprint:
    def __init__(self, numIntegers=1, fingerprint=None):
        if fingerprint:
            self.fingerprint = fingerprint
        else:
            self.fingerprint = [0] * numIntegers

    def addPath(self, path):
        random.seed(path)
        fingerprint = self.fingerprint
        index = int(random.random() * len(fingerprint))
        # XXX FIX ME
        # might not be 32 on all platforms!
        bit = int(random.random() * 32)
        fingerprint[index] |= 1 << bit
        assert fingerprint[index] != 0
        return index, bit

    def __contains__(self, other):
        # self contains other
        if len(other.fingerprint) != len(self.fingerprint):
            raise "Fingerprints not the same size!"
        
        for a,b in zip(self.fingerprint, other.fingerprint):
            if(a & b) != b:
                return 0
        return 1

    def intTobitlist(self, i):
        # we'll assume that i is indeed a real integer
        result = []
        for loop in range(32):
            bit = i&1
            result.append(bit)
            i = i >> 1
        result.reverse()
        return result
    
    def to_list(self):
        result = []
        int_to_bitlist = self.intTobitlist
        
        for i in self.fingerprint:
            result += int_to_bitlist(i)
        return result

class SplitFingerprint:
    def __init__(self, maxdepth=7, integersPerPrint=[4]*6):
        assert maxdepth-1 == len(integersPerPrint)
        p = self.paths = []
        for numints in integersPerPrint:
            p.append(Fingerprint(numIntegers=numints))

    def addPath(self, length, path):
        self.paths[length-2].addPath(path)

    def __contains__(self, other):
        # does self contain other
        assert len(self.paths) == len(other.paths)
        for pother, pself in zip(other.paths, self.paths):
            if pother not in pself:
                return 0

        return 1

    def to_list(self):
        result = []
        for p in self.paths:
            result += p.to_list()
        return result
    
def test():
    # XXX FIX ME
    # better tests are needed of course :)
    paths = ["my",
             "dog",
             "has",
             "fleas"]
    
    fp = Fingerprint(32)
    fp2 = Fingerprint(32)
    for path in paths:
        fp.addPath(path)

    paths.reverse()
    for path in paths:
        fp2.addPath(path)
        
    assert fp.fingerprint == fp2.fingerprint
    assert fp in fp2

    fp2.addPath("yuma")
    assert fp in fp2
    assert fp2 not in fp

if __name__ == "__main__":
    test()
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.