findbestmatch.py : » GUI » Python-Win32-GUI-Automation » pywinauto-0.4.0 » pywinauto » Python Open Source

1.	3.1.2 Python
2.	Ajax
3.	Aspect Oriented
4.	Blog
5.	Build
6.	Business Application
7.	Chart Report
8.	Content Management Systems
9.	Cryptographic
10.	Database
11.	Development
12.	Editor
13.	Email
14.	ERP
15.	Game 2D 3D
16.	GIS
17.	GUI
18.	IDE
19.	Installer
20.	IRC
21.	Issue Tracker
22.	Language Interface
23.	Log
24.	Math
25.	Media Sound Audio
26.	Mobile
27.	Network
28.	Parser
29.	PDF
30.	Project Management
31.	RSS
32.	Search
33.	Security
34.	Template Engines
35.	Test
36.	UML
37.	USB Serial
38.	Web Frameworks
39.	Web Server
40.	Web Services
41.	Web Unit
42.	Wiki
43.	Windows
44.	XML
Python Open Source » GUI » Python Win32 GUI Automation
Python Win32 GUI Automation » pywinauto 0.4.0 » pywinauto » findbestmatch.py
# GUI Application automation and testing library
# Copyright (C) 2006 Mark Mc Mahon
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public License
# as published by the Free Software Foundation; either version 2.1
# of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the
#    Free Software Foundation, Inc.,
#    59 Temple Place,
#    Suite 330,
#    Boston, MA 02111-1307 USA

"Module to find the closest match of a string in a list"

__revision__ = "$Revision: 679 $"

import re
import difflib

import fuzzydict
#import ctypes
#import ldistance
#levenshtein_distance = ctypes.cdll.levenshtein.levenshtein_distance
#levenshtein_distance = ldistance.distance


# need to use sets.Set for python 2.3 compatability
# but 2.6 raises a deprecation warning about sets module
try:
    set
except NameError:
    import sets
    set = sets.Set

find_best_control_match_cutoff = .6

#====================================================================
class MatchError(IndexError):
    "A suitable match could not be found"
    def __init__(self, items = None, tofind = ''):
        "Init the parent with the message"
        self.tofind = tofind
        self.items = items
        if self.items is None:
            self.items = []

        IndexError.__init__(self,
            "Could not find '%s' in '%s'"% (tofind, self.items))


_cache = {}

# given a list of texts return the match score for each
# and the best score and text with best score
#====================================================================
def _get_match_ratios(texts, match_against):
    "Get the match ratio of how each item in texts compared to match_against"

    # now time to figre out the matching
    ratio_calc = difflib.SequenceMatcher()
    ratio_calc.set_seq1(match_against)

    ratios = {}
    best_ratio = 0
    best_text = ''

    global cache

    for text in texts:

        if 0:
            pass

        if (text, match_against) in _cache:
            ratios[text] = _cache[(text, match_against)]

        elif(match_against, text) in _cache:
            ratios[text] = _cache[(match_against, text)]

        else:
            # set up the SequenceMatcher with other text
            ratio_calc.set_seq2(text)

            # try using the levenshtein distance instead
            #lev_dist = levenshtein_distance(unicode(match_against), unicode(text))
            #ratio = 1 - lev_dist / 10.0
            #ratios[text] = ratio

            # calculate ratio and store it
            ratios[text] = ratio_calc.ratio()

            _cache[(match_against, text)] = ratios[text]

        # if this is the best so far then update best stats
        if ratios[text] > best_ratio:
            best_ratio = ratios[text]
            best_text = text

    return ratios, best_ratio, best_text




#====================================================================
def find_best_match(search_text, item_texts, items, limit_ratio = .5):
    """Return the item that best matches the search_text

    * **search_text** The text to search for
    * **item_texts** The list of texts to search through
    * **items** The list of items corresponding (1 to 1)
      to the list of texts to search through.
    * **limit_ratio** How well the text has to match the best match.
      If the best match matches lower then this then it is not
      considered a match and a MatchError is raised, (default = .5)
    """
    search_text = _cut_at_tab(search_text)

    text_item_map = UniqueDict()
    # Clean each item, make it unique and map to
    # to the item index
    for text, item in zip(item_texts, items):
        text_item_map[_cut_at_tab(text)] = item

    ratios, best_ratio, best_text = \
        _get_match_ratios(text_item_map.keys(), search_text)

    if best_ratio < limit_ratio:
        raise MatchError(items = text_item_map.keys(), tofind = search_text)

    return text_item_map[best_text]





#====================================================================
_after_tab = re.compile(ur"\t.*", re.UNICODE)
_non_word_chars = re.compile(ur"\W", re.UNICODE)

def _cut_at_tab(text):
    "Clean out non characters from the string and return it"

    # remove anything after the first tab
    return  _after_tab.sub("", text)

def _clean_non_chars(text):
    "Remove non word characters"
    # should this also remove everything after the first tab?

    # remove non alphanumeric characters
    return _non_word_chars.sub("", text)


def IsAboveOrToLeft(ref_control, other_ctrl):
    "Return true if the other_ctrl is above or to the left of ref_control"
    text_r = other_ctrl.Rectangle()
    ctrl_r = ref_control.Rectangle()

    # skip controls where text win is to the right of ctrl
    if text_r.left >= ctrl_r.right:
        return False

    # skip controls where text win is below ctrl
    if text_r.top >= ctrl_r.bottom:
        return False
        
    # text control top left corner is below control
    # top left corner - so not to the above or left :)
    if text_r.top >= ctrl_r.top and text_r.left >= ctrl_r.left:
        return False

    return True


#====================================================================
distance_cuttoff = 999
def GetNonTextControlName(ctrl, controls):
    """return the name for this control by finding the closest
    text control above and to its left"""

    names = []

    ctrl_index = controls.index(ctrl)

    if ctrl_index != 0:
        prev_ctrl = controls[ctrl_index-1]

        if prev_ctrl.FriendlyClassName() == "Static" and \
            prev_ctrl.IsVisible() and prev_ctrl.WindowText() and \
            IsAboveOrToLeft(ctrl, prev_ctrl):

            names.append(
                prev_ctrl.WindowText() +
                    ctrl.FriendlyClassName())

    # get the visible text controls so that we can get
    # the closest text if the control has no text
    text_ctrls = [ctrl_ for ctrl_ in controls
        if ctrl_.IsVisible() and ctrl_.WindowText() and ctrl_.can_be_label]

    best_name = ''
    closest = distance_cuttoff
    # now for each of the visible text controls
    for text_ctrl in text_ctrls:

        # get aliases to the control rectangles
        text_r = text_ctrl.Rectangle()
        ctrl_r = ctrl.Rectangle()

        # skip controls where text win is to the right of ctrl
        if text_r.left >= ctrl_r.right:
            continue

        # skip controls where text win is below ctrl
        if text_r.top >= ctrl_r.bottom:
            continue

        # calculate the distance between the controls
        # at first I just calculated the distance from the top let
        # corner of one control to the top left corner of the other control
        # but this was not best, so as a text control should either be above
        # or to the left of the control I get the distance between
        # the top left of the non text control against the
        #    Top-Right of the text control (text control to the left)
        #    Bottom-Left of the text control (text control above)
        # then I get the min of these two


        # We do not actually need to calculate the difference here as we
        # only need a comparative number. As long as we find the closest one
        # the actual distance is not all that important to us.
        # this reduced the unit tests run on my by about 1 second
        # (from 61 ->60 s)

        # (x^2 + y^2)^.5
        #distance = (
        #    (text_r.left - ctrl_r.left) ** 2 +  #  (x^2 + y^2)
        #    (text_r.bottom - ctrl_r.top) ** 2) \
        #    ** .5  # ^.5

        #distance2 = (
        #    (text_r.right - ctrl_r.left) ** 2 +  #  (x^2 + y^2)
        #    (text_r.top - ctrl_r.top) ** 2) \
        #    ** .5  # ^.5

        distance = abs(text_r.left - ctrl_r.left) + abs(text_r.bottom - ctrl_r.top)
        distance2 = abs(text_r.right - ctrl_r.left) + abs(text_r.top - ctrl_r.top)

        distance = min(distance, distance2)

        # if this distance was closer then the last one
        if distance < closest:
            closest = distance
            best_name = text_ctrl.WindowText() + ctrl.FriendlyClassName()

    names.append(best_name)

    return names


#====================================================================
def get_control_names(control, allcontrols):
    "Returns a list of names for this control"
    names = []

    # if it has a reference control - then use that
    #if hasattr(control, 'ref') and control.ref:
    #    control = control.ref

    # Add the control based on it's friendly class name
    names.append(control.FriendlyClassName())

    # if it has some character text then add it base on that
    # and based on that with friendly class name appended
    cleaned = control.WindowText()
    # Todo - I don't like the hardcoded classnames here!
    if cleaned and control.has_title:
        names.append(cleaned)
        names.append(cleaned + control.FriendlyClassName())

    # it didn't have visible text
    else:
        # so find the text of the nearest text visible control
        non_text_names = GetNonTextControlName(control, allcontrols)
        
        # and if one was found - add it
        if non_text_names:
            names.extend(non_text_names)

    # return the names - and make sure there are no duplicates
    return set(names)


#====================================================================
class UniqueDict(dict):
    "A dictionary subclass that handles making it's keys unique"
    def __setitem__(self, text, item):
        "Set an item of the dictionary"

        # this text is already in the map
        # so we need to make it unique
        if text in self:
            # find next unique text after text1
            unique_text = text
            counter = 2
            while unique_text in self:
                unique_text = text + str(counter)
                counter += 1

            # now we also need to make sure the original item
            # is under text0 and text1 also!
            if text + '0' not in self:
                dict.__setitem__(self, text+'0', self[text])
                dict.__setitem__(self, text+'1', self[text])

            # now that we don't need original 'text' anymore
            # replace it with the uniq text
            text = unique_text

        # add our current item
        dict.__setitem__(self, text, item)


    def FindBestMatches(
        self,
        search_text,
        clean = False,
        ignore_case = False):

        """Return the best matches for search_text in the items

        * **search_text** the text to look for
        * **clean** whether to clean non text characters out of the strings
        * **ignore_case** compare strings case insensitively
        """

        # now time to figure out the matching
        ratio_calc = difflib.SequenceMatcher()

        if ignore_case:
            search_text = search_text.lower()

        ratio_calc.set_seq1(search_text)

        ratios = {}
        best_ratio = 0
        best_texts = []

        ratio_offset = 1
        if clean:
            ratio_offset *= .9

        if ignore_case:
            ratio_offset *= .9

        for text_ in self:

            # make a copy of the text as we need the original later
            text = text_

            if clean:
                text = _clean_non_chars(text)

            if ignore_case:
                text = text.lower()

            # check if this item is in the cache - if yes, then retrieve it
            if (text, search_text) in _cache:
                ratios[text_] = _cache[(text, search_text)]

            elif(search_text, text) in _cache:
                ratios[text_] = _cache[(search_text, text)]

            # not in the cache - calculate it and add it to the cache
            else:
                # set up the SequenceMatcher with other text
                ratio_calc.set_seq2(text)

                # if a very quick check reveals that this is not going
                # to match then
                ratio = ratio_calc.real_quick_ratio() * ratio_offset

                if ratio  >=  find_best_control_match_cutoff:
                    ratio = ratio_calc.quick_ratio() * ratio_offset

                    if ratio >= find_best_control_match_cutoff:
                        ratio = ratio_calc.ratio() * ratio_offset

                # save the match we got and store it in the cache
                ratios[text_] = ratio
                _cache[(text, search_text)] = ratio

            # try using the levenshtein distance instead
            #lev_dist = levenshtein_distance(unicode(search_text), unicode(text))
            #ratio = 1 - lev_dist / 10.0
            #ratios[text_] = ratio
            #print "%5s" %("%0.2f"% ratio), search_text, `text`

            # if this is the best so far then update best stats
            if ratios[text_] > best_ratio and \
                ratios[text_] >= find_best_control_match_cutoff:

                best_ratio = ratios[text_]
                best_texts = [text_]

            elif ratios[text_] == best_ratio:
                best_texts.append(text_)

        #best_ratio *= ratio_offset

        return best_ratio, best_texts


#====================================================================
def build_unique_dict(controls):
    """Build the disambiguated list of controls

    Separated out to a different function so that we can get
    the control identifiers for printing.
    """
    name_control_map = UniqueDict()


    # collect all the possible names for all controls
    # and build a list of them
    for ctrl in controls:
        ctrl_names = get_control_names(ctrl, controls)

        # for each of the names
        for name in ctrl_names:
            name_control_map[name] = ctrl
    return name_control_map


#====================================================================
def find_best_control_matches(search_text, controls):
    """Returns the control that is the the best match to search_text

    This is slightly differnt from find_best_match in that it builds
    up the list of text items to search through using information
    from each control. So for example for there is an OK, Button
    then the following are all added to the search list:
    "OK", "Button", "OKButton"

    But if there is a ListView (which do not have visible 'text')
    then it will just add "ListView".
    """

    name_control_map = build_unique_dict(controls)


#    # collect all the possible names for all controls
#    # and build a list of them
#    for ctrl in controls:
#        ctrl_names = get_control_names(ctrl, controls)
#
#        # for each of the names
#        for name in ctrl_names:
#            name_control_map[name] = ctrl

    search_text = unicode(search_text)

    best_ratio, best_texts = name_control_map.FindBestMatches(search_text)

    best_ratio_ci, best_texts_ci = \
        name_control_map.FindBestMatches(search_text, ignore_case = True)

    best_ratio_clean, best_texts_clean = \
        name_control_map.FindBestMatches(search_text, clean = True)

    best_ratio_clean_ci, best_texts_clean_ci = \
        name_control_map.FindBestMatches(
            search_text, clean = True, ignore_case = True)


    if best_ratio_ci > best_ratio:
        best_ratio = best_ratio_ci
        best_texts = best_texts_ci

    if best_ratio_clean > best_ratio:
        best_ratio = best_ratio_clean
        best_texts = best_texts_clean

    if best_ratio_clean_ci > best_ratio:
        best_ratio = best_ratio_clean_ci
        best_texts = best_texts_clean_ci

    if best_ratio < find_best_control_match_cutoff:
        raise MatchError(items = name_control_map.keys(), tofind = search_text)

    return [name_control_map[best_text] for best_text in best_texts]






#
#def GetControlMatchRatio(text, ctrl):
#    # get the texts for the control
#    ctrl_names = get_control_names(ctrl)
#
#    #get the best match for these
#    matcher = UniqueDict()
#    for name in ctrl_names:
#        matcher[name] = ctrl
#
#    best_ratio, unused = matcher.FindBestMatches(text)
#
#    return best_ratio
#
#
#
#def get_controls_ratios(search_text, controls):
#    name_control_map = UniqueDict()
#
#    # collect all the possible names for all controls
#    # and build a list of them
#    for ctrl in controls:
#        ctrl_names = get_control_names(ctrl)
#
#        # for each of the names
#        for name in ctrl_names:
#            name_control_map[name] = ctrl
#
#    match_ratios, best_ratio, best_text = \
#        _get_match_ratios(name_control_map.keys(), search_text)
#
#    return match_ratios, best_ratio, best_text,
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.