misspelling.py :  » Network » Python-Wikipedia-Robot-Framework » pywikipedia » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » Network » Python Wikipedia Robot Framework 
Python Wikipedia Robot Framework » pywikipedia » misspelling.py
# -*- coding: utf-8  -*-
"""
This script works similarly to solve_disambiguation.py. It is meant to fix
links that contain common spelling mistakes. This is only possible on wikis
that have a template or a category for these misspellings.

Command line options:

   -always:XY  instead of asking the user what to do, always perform the same
               action. For example, XY can be "r0", "u" or "2". Be careful with
               this option, and check the changes made by the bot. Note that
               some choices for XY don't make sense and will result in a loop,
               e.g. "l" or "m".

    -start:XY  goes through all misspellings in the category on your wiki
               that is defined (to the bot) as the category containing misspelling
               pages, starting at XY. If the -start argument is not given, it starts
               at the beginning.

   -main       only check pages in the main namespace, not in the talk,
               wikipedia, user, etc. namespaces.
"""
__version__ = '$Id: misspelling.py 7336 2009-09-29 18:27:04Z alexsh $'

# (C) Daniel Herding, 2007
#
# Distributed under the terms of the MIT license.

import wikipedia, solve_disambiguation, catlib, pagegenerators

class MisspellingRobot(solve_disambiguation.DisambiguationRobot):
    """Robot that fixes links pointing to known misspellings.

    It builds on solve_disambiguation.DisambiguationRobot and overrides how
    the candidate replacement titles are found (findAlternatives) and which
    edit summary is set (setSummaryMessage).  The pages to work on come
    either from a per-language category of misspellings or, if no category
    is configured, from the pages transcluding a per-language misspelling
    template.
    """

    # Name of the template (without the 'Template:' prefix) that marks a page
    # as a misspelling, keyed by language code.  None means the wiki has no
    # such template.
    misspellingTemplate = {
        'da': None,                     # uses simple redirects
        'de': u'Falschschreibung',
        #'en': u'Template:Misspelling', # rarely used on en:
        'en': None,                     # uses simple redirects
        'hu': None,                     # uses simple redirects
        'nl': None,
        #'pt': u'Pseudo-redirect',      # replaced by another system on pt:
    }

    # Optional: if there is a category, one can use the -start
    # parameter.
    misspellingCategory = {
        'da': u'Omdirigeringer af fejlstavninger', # only contains date redirects at the moment
        'de': u'Kategorie:Wikipedia:Falschschreibung',
        'en': u'Redirects from misspellings',
        # Accented letters restored; they had been stripped from this title.
        'hu': u'Átirányítások hibás névről',
        'nl': u'Categorie:Wikipedia:Redirect voor spelfout',
        #'pt': u'Categoria:!Pseudo-redirects',
    }

    # Edit summaries, keyed by language code.  '%s' is replaced with the
    # title of the misspelled page.
    msg = {
        # NOTE(review): the 'ar' and 'he' texts appear to have lost all of
        # their non-ASCII words; restore them from a trusted copy.
        'ar': u':     %s',
        # NOTE(review): this repeats the category name and has no '%s'
        # placeholder; setSummaryMessage() copes with that, but a real
        # Danish summary should be supplied.
        'da': u'Omdirigeringer af fejlstavninger',
        'de': u'Bot: korrigiere Link auf Falschschreibung: %s',
        'en': u'Robot: Fixing misspelled link to %s',
        'he': u':      %s',
        'nds': u'Bot: rut mit verkehrt schreven Lenk op %s',
        'nl': u'Bot: verkeerd gespelde verwijzing naar %s gecorrigeerd',
        # Accented letters restored in the 'pl' and 'pt' texts.
        'pl': u'Robot poprawia literówkę w linku do %s',
        'pt': u'Bot: Corrigindo link com erro ortográfico para %s'
    }

    def __init__(self, always, firstPageTitle, main_only):
        """Set up the robot.

        always         -- passed through to DisambiguationRobot unchanged.
        firstPageTitle -- title to start at (see createPageGenerator), or
                          None.
        main_only      -- passed through to DisambiguationRobot unchanged.

        The literal arguments [], True and False are positional parameters
        of DisambiguationRobot.__init__; presumably the list of alternatives
        and two flags -- confirm against solve_disambiguation.py.
        """
        solve_disambiguation.DisambiguationRobot.__init__(
            self, always, [], True,
            self.createPageGenerator(firstPageTitle), False, main_only)

    def createPageGenerator(self, firstPageTitle):
        """Return a preloading generator over the misspelling pages.

        If a category is configured for the current site's language, its
        members are used (recursively), starting at firstPageTitle.
        Otherwise the pages that transclude the configured misspelling
        template are used, and firstPageTitle is ignored with a notice.

        Raises KeyError if neither a category nor a template is configured
        for the language.
        """
        site = wikipedia.getSite()
        categoryTitle = self.misspellingCategory.get(site.lang)
        if categoryTitle is not None:
            category = catlib.Category(site, categoryTitle)
            generator = pagegenerators.CategorizedPageGenerator(category, recurse = True, start = firstPageTitle)
        else:
            templateName = self.misspellingTemplate.get(site.lang)
            if templateName is None:
                # Without this check an unknown language produced a bare
                # KeyError, and a None entry would have queried the
                # non-existent page 'Template:None'.
                raise KeyError('No misspelling category or template is '
                               'configured for language %r' % site.lang)
            templatePage = wikipedia.Page(site, 'Template:%s' % templateName)
            generator = pagegenerators.ReferringPageGenerator(templatePage, onlyTemplateInclusion = True)
            if firstPageTitle:
                wikipedia.output(u'-start parameter unsupported on this wiki because there is no category for misspellings.')
        return pagegenerators.PreloadingGenerator(generator)

    # Overrides the DisambiguationRobot method.
    def findAlternatives(self, disambPage):
        """Collect the correct spelling(s) for disambPage in self.alternatives.

        Returns True if at least one alternative was added, False otherwise.
        """
        if disambPage.isRedirectPage():
            self.alternatives.append(disambPage.getRedirectTarget().title())
            return True

        # Look the template up for the page's own site, and tolerate
        # languages that have no entry at all.
        expectedTemplate = self.misspellingTemplate.get(disambPage.site().lang)
        if expectedTemplate is None:
            return False

        for templateName, params in disambPage.templatesWithParams():
            # Exact comparison: 'in' on a string is a substring test and
            # would also accept fragments of the template name.
            if templateName != expectedTemplate:
                continue
            if not params:
                # The template was used without stating a correct spelling.
                continue
            # The correct spelling is in the last parameter.
            correctSpelling = params[-1]
            # On de.wikipedia, there are some cases where the
            # misspelling is ambiguous, see for example:
            # http://de.wikipedia.org/wiki/Buthan
            linkedTitles = [match.group('title')
                            for match in self.linkR.finditer(correctSpelling)]
            if linkedTitles:
                self.alternatives.extend(linkedTitles)
            else:
                # There were no links in the parameter, so there is
                # only one correct spelling.
                self.alternatives.append(correctSpelling)
            return True
        return False

    # Overrides the DisambiguationRobot method.
    def setSummaryMessage(self, disambPage, new_targets, unlink):
        """Set the edit summary for fixing links to disambPage.

        new_targets and unlink are accepted for compatibility with the
        overridden method but are not used.
        """
        # TODO: setSummaryMessage() in solve_disambiguation now has parameters
        # new_targets and unlink. Make use of these here.
        comment = wikipedia.translate(self.mysite, self.msg)
        # Only interpolate when there is a placeholder; applying '%' to a
        # text without one raises TypeError.
        if u'%s' in comment:
            comment = comment % disambPage.title()
        wikipedia.setAction(comment)

def main():
    """Read the command line options and run a MisspellingRobot.

    Handles -always:XY, -start (with or without a title) and -main; any
    other argument returned by wikipedia.handleArgs() is ignored.
    """
    # Choice applied automatically whenever the bot wonders what to do with
    # a link. While it is None, the user is prompted (default behaviour).
    fixedChoice = None
    startTitle = None
    mainNamespaceOnly = False

    for argument in wikipedia.handleArgs():
        if argument == '-main':
            mainNamespaceOnly = True
        elif argument.startswith('-always:'):
            fixedChoice = argument[len('-always:'):]
        elif argument == '-start':
            # No title was given on the command line, so ask for one.
            startTitle = wikipedia.input(u'At which page do you want to start?')
        elif argument.startswith('-start'):
            # Drop the seven leading characters, i.e. '-start' plus separator.
            startTitle = argument[7:]

    robot = MisspellingRobot(fixedChoice, startTitle, mainNamespaceOnly)
    robot.run()

if __name__ == '__main__':
    # Run the bot only when executed as a script. wikipedia.stopme() is
    # called afterwards in every case, whether main() returned or raised.
    try:
        main()
    finally:
        wikipedia.stopme()
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.