commonscat.py :  » Network » Python-Wikipedia-Robot-Framework » pywikipedia » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » Network » Python Wikipedia Robot Framework 
Python Wikipedia Robot Framework » pywikipedia » commonscat.py
#!/usr/bin/python
# -*- coding: utf-8  -*-
"""
With this tool you can add the template {{commonscat}} to categories.
The tool works by following the interwiki links. If the template is present on
another language page, the bot will use it.

You could probably use it on articles as well, but this isn't tested.

This bot uses pagegenerators to get a list of pages. The following options are
supported:

&params;

-always           Don't prompt you for each replacement. Warning messages
                  do not have to be confirmed. ATTENTION: Use this with care!

-summary:XYZ      Set the action summary message for the edit to XYZ.
                  There is no predefined message text yet and this option is
                  recommended.

-checkcurrent     Work on all category pages that use the primary commonscat
                  template.

For example to go through all categories:
commonscat.py -start:Category:!
"""
# TODO:
"""
Commonscat bot:

Take a page. Follow the interwiki's and look for the commonscat template
*Found zero templates. Done.
*Found one template. Add this template
*Found more templates. Ask the user <- still have to implement this

TODO:
*Update interwiki's at commons
*Collect all possibilities also if local wiki already has link.
*Better support for other templates (translations) / redundant templates.
*Check mode, only check pages which already have the template
*More efficient like interwiki.py
*Possibility to update other languages in the same run

"""

#
# (C) Multichill, 2008-2009
# (C) Pywikipedia bot team, 2008-2010
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id: commonscat.py 7933 2010-02-15 13:46:26Z xqt $'

import wikipedia, config, pagegenerators, add_text, re

# Placeholder -> help text mapping. The module docstring contains the
# '&params;' placeholder; presumably the framework's help output substitutes
# the generic page generator help for it (verify against wikipedia.showHelp).
docuReplacements = {'&params;': pagegenerators.parameterHelp}

# Per-language commonscat template names.
# Key:   language code, or '_default' for every language without an entry.
# Value: (primary template name, [alternative template names]).
# No entry is needed if a language behaves like '_default'.
#
# NOTE(review): several names below are empty or whitespace-only (for example
# in 'ar', 'bn', 'fa', 'ko', 'zh-classical', 'zh-yue') and a few look
# truncated ('Categora Commons', 'Kzvagyonkat'). Presumably non-ASCII
# characters were lost in this copy of the file -- verify against the
# upstream source before relying on these entries.
commonscatTemplates = {
    '_default': (u'Commonscat', []),
    'af' : (u'CommonsKategorie', [u'commonscat']),
    'ar' : (u' ', [u'Commonscat', u' ', u'Commons cat', u'CommonsCat']),
    'az' : (u'CommonsKat', []),
    'bn' : (u'', [u'Commonscat']),
    'crh' : (u'CommonsKat', [u'Commonscat']),
    'cs' : (u'Commonscat', [u'Commons cat']),
    'da' : (u'Commonscat', [u'Commons cat', u'Commonskat', u'Commonscat2']),
    'de' : (u'Commonscat', [u'CommonsCat',]),
    'en' : (u'Commons category', [u'Commoncat', u'Commons2', u'Cms-catlist-up', u'Catlst commons', u'Commonscategory', u'Commonscat', u'Commons cat']),
    'es' : (u'Commonscat', [u'Ccat', u'Commons cat', u'Categora Commons', u'Commonscat-inline']),
    'eu' : (u'Commonskat', [u'Commonscat']),
    'fa' : (u'-', [u'Commonscat', u'Commons cat', u' ', u'Commons category']),
    'fr' : (u'Commonscat', [u'CommonsCat', u'Commons cat', u'Commons category']),
    'frp' : (u'Commonscat', [u'CommonsCat']), 
    'ga' : (u'Catcmhaoin', [u'Commonscat']),
    'hi' : (u'Commonscat', [u'Commons2', u'Commons cat', u'Commons category']),
    'hu' : (u'Kzvagyonkat', []),
    'hy' : (u'Commons cat', [u'Commonscat']),
    'id' : (u'Commonscat', [u'Commons cat', u'Commons2', u'CommonsCat', u'Commons category']),
    'ja' : (u'Commonscat', [u'Commons cat', u'Commons category']),
    'jv' : (u'Commonscat', [u'Commons cat']),
    'kaa' : (u'Commons cat', [u'Commonscat']),
    'kk' : (u'Commonscat', [u'Commons2']),
    'ko' : (u'Commonscat', [u'Commons cat', u'']),
    'la' : (u'CommuniaCat', []),
    'mk' : (u'-', [u'Commonscat', u'Commons cat', u'CommonsCat', u'Commons2', u'Commons category']),
    'ml' : (u'Commonscat', [u'Commons cat', u'Commons2']),
    'nn' : (u'Commonscat', [u'Commons cat']),
    'os' : (u'Commonscat', [u'Commons cat']),
    'pt' : (u'Commonscat', [u'Commons cat']),
    'ro' : (u'Commonscat', [u'Commons cat']),
    'ru' : (u'Commonscat', [u'-']),
    'sl' : (u'Kategorija v Zbirki', [u'Commonscat', u'Kategorija v zbirki', u'Commons cat', u'Katzbirke']),
    'sv' : (u'Commonscat', [u'Commonscat-rad', u'Commonskat', u'Commons cat']),
    'sw' : (u'Commonscat', [u'Commons2', u'Commons cat']),
    'te' : (u'Commonscat', [u'Commons cat']),
    'tr' : (u'CommonsKat', [u'Commonscat', u'Commons cat']),
    'uk' : (u'Commonscat', [u'Commons cat', u'Category', u'Commonscat-inline']),
    'vi' : (u'Commonscat', [u'Commons2', u'Commons cat', u'Commons category', u'Commons+cat']),
    'zh' : (u'Commonscat', [u'Commons cat']),
    'zh-classical' : (u'', [u'Commonscat']),
    'zh-yue' : (u'', [u'Commonscat', u' ', u'Commons cat']),
}

# Per-language list of templates that make skipPage() skip a page.
# Key:   language code (languages without an entry never skip).
# Value: list whose items are either
#          * a template name, matched against the templates used on the page, or
#          * a (template name, text) tuple, which skipPage() matches against
#            the parameter text of that template (e.g. 'commons=').
#
# NOTE(review): some names below are empty or whitespace-only ('ar', 'fa',
# 'ru') and a few look truncated ('Sestiky', 'Ssterlinks'). Presumably
# non-ASCII characters were lost in this copy of the file -- verify against
# the upstream source.
ignoreTemplates = {
    'af' : [u'commons'],
    'ar' : [u' ', u'', u'', u'Commons'],
    'cs' : [u'Commons', u'Sestiky', u'Sisterlinks'],
    'da' : [u'Commons', u'Commons left', u'Commons2', u'Commonsbilleder', u'Commonscat left', u'Commonscat2', u'GalleriCommons', u'Ssterlinks'],
    'de' : [u'Commons'],
    'en' : [u'Category redirect', u'Commons', u'Commonscat1A', u'Commoncats', u'Commonscat4Ra', u'Sisterlinks', u'Sisterlinkswp', u'Tracking category', u'Template category', u'Wikipedia category'],
    'eo' : [u'Commons', (u'Projekto/box', 'commons='), (u'Projekto', 'commons='), (u'Projektoj', 'commons='), (u'Projektoj', 'commonscat=')],
    'es' : [u'Commons', u'IprCommonscat'],
    'eu' : [u'Commons'],
    'fa' : [u'Commons', u''],
    'fi' : [u'Commonscat-rivi', u'Commons-rivi', u'Commons'],
    'fr' : [u'Commons', u'Commons-inline', (u'Autres projets', 'commons=')],
    'fy' : [u'Commons', u'CommonsLyts'],
    'hr' : [u'Commons', (u'WProjekti', 'commonscat=')],
    'it' : [(u'Ip', 'commons='), (u'Interprogetto', 'commons=')],
    'ja' : [u'CommonscatS', u'SisterlinksN', u'Interwikicat'],
    'nds-nl' : [u'Commons'],
    'nl' : [u'Commons', u'Commonsklein', u'Commonscatklein', u'Catbeg', u'Catsjab', u'Catwiki'],
    'om' : [u'Commons'],
    'pt' : [u'Correlatos'],
    'ru' : [u''],
}

def getCommonscatTemplate (lang = None):
    '''
    Look up the commonscat template names for a language code.

    Returns the (primary template, list of alternatives) tuple stored in
    commonscatTemplates for lang, or the '_default' tuple when the language
    has no entry of its own.
    '''
    try:
        return commonscatTemplates[lang]
    except KeyError:
        # Language without a dedicated entry: use the shared default.
        return commonscatTemplates[u'_default']

def skipPage(page):
    '''
    Do we want to skip this page?

    Returns True when the page uses one of the templates listed in
    ignoreTemplates for the page's language, False otherwise (including
    when the language has no ignore list at all).

    An ignore entry is either a plain template name, which matches when that
    template is used on the page, or a (template name, text) tuple, which
    matches when that template is used with text occurring in any of its
    parameters (e.g. 'commons=').
    '''
    language = page.site().language()
    if language not in ignoreTemplates:
        return False

    templatesInThePage = page.templates()
    templatesWithParams = page.templatesWithParams()
    for template in ignoreTemplates[language]:
        if not isinstance(template, tuple):
            if template in templatesInThePage:
                return True
            continue

        (ignoredName, ignoredParam) = template
        for (inPageTemplate, params) in templatesWithParams:
            if inPageTemplate != ignoredName:
                continue
            # Look at every parameter: the template may be used without any
            # parameters (indexing the first one would raise IndexError) and
            # the parameter we look for is not necessarily the first one.
            if any(ignoredParam in param for param in params):
                return True
    return False

def updateInterwiki (wikipediaPage = None, commonsPage = None):
    '''
    Copy the interwiki links of a wikipedia page to a commons page.

    The language links on commonsPage are replaced by the interwiki links of
    wikipediaPage plus a link to wikipediaPage itself. The commons page is
    only saved when the resulting text differs from the current text.

    This is a plain replacement; merging with the links already present at
    commons and removing duplicates would probably be more intelligent.
    The function is not called from within this script.
    '''
    linkedPages = wikipediaPage.interwiki()
    linkedPages.append(wikipediaPage)

    # One entry per site; a later page for the same site replaces an earlier one.
    pagesBySite = dict((linked.site(), linked) for linked in linkedPages)

    currentText = commonsPage.get()
    # The commonssite object doesnt work with interwiki's
    updatedText = wikipedia.replaceLanguageLinks(currentText, pagesBySite, wikipedia.getSite(u'nl'))
    editSummary = u''.join([
        u'Updating interwiki\'s from [[',
        wikipediaPage.site().language(),
        u':',
        wikipediaPage.title(),
        u']]',
    ])

    if updatedText == currentText:
        return
    #This doesnt seem to work. Newtext has some trailing whitespace
    wikipedia.showDiff(currentText, updatedText)
    commonsPage.put(newtext=updatedText, comment=editSummary)

def addCommonscat (page = None, summary = None, always = False):
    '''
    Add a commonscat template to a page, or repair the one already there.

    * If the page already has a commonscat template, its target is checked
      with checkCommonscatLink. A good link is left alone, a link that
      resolves to a different category is replaced by that category, and
      for a bad link a replacement is searched through the interwiki links.
    * Otherwise, if the page uses a template from the skip list, nothing
      is done.
    * Otherwise the interwiki links are searched for a commonscat link and,
      if one is found, the primary commonscat template is added to the page.

    page    -- the page to work on
    summary -- edit summary handed to add_text.add_text when adding a template
    always  -- handed to add_text.add_text; the value it hands back is returned

    Returns the tuple (True, always).
    '''
    wikipedia.output(u'Working on ' + page.title())
    #Get the right templates for this page
    primaryCommonscat, commonscatAlternatives = getCommonscatTemplate(page.site().language())
    commonscatLink = getCommonscatLink(page)
    if commonscatLink:
        wikipedia.output(u'Commonscat template is already on ' + page.title())
        (currentCommonscatTemplate, currentCommonscatTarget) = commonscatLink
        checkedCommonscatTarget = checkCommonscatLink(currentCommonscatTarget)
        if currentCommonscatTarget == checkedCommonscatTarget:
            #The current commonscat link is good
            wikipedia.output(u'Commonscat link at ' + page.title() + u' to Category:' + currentCommonscatTarget + u' is ok')
        elif checkedCommonscatTarget:
            # Truthiness test on purpose: an empty or missing (None) result
            # must never be written into the page as a category name.
            #We have a new Commonscat link, replace the old one
            changeCommonscat(page, currentCommonscatTemplate, currentCommonscatTarget, primaryCommonscat, checkedCommonscatTarget)
        else:
            #Commonscat link is wrong
            commonscatLink = findCommonscatLink(page)
            if commonscatLink:
                changeCommonscat(page, currentCommonscatTemplate, currentCommonscatTarget, primaryCommonscat, commonscatLink)
            #else
            #Should i remove the commonscat link?
    elif skipPage(page):
        wikipedia.output("Found a template in the skip list. Skipping " + page.title())
    else:
        commonscatLink = findCommonscatLink(page)
        if commonscatLink:
            textToAdd = u'{{' + primaryCommonscat + u'|' + commonscatLink + u'}}'
            (success, status, always) = add_text.add_text(page, textToAdd, summary, None, None, always)

    return (True, always)

def changeCommonscat (page = None, oldtemplate = u'', oldcat = u'', newtemplate = u'', newcat = u''):
    '''
    Change the current commonscat template and target.

    Every use of the template oldtemplate in the page text (matched case
    insensitively, with or without parameters) is replaced by
    {{newtemplate|newcat}}. The difference is shown and the page is saved
    with an edit summary naming the old and the new category.

    page        -- the page to change
    oldtemplate -- name of the template currently on the page
    oldcat      -- category the page currently links to (edit summary only)
    newtemplate -- name of the template to write
    newcat      -- category the new template should link to
    '''
    oldtext = page.get()
    # The template name is escaped because it is data, not a pattern: names
    # such as 'Commons+cat' contain regex metacharacters. After the name only
    # whitespace, a parameter list or the closing braces may follow, so a
    # longer template name starting with the same text is left alone.
    pattern = r'(?i)\{\{\s*' + re.escape(oldtemplate) + r'\s*(?:\|[^}]*)?\}\}'
    replacement = u'{{' + newtemplate + u'|' + newcat + u'}}'
    # A function is used as replacement so that backslashes in the new text
    # are inserted literally instead of being read as regex escapes.
    newtext = re.sub(pattern, lambda match: replacement, oldtext)
    comment = u'Changing commonscat link from [[:Commons:Category:' + oldcat + u'|' + oldcat + u']] to [[:Commons:Category:' + newcat + u'|' + newcat + u']]'
    wikipedia.showDiff(oldtext, newtext)
    page.put(newtext, comment)

def findCommonscatLink (page=None):
    '''
    Search the interwiki links of a page for a usable commonscat link.

    Each interwiki page that exists and is neither a redirect nor a
    disambiguation page is inspected with getCommonscatLink; the target
    found there is validated with checkCommonscatLink.

    Returns the name of the first valid commons category found, or an
    empty string when none of the interwiki pages provides one.
    '''
    for ipage in page.interwiki():
        try:
            if not ipage.exists() or ipage.isRedirectPage() or ipage.isDisambig():
                continue
            commonscatLink = getCommonscatLink(ipage)
            if not commonscatLink:
                continue
            (currentCommonscatTemplate, possibleCommonscat) = commonscatLink
            checkedCommonscat = checkCommonscatLink(possibleCommonscat)
            # Truthiness test on purpose: neither an empty string nor None
            # may be reported to the caller as a category that was found.
            if checkedCommonscat:
                wikipedia.output("Found link for " + page.title() + " at [[" + ipage.site().language() + ":" + ipage.title() + "]] to " + checkedCommonscat + ".")
                return checkedCommonscat
        except wikipedia.BadTitle:
            # Only this interwiki link is incorrect; the remaining links
            # may still lead to a valid category, so keep looking.
            continue
    return u''


def getCommonscatLink (wikipediaPage=None):
    '''
    Find the commonscat template used on a page.

    Returns a tuple (<templatename>, <target>) for the first template on the
    page that is the primary commonscat template of the page's language or
    one of its alternatives. The target is the first parameter of the
    template or, if the template has no parameters, the page title without
    namespace. Returns None if no such template is on the page.
    '''
    primaryCommonscat, commonscatAlternatives = getCommonscatTemplate(wikipediaPage.site().language())

    for (templateName, templateParams) in wikipediaPage.templatesWithParams():
        isCommonscat = (templateName == primaryCommonscat
                        or templateName in commonscatAlternatives)
        if not isCommonscat:
            continue
        if len(templateParams) == 0:
            # No explicit target given: the template points at the page's own name.
            return (templateName, wikipediaPage.titleWithoutNamespace())
        return (templateName, templateParams[0])

    return None

def checkCommonscatLink (name = ""):
    '''
    Return the name of a valid commons category for the given name.

    Redirects and "Category redirect" templates are followed until a real
    category is reached; the name of that category (without namespace) is
    returned.

    An empty string is returned if
    * the category doesn't exist,
    * the category is a disambiguation page,
    * a "Category redirect" template gives no target,
    * the redirects lead back to a category that was already visited, or
    * the name is not a valid title (wikipedia.BadTitle).
    '''
    # Names already looked at, so that a redirect loop cannot run forever.
    visited = set()
    try:
        while name not in visited:
            visited.add(name)
            if wikipedia.verbose:
                wikipedia.output("getCommonscat: " + name)
            #This can throw a wikipedia.BadTitle
            commonsPage = wikipedia.Page(wikipedia.getSite("commons", "commons"), "Category:" + name)

            if not commonsPage.exists():
                if wikipedia.verbose:
                    wikipedia.output("getCommonscat: The category doesnt exist.")
                return u''
            if commonsPage.isRedirectPage():
                if wikipedia.verbose:
                    wikipedia.output("getCommonscat: The category is a redirect")
                name = commonsPage.getRedirectTarget().titleWithoutNamespace()
                continue
            if "Category redirect" in commonsPage.templates():
                if wikipedia.verbose:
                    wikipedia.output("getCommonscat: The category is a category redirect")
                redirectTarget = None
                for template in commonsPage.templatesWithParams():
                    if template[0] == "Category redirect" and len(template[1]) > 0:
                        redirectTarget = template[1][0]
                        break
                if redirectTarget is None:
                    # The redirect template names no target: there is nothing
                    # to follow, so report "no valid category" explicitly
                    # instead of falling off the end and returning None.
                    return u''
                name = redirectTarget
                continue
            if commonsPage.isDisambig():
                if wikipedia.verbose:
                    wikipedia.output("getCommonscat: The category is disambiguation")
                return u''
            return commonsPage.titleWithoutNamespace()
    except wikipedia.BadTitle:
        #Funky title so not correct
        return u''

    # The loop ended because a name came up a second time: redirect loop.
    if wikipedia.verbose:
        wikipedia.output("getCommonscat: The category redirects in a loop")
    return u''

def main():
    '''
    Entry point of the script.

    Reads the command line arguments, builds the page generator and calls
    addCommonscat for every page that exists and is not a redirect, a
    category redirect or a disambiguation page.

    Arguments handled here are -summary, -checkcurrent and -always; all
    other arguments are handed to the pagegenerators factory.
    '''
    summary = None
    generator = None
    always = False
    # Namespace filter used by -checkcurrent (that option is documented as
    # working on category pages).
    namespaces = [14]
    # Load a lot of default generators
    genFactory = pagegenerators.GeneratorFactory()

    for arg in wikipedia.handleArgs():
        if arg.startswith('-summary'):
            if arg == '-summary':
                # No text given on the command line, ask for it.
                summary = wikipedia.input(u'What summary do you want to use?')
            else:
                summary = arg[9:]
            continue
        if arg.startswith('-checkcurrent'):
            primaryCommonscat, commonscatAlternatives = getCommonscatTemplate(wikipedia.getSite().language())
            templatePage = wikipedia.Page(wikipedia.getSite(), u'Template:' + primaryCommonscat)
            referringPages = pagegenerators.ReferringPageGenerator(templatePage, onlyTemplateInclusion=True)
            generator = pagegenerators.NamespaceFilterPageGenerator(referringPages, namespaces)
            continue
        if arg == '-always':
            always = True
            continue
        genFactory.handleArg(arg)

    # -checkcurrent takes precedence over the generators given as arguments.
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if not generator:
        raise add_text.NoEnoughData('You have to specify the generator you want to use for the script!')

    for page in pagegenerators.PreloadingGenerator(generator):
        if not page.exists():
            wikipedia.output(u'Page %s does not exist. Skipping.' % page.aslink())
            continue
        if page.isRedirectPage():
            wikipedia.output(u'Page %s is a redirect. Skipping.' % page.aslink())
            continue
        if page.isCategoryRedirect():
            wikipedia.output(u'Page %s is a category redirect. Skipping.' % page.aslink())
            continue
        if page.isDisambig():
            wikipedia.output(u'Page %s is a disambiguation. Skipping.' % page.aslink())
            continue
        # addCommonscat hands back the (possibly updated) always setting,
        # which is carried over to the next page.
        (status, always) = addCommonscat(page, summary, always)

# Run the bot only when this file is executed as a script.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Always call wikipedia.stopme(), even when main() raised.
        wikipedia.stopme()
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.