followlive.py :  » Network » Python-Wikipedia-Robot-Framework » pywikipedia » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » Network » Python Wikipedia Robot Framework 
Python Wikipedia Robot Framework » pywikipedia » followlive.py
#!/usr/bin/python
# -*- coding: utf-8 -*-

"""
Script to follow new articles on a wiki and flag them with a
template or, if necessary, blank them.

This script probably still contains A LOT of bugs! Use it with caution
and verify what it is doing!
"""

# Version-control keyword string identifying this revision of the script.
__version__='$Id: followlive.py 8162 2010-05-14 08:46:36Z xqt $'

import sys, datetime, time, traceback
import wikipedia, editarticle

# Module-wide metaclass: under Python 2 this makes the classes defined
# below new-style classes even though they declare no base class.
__metaclass__ = type

# Static tail of the interactive prompt shown for every doubtful page.
# The numbered list of templates for the current site is prepended to it
# further down in this module (section "Generate the question text").
# The letters listed here are the ones PageHandler.handlebadpage() checks.
question = u"""(multiple numbers delimited with ',')

b) blank page
e) edit page
d) delete page (need sysop right)

q) quit cleaningbot
Enter) OK
What is it? """

# Templates the operator can add to a page, with the edit summary to use.
# Structure:
#   templates[language code][template wikitext] = {
#       'msg': edit summary fragment used when the template is added,
#       'pos': 'top' or 'bottom' -- where the template is inserted
#   }
# The per-language dict is selected with wikipedia.translate().
#
# NOTE(review): in several languages (ar, he, zh and others) the keys and
# messages look as if their non-ASCII characters were lost: keys such as
# '{{}}' repeat inside one dict, so only the last of them survives.
# Restore these strings from a pristine copy before relying on them.
templates = {
    'ar': {
        '{{}}' :{
            'msg' : '  :    ',
            'pos': 'top'},

        '{{}}' :{
            'msg' : '  :    ',
            'pos': 'top'},

        '{{}}':{
            'msg' : '  :   ',
            'pos': 'bottom'},

        '{{}}' :{
            'msg' : '  :         [[: | ]]',
            'pos' : 'top'},
    },
    'en': {
        '{{db-reason}}' :{
            'msg' : 'Robot-assisted tagging: this article should be deleted',
            'pos': 'top'},

        '{{cleanup}}' :{
            'msg' : 'Robot-assisted tagging: this article need cleanup',
            'pos': 'top'},

        '{{stub}}':{
            'msg' : 'Robot-assisted tagging: this article is a stub',
            'pos': 'bottom'},

        '{{uncategorized}}' :{
            'msg' : 'Robot-assisted tagging: This article needs to be [[Wikipedia:Categorization|categorized]]',
            'pos' : 'top'},

        '{{notability}}':{
            'msg' : 'Robot-assisted tagging: the [[Wikipedia:Notability|notability]] of this article is unclear.',
            'pos': 'top'},

        '{{not verified}}':{
            'msg' : 'Robot-assisted tagging: this article needs to be checked for factuality.',
            'pos': 'top'},

        '{{copyedit}}':{
            'msg' : 'Robot-assisted tagging: the writing of this article needs to be [[Wikipedia:How to copy-edit|copyedited]] and improved.',
            'pos' : 'top'},

        '{{unreferenced}}':{
            'msg' : 'Robot-assisted tagging: this article needs [[Wikipedia:Citing sources|references]] so it can be verified.',
            'pos': 'bottom'},

        '{{wikify}}' :{
            'msg' : 'Robot-assisted tagging: this article needs to be wikified per the [[Wikipedia:Manual of Style|Manual of Style]]',
            'pos' : 'top'},
    },
    # NOTE(review): the key 'ia' occurs a second time further down in this
    # literal; Python keeps only the later entry, so this one is dead.
    'ia':{
        '{{Eliminar}}' :{
            'msg' : 'Assistite per robot: iste articulo debe esser eliminate',
            'pos': 'top'},

        '{{Revision}}' :{
            'msg' : 'Assistite per robot: iste articulo require revision',
            'pos': 'top'},

        '{{Stub}}' :{
            'msg' : 'Assistite per robot: iste articulo es in stato embryonic',
            'pos': 'bottom'},
    },
    'fr':{
        u'{{suppression}}': {
            'msg' : u' l\'aide du robot: cet article devrait tre supprim',
            'pos': 'top'},

        u'{{ vrifier}}' : {
            'msg': u' l\'aide du robot: cet article est  vrifier',
            'pos': 'top'},

        u'{{bauche}}' : {
            'msg': u' l\'aide du robot: cet article est une bauche',
            'pos': 'top'},
    },
    'he':{
        u'{{}}': {
            'msg' : u'   ',
            'pos': 'top'
        },
        u'{{}}': {
            'msg' : u'   ',
            'pos': 'top'
        },
        u'{{}}': {
            'msg' : u'   ',
            'pos': 'bottom'
        },
        u'{{ }}':{
            'msg' : u'     .',
            'pos': 'top'
        },
        u'{{}}': {
            'msg' : u'   ',
            'pos': 'top'},
    },
    # NOTE(review): second definition of 'ia'; this is the one that is
    # effective at runtime because it replaces the earlier entry.
    'ia':{
        u'{{Eliminar}}': {
            'msg' : u'Iste articulo debe esser eliminate',
            'pos': 'top'
        },
        u'{{Revision}}': {
            'msg' : u'Iste articulo require revision',
            'pos': 'top'
        },
        u'{{Stub}}': {
            'msg' : u'Iste articulo es in stato embryonic',
            'pos': 'bottom'
        },
    },
   'nl':{
        u'{{weg}}': {
            'msg' : '{weg}',
            'pos' : 'top'
        },
        u'{{nuweg}}': {
            'msg' : '{nuweg}',
            'pos' : 'top'
        },
        u'{{wiu}}': {
            'msg' : '{wiu}',
            'pos' : 'top'
        },
        u'{{beg}}': {
            'msg' : '{beg}',
            'pos' : 'bottom'
        },
        u'{{wikify}}': {
            'msg' : '{wikify}',
            'pos' : 'top'
        },
        u'{{wb}}': {
            'msg' : '{wb}',
            'pos' : 'top'
        },
    },
    'pl':{
        u'{{ek}}': {
            'msg' : u'[[Kategoria:Ekspresowe kasowanko|ek]]',
            'pos':'top'
        },
        u'{{dopracowa}}' : {
            'msg' : u'Dopracowa',
            'pos':'top'
        },
        u'{{linki}}'      : {
            'msg' : u'Linki wewntrzne do dodania',
            'pos':'top'
        },
        u'{{rda}}'     : {
            'msg' : u'W artykule brakuje rde',
            'pos':'top'
        },
        u'{{stub}}'       : {
            'msg' : u'stub (zalek)',
            'pos':'bottom'
        },
    },
    'pt': {
        u'{{wikificar}}' : {
            'msg': 'Assistida por bot: {{wikificar}}',
            'pos':'top'},

        u'{{reciclar}}'  : {
            'msg': 'Assistida por bot: {{reciclar}}',
            'pos':'top'},

        u'{{lixo|~~~~}}' : {
            'msg': 'Assistida por bot: {{lixo}}',
            'pos':'top'},

        u'{{reviso}}' : {
            'msg': 'Assistida por bot: {{reviso}}',
            'pos':'top'},

        u'{{imprprio}}' : {
            'msg': 'Assistida por bot: {{imprprio}}',
            'pos':'top'},

        u'{{apagar vaidade}}' : {
            'msg': 'Assistida por bot: {{apagar vaidade}}',
            'pos':'top'},
    },
    'sv': {
        u'{{radera}}' :{
            'msg' : u'Robotkoll: Artikeln br raderas',
            'pos': 'top'},

        u'{{stda}}' :{
            'msg' : u'Robotkoll: Artikeln br stdas',
            'pos': 'top'},

        u'{{stub}}':{
            'msg' : u'Robotkoll: Artikeln r en stubbe',
            'pos': 'bottom'},

        u'{{subst:relevanskontroll}}':{
            'msg' : u'Robotkoll: Artikeln br kollas mot [[WP:REL|Wikipedias relevanskriterier]].',
            'pos': 'top'},

        u'{{verifieras}}':{
            'msg' : u'Robotkoll: Artikeln br verifieras',
            'pos': 'top'},

        u'{{sprkvrd}}':{
            'msg' : u'Robotkoll: Artikeln br sprkvrdas',
            'pos' : 'top'},

        u'{{Kllor}}':{
            'msg' : u'Robotkoll: Artikeln behver kllor',
            'pos': 'bottom'},

        u'{{wikify}}' :{
            'msg' : u'Robotkoll: Artikeln behver wikifieras',
            'pos' : 'top'},
    },
    'zh': {
        u'{{Delete}}' :{
            'msg' : u': ',
            'pos': 'top'},

        u'{{subst:Cleanup/auto}}' :{
            'msg' : u': ',
            'pos': 'top'},

        u'{{subst:Uncategorized/auto}}' :{
            'msg' : u':  ',
            'pos' : u'bottom'},

        u'{{subst:Notability/auto}}':{
            'msg' : u':  ',
            'pos': 'top'},

        u'{{subst:refimprove/auto}}':{
            'msg' : u':  ',
            'pos': 'top'},

        u'{{copyedit}}':{
            'msg' : u':  ',
            'pos' : 'top'},

        u'{{subst:Unreferenced/auto}}':{
            'msg' : u':  ',
            'pos': 'top'},

        u'{{subst:wikify/auto}}' :{
            'msg' : u':  ',
            'pos' : 'top'},

        u'{{subst:Notchinese/auto}}':{
            'msg' : u': ',
            'pos' : 'top'},

        u'{{subst:Substub/auto}}' :{
            'msg' : u':  ',
            'pos' : 'top'},

        u'{{stub}}':{
            'msg' : u': ',
            'pos': 'bottom'},
        u'{{notchinesetitle}}':{
            'msg' : u': ',
            'pos': 'top'},
        u'{{subst:Translating/auto}}':{
            'msg' : u': ',
            'pos': 'top'},
        u'{{fansite}}':{
            'msg' : u': ',
            'pos': 'top'},

    },
}

# Edit summary used when blanking an article, keyed by language code.
# Each value is a %-format string with exactly one "%s" placeholder; the
# blanking branch of PageHandler.handlebadpage() fills it with the page's
# previous content.
# NOTE(review): several of these messages appear to have lost their
# non-ASCII characters -- verify against a pristine copy.
blanking = {
    'ar': u'    "%s"',
    'en': u'blanked, content was "%s"',
    'fr': u'blanchit, le contenu tait "%s"',
    'he': u',   "%s"',
    'pl': u'wyczyszczony - zawartoci byo "%s"',
    'pt': u'em branco - contedo  "%s"',
    'sv': u'Robot tar bort innehll p grund av "%s"',
    'zh': u' "%s"',
}

# Markers that mean a page has already been dealt with, keyed by language
# code. PageHandler.handlebadpage() skips a page as soon as one of these
# strings occurs anywhere in its text (plain substring test), which is why
# some entries deliberately-looking omit the closing braces: they then also
# match the template when it is used with parameters.
# NOTE(review): the 'zh' tuple lists u'{{unreferenced}}' twice (harmless),
# and the 'he' tuple contains u'{{}}', which looks like a template name that
# lost its characters and matches any text containing "{{}}".
done = {
    'en':('{{VfD}}', '{{AfD}}', '{{AfD1}}', '{{cleanup}}', '{{nonsense}}', '{{deletedpage}}',
          '{{db-reason}}', '{{notability}}', '{{not verified}}', '{{unreferenced}}', '{{db-empty}}',
          '{{db-nocontext}}', '{{db-foreign}}', '{{db-notenglish}}', '{{db-nocontent}}', '{{db-blankcsd}}',
          '{{db-transwiki}}', '{{db-attack}}', '{{db-band}}', '{{db-club}}', '{{db-bio}}', '{{db-bio-notenglish}}',
          '{{db-inc}}', '{{db-bio-photo}}', '{{db-catempty}}', '{{db-c2}}', '{{db-catfd}}', '{{badname}}', '{{db-pagemove}}',
          '{{db-nonsense}}', '{{db-spam}}', '{{db-copyvio}}', '{{db-test}}', '{{db-vandalism}}', '{{db-repost}}', '{{db-banned}}',
          '{{db-histmerge}}', '{{db-move}}', '{{db-g6}}', '{{db-afd}}', '{{db-disambig}}', '{{db-authora}}', '{{db-author}}',
          '{{db-blanked}}', '{{csd:g7}}', '{{db-talk}}', '{{db-botnomain}}', '{{db-redundantimage}}', '{{db-noimage}}', '{{db-noncom}}',
          '{{db-ccnoncom}}', '{{db-unksource}}', '{{db-norat}}', '{{db-badfairuse}}', '{{duplicate}}', '{{db-meta}}',
          '{{db-emptyportal}}', '{{db-redirnone}}', '{{db-rediruser}}', '{{db-redirtypo}}', '{{csd-c3}}', '{{cc-by-nc-sa}}',
          '{{cc-nd-nc}}', '{{cc-nc}}', '{{cc-by-nc-2.0}}', '{{cc-by-nc-sa-2.0}}', '{{cc-by-nd-nc-2.0}}', '{{cc-by-2.0-nc-nd}}',
          '{{cc-by-nc-nd-2.0}}', '{{db-contact}}', '{{db-i2}}', '{{db-i1}}', '{{communityuseonly}}', '{{db-disparage}}', '{{db-web}}',
          '{{db-userreq}}', '{{db-nouser}}', '{{db-u3}}', '{{db-unfree}}'),
    'fr':(u'{{suppression}}', u'{{ vrifier}}', u'{{bauche}}'),
    'ia':(u'{{Eliminar}}', u'{{Revision}}', u'{{Stub}}'),
    'he':(u'{{}}', u'{{}}', u'{{ }}'),
    'nl':('{{nuweg}}', '{{weg}}', '{{wb}}', '{{wiu}}', '{{nocat}}'),
    'pl':('{{ek}}', u'{{dopracowa}}', '{{linki}}', u'{{rda}}', u'{{stub}}'),
    'pt':('{{wikificar}}', '{{reciclar}}', '{{lixo}}', u'{{reviso}}', u'{{imprprio}}', u'{{apagar vaidade}}'),
    'sv':(u'{{radera', u'{{Radera', u'{{stda}}', u'{{stub}}', u'{{verifieras}}', u'{{sprkvrd}}', u'{{Kllor', u'{{kllor', u'{{wikify}}', u'{{Ickewiki}}', u'{{ickewiki}}', u'{{Wikify}}'),
    'zh':(u'{{VfD}}',u'{{AfD}}',u'{{unreferenced}}',u'{{db-reason}}',u'{{cleanup}}',u'{{stub}}',u'{{uncategorized}}',u'{{notability}}',u'{{copyedit}}',u'{{unreferenced}}',u'{{wikify}}',u'{{Translating}}',u'{{copyvio}}',u'{{Notchinese}}'),
    }

# TODO: merge 'done' with 'templates' above

class PageHandler:
    """Interactive handler for one entry of the new-pages list.

    Shows the entry to the operator and, when the page looks doubtful,
    lets the operator tag it with templates, blank it, edit it or
    delete it.
    """

    def __init__(self, page, date, length, loggedIn, user, comment):
        """Store the fields of one new-pages entry.

        page     -- page object; title(), get(), put() and delete() are
                    called on it
        date     -- date reported for the entry, only displayed
        length   -- page size in bytes
        loggedIn -- truth value; a false value makes the page doubtful
                    (presumably: the author was not logged in)
        user     -- name of the author, only displayed
        comment  -- stored, but not used by this class
        """
        self.page = page
        self.date = date
        self.length = length
        self.loggedIn = loggedIn
        self.user = user
        self.comment = comment

    def showpageinfo(self):
        """Display title, date, size and author of the page."""
        wikipedia.output(u'[[%s]] %s ' % (self.page.title(), self.date))
        # Single-argument print(...) gives the same output under Python 2
        # and is also valid Python 3 syntax.
        print('Length: %i bytes' % self.length)
        wikipedia.output(u'User  : %s' % self.user)

    def couldbebad(self):
        """Return True if the page is short (< 250 bytes) or loggedIn is false."""
        return self.length < 250 or not self.loggedIn

    @staticmethod
    def _parsechoices(answer, valid):
        """Turn a comma separated answer into a list of template numbers.

        answer -- non-empty operator input such as "1,3"; one leading
                  "u" is ignored, as the original input handling did
        valid  -- container holding the acceptable numbers

        Returns the list of ints in input order, or None as soon as one
        item is not an integer or is not contained in valid.
        """
        if answer[:1] == 'u':
            answer = answer[1:]
        numbers = []
        for item in answer.split(','):
            try:
                number = int(item)
            except ValueError:
                return None
            if number not in valid:
                return None
            numbers.append(number)
        return numbers

    def handlebadpage(self):
        """Show a doubtful page and apply the action the operator picks.

        Returns without asking if the page is a redirect, no longer
        exists, or already contains one of the markers in ``done``.
        Otherwise the operator is prompted until the answer is usable:

        q      -- leave the program (raises SystemExit)
        d      -- delete the page
        e      -- edit the page in an external editor
        b      -- blank the page
        Enter  -- accept the page unchanged
        n[,m]  -- add the templates with these menu numbers

        Raises SystemExit as well if a template has an unknown 'pos'.
        """
        try:
            self.content = self.page.get()
        except wikipedia.IsRedirectPage:
            wikipedia.output(u'Already redirected, skipping.')
            return
        except wikipedia.NoPage:
            wikipedia.output(u'Already deleted')
            return

        site = wikipedia.getSite()
        for d in wikipedia.translate(site, done):
            if d in self.content:
                wikipedia.output(u'Found: "%s" in content, nothing necessary'%d)
                return
        print("---- Start content ----------------")
        wikipedia.output(u""+self.content)
        print("---- End of content ---------------")

        # Keep asking until the operator gives a usable answer.
        choices = None
        while choices is None:
            answer = wikipedia.input(question)

            if answer == 'q':
                sys.exit("Exiting")
            if answer == 'd':
                wikipedia.output(u'Trying to delete page [[%s]].' % self.page.title())
                self.page.delete()
                return
            if answer == 'e':
                oldText = self.page.get()
                editor = editarticle.TextEditor()
                text = editor.edit(oldText)
                # NOTE(review): TextEditor.edit() is understood to return
                # None when the editor is closed without saving -- confirm.
                # Never save in that case.
                if text is not None and oldText != text:
                    wikipedia.showDiff(oldText, text)
                    msg = wikipedia.input(u'Summary message:')
                    self.page.put(text, msg)
                return
            if answer == 'b':
                wikipedia.output(u'Blanking page [[%s]].' % self.page.title())
                try:
                    self.page.put('', comment = wikipedia.translate(site, blanking) % self.content )
                except wikipedia.EditConflict:
                    # The exception lives in the wikipedia module and the
                    # retry has to go through self; the bare names used
                    # before raised NameError instead of retrying.
                    print("An edit conflict occured ! Automatically retrying")
                    self.handlebadpage()
                return
            if answer == '':
                print('Page correct ! Proceeding with next pages.')
                return

            # Anything else must be a list of menu numbers. Every item
            # has to be valid; otherwise the tagging loop below would fail
            # on int() or on the questionlist lookup.
            choices = self._parsechoices(answer, questionlist)
            if choices is None:
                wikipedia.output(u'ERROR: "%s" is not valid' % answer)

        site_templates = wikipedia.translate(site, templates)
        summary = u''
        for number in choices:
            name = questionlist[number]
            # grab the template parameters
            tpl = site_templates[name]
            if tpl['pos'] == 'top':
                wikipedia.output(u'prepending %s...' % name)
                self.content = name + '\n' + self.content
            elif tpl['pos'] == 'bottom':
                wikipedia.output(u'appending %s...' % name)
                self.content += '\n' + name
            else:
                wikipedia.output(u'ERROR: "pos" should be "top" or "bottom" for template %s. Contact a developer.' % name)
                sys.exit("Exiting")
            summary += tpl['msg']+' '
            wikipedia.output(u'Probably added %s' % name)
        self.page.put(self.content, comment = summary)
        wikipedia.output(u'with comment %s\n' % summary)

    def run(self):
        """Display the entry and start the dialogue if the page is doubtful."""
        self.showpageinfo()
        if self.couldbebad():
            print('Integrity of page doubtful...')
            try:
                self.handlebadpage()
            except wikipedia.NoPage:
                print('seems already gone')
        # Same text as "print a, b": both parts joined by one space.
        print('----- Current time: %s' % datetime.datetime.now())


class CleaningBot:
    """Feeds every entry of a site's new-pages list to a PageHandler."""

    def __init__(self, site=None):
        """Remember the site to watch.

        site -- site object providing newpages(); when None, the site
                returned by wikipedia.getSite() is used
        """
        if site is None:
            site = wikipedia.getSite()
        self.site = site

    def run(self):
        """Handle each new page reported by the site, one after another.

        The pages are read from self.site, so a site passed to the
        constructor is honoured (previously wikipedia.getSite() was
        always used here). newpages() is called with 100 and repeat=True
        -- presumably batches of 100, polled repeatedly; confirm against
        the framework.
        NOTE(review): PageHandler still looks up its templates through
        wikipedia.getSite(), so a non-default site only affects which
        new-pages list is followed.
        """
        for (page, date, length, loggedIn, username, comment) in self.site.newpages(100, repeat = True):
            handler = PageHandler(page, date, length, loggedIn, username, comment)
            handler.run()

# Generate the question text
# Number the templates of the current site starting at 1 and put that menu
# in front of the static prompt. questionlist maps each menu number to its
# template string and is what PageHandler uses to decode the answer.
# NOTE(review): this runs at import time, i.e. before the main section has
# called wikipedia.handleArgs(); confirm that the site chosen here is the
# same one that is used later when the templates are looked up again.
questionlist = {}
questions = '\n'
for position, template in enumerate(wikipedia.translate(wikipedia.getSite(), templates)):
    number = position + 1
    questions += ( u'%s) %s\n' % (number, template) )
    questionlist[number] = template
del position, template, number  # scratch names only; keep the module namespace clean
question = questions + question

# MAIN
if __name__ == "__main__":
    try:
        # This script defines no options of its own, so every argument that
        # handleArgs() hands back is reported and ignored.
        for arg in wikipedia.handleArgs():
            wikipedia.output(u'Warning: argument "%s" not understood; ignoring.' % arg)
        bot = CleaningBot()
        bot.run()
    finally:
        # Runs on every exit path (normal end, sys.exit, Ctrl-C, errors);
        # the exception, if any, still propagates afterwards.
        wikipedia.stopme()
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.