# -*- coding: utf-8 -*-
"""
Script to resolve double redirects, and to delete broken redirects. Requires
access to MediaWiki's maintenance pages or to a XML dump file. Delete
function requires adminship.
Syntax:
python redirect.py action [-arguments ...]
where action can be one of these:
double Fix redirects which point to other redirects
broken Delete redirects where targets don\'t exist. Requires adminship.
both Both of the above. Permitted only with -api. Implies -api.
and arguments can be:
-xml Retrieve information from a local XML dump
(http://download.wikimedia.org). Argument can also be given as
"-xml:filename.xml". Cannot be used with -api or -moves.
-api Retrieve information from the wiki via MediaWikis application
program interface (API). Cannot be used with -xml.
-moves Use the page move log to find double-redirect candidates. Only
works with action "double", does not work with -xml. You may
use -api option for retrieving pages via API
NOTE: If neither of -xml -api -moves is given, info will be
loaded from a special page of the live wiki.
-namespace:n Namespace to process. Can be given multiple times, for several
namespaces. If omitted, only the main (article) namespace is
is treated with -api, with -xml all namespaces are treated,
Works only with an XML dump, or the API interface.
-offset:n With -moves, the number of hours ago to start scanning moved
pages. With -xml, the number of the redirect to restart with
(see progress). Otherwise, ignored.
-start:title With -api, the starting page title in each namespace.
Otherwise ignored. Page needs not exist.
-until:title With -api, the possible last page title in each namespace.
Otherwise ignored. Page needs not exist.
-number:n With -api, the maximum count of redirects to work upon.
Otherwise ignored. Use 0 for unlimited
-always Don't prompt you for each replacement.
"""
#
# (C) Daniel Herding, 2004.
# Purodha Blissenbach, 2009.
#
# Distributed under the terms of the MIT license.
#
#
from __future__ import generators
import wikipedia as pywikibot
import config, query
import xmlreader
import re, sys, datetime
__version__='$Id: redirect.py 8133 2010-04-27 16:45:03Z xqt $'
# Summary message for fixing double redirects
msg_double={
    'als':u'Bötli: Uflösig vun de doppleti Wyterleitig zue %s',
'ar': u': %s',
'bat-smg': u'Robots: Taisuoms dvgobs paradresavms %s',
'be-x-old': u': %s',
'br': u'Kempennet adkas doubl gant robot %s',
    'cs': u'Robot opravil dvojité přesměrování %s',
'de': u'Bot: Korrigiere doppelte Weiterleitung zu %s',
'en': u'Robot: Fixing double redirect to %s',
    'es': u'Robot: Arreglando doble redirección %s',
'fa': u': %s',
'fi': u'Botti korjasi kaksinkertaisen ohjauksen %s',
    'fr': u'Robot: répare double redirection %s',
    'ga': u'Róbó: Ag socrú athsheolta dúbailte %s',
'he': u': %s',
'hr': u'Bot: Popravak dvostrukih preusmjeravanja %s',
'ia': u'Robot: reparation de duple redirection %s',
    'is': u'Vélmenni: Lagfæri tvöfalda tilvísun %s',
'it': u'Bot: Sistemo i redirect doppi a %s',
'ja': u': %s',
'ka': u': %s',
'ko': u': %s',
'kk': u': %s',
'ksh':u'Bot: [[special:doubleredirects|Dubbel mlijdong]] fottjemaat %s',
    'lb': u'Bot: Duebel Viruleedung gefléckt %s',
'lt': u'robotas: Taisomas dvigubas peradresavimas %s',
'mk': u': %s',
'nds':u'Bot: Dubbelte Wiederleiden rutmakt %s',
'nl': u'Bot: dubbele doorverwijzing gecorrigeerd aan %s',
'nn': u'robot: retta dobbel omdirigering %s',
'no': u'bot: Retter dobbel omdirigering %s',
    'pl': u'Robot naprawia podwójne przekierowanie %s',
'pt': u'Bot: Corrigido duplo redirecionamento %s',
'ru': u': %s',
'sr': u': %s',
    'sv': u'Robot: Rättar dubbel omdirigering %s',
'szl':u'Robot sprowjo tuplowane przekerowaa %s',
'th': u': %s',
    'tr': u'Bot değişikliği: Yönlendirmeye olan yönlendirme %s',
'uk': u': %s',
'war':u'Robot: Gin-ayad in nagduduha nga redirek %s',
'yi': u': %s',
'zh': u': %s',
'zh-yue': u' %s',
'zh-classical': u': %s',
}
# Reason for deleting broken redirects
reason_broken={
'ar': u': ',
'als': u'Wyterleitig wo kaputt isch',
'be-x-old': u': ',
    'cs': u'Přerušené přesměrování',
'de': u'Bot: Weiterleitungsziel existiert nicht',
'en': u'[[WP:CSD#G8|G8]]: [[Wikipedia:Redirect|Redirect]] to a deleted or non-existent page',
    'es': u'Robot: La página a la que redirige no existe',
'fa': u': ',
'fi': u'Botti: Ohjauksen kohdesivua ei ole olemassa',
'fr': u'Robot : Cible du redirect inexistante',
'ga': u'Rb : Targaid athsheoladh ar iarraidh',
'he': u': ',
'it': u'Bot: Il redirect indirizza ad una pagina inesistente',
'ja': u':',
'ka': u': ',
'ko': u': ',
'kk': u': ',
'ksh':u'Bot: D [[Special:BrokenRedirects|mlijdong jingk ennet Liiere]]',
'lt': u'robotas: Peradresavimas niekur',
'nds':u'Bot: Kaputte Wiederleiden ward nich brukt',
'nl': u'Bot: doelpagina doorverwijzing bestaat niet',
    'nn': u'robot: målet for omdirigeringa eksisterer ikkje',
    'no': u'robot: målet for omdirigeringen eksisterer ikke',
'pl': u'Robot: cel przekierowania nie istnieje',
    'pt': u'Bot: Redirecionamento não existe',
'ru': u': ',
'sr': u': ',
'th': u': ',
    'tr': u'Bot değişikliği: Var olmayan sayfaya olan yönlendirme',
'war':u'Robot: Waray dida an karadto-an han redirek',
'yi': u': ',
'zh': u':',
'zh-yue': u'',
}
# Reason for deleting redirect loops
reason_loop={
'ar': u': ',
'de': u'Bot: Weiterleitungsziel auf sich selbst',
'en': u'[[WP:CSD#G8|G8]]: [[Wikipedia:Redirect|Redirect]] target forms a redirect loop',
}
# Insert deletion template into page with a broken redirect
sd_template = {
'ar': u'{{| }}',
'als':u'{{delete}}Wyterleitig wo kaputt isch--~~~~',
'bar':u'{{delete}}Kaputte Weiterleitung--~~~~',
'de': u'{{sla|Defekte Weiterleitung --~~~~}}',
    'cs': u'{{smazat|přerušené přesměrování}}',
'en': u'{{db-r1}}',
'ga': u'{{scrios|Athsheoladh briste}}',
'it': u'{{Cancella subito|9}}',
'ja': u'{{|}}',
'ksh':u'{{Schmie fott}}Di mlijdong jeiht noh nrjendwoh hen.<br />--~~~~~\n\n',
'nds':u'{{delete}}Kaputte Wiederleiden, wat nich brukt ward.<br />--~~~~\n\n',
    'pdc':u'{{lösche|Defekte Weiterleitung --~~~~}}',
'war':u'{{delete}}Nautod o nagbinalikbalik nga redirek.--~~~~\n\n',
'zh': u'{{delete|R1}}',
}
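# Edit summary used when a redirect is tagged for speedy deletion instead of
# being deleted directly (RedirectRobot.fix_1_double_redirect below expects
# this table to exist). NOTE: minimal reconstruction -- only an English
# summary is provided; the per-language wording of the original table is
# assumed, not preserved.
sd_tagging_sum = {
    'en': u'Robot: Tagging for speedy deletion',
}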
class RedirectGenerator:
    def __init__(self, xmlFilename=None, namespaces=None, offset=-1,
                 use_move_log=False, use_api=False, start=None, until=None,
                 number=None):
        self.site = pywikibot.getSite()
        self.xmlFilename = xmlFilename
        # avoid a shared mutable default argument for the namespace list
        self.namespaces = namespaces if namespaces is not None else []
        if use_api and self.namespaces == []:
            self.namespaces = [0]
self.offset = offset
self.use_move_log = use_move_log
self.use_api = use_api
self.api_start = start
self.api_until = until
self.api_number = number
if self.api_number is None:
self.api_number = 'max'
def get_redirects_from_dump(self, alsoGetPageTitles=False):
'''
Load a local XML dump file, look at all pages which have the
redirect flag set, and find out where they're pointing at. Return
a dictionary where the redirect names are the keys and the redirect
targets are the values.
'''
xmlFilename = self.xmlFilename
redict = {}
# open xml dump and read page titles out of it
dump = xmlreader.XmlDump(xmlFilename)
redirR = self.site.redirectRegex()
readPagesCount = 0
if alsoGetPageTitles:
pageTitles = set()
for entry in dump.parse():
readPagesCount += 1
            # print a status message every 10000 pages
if readPagesCount % 10000 == 0:
pywikibot.output(u'%i pages read...' % readPagesCount)
if len(self.namespaces) > 0:
if pywikibot.Page(self.site, entry.title).namespace() \
not in self.namespaces:
continue
if alsoGetPageTitles:
pageTitles.add(entry.title.replace(' ', '_'))
m = redirR.match(entry.text)
if m:
target = m.group(1)
# There might be redirects to another wiki. Ignore these.
for code in self.site.family.langs.keys():
if target.startswith('%s:' % code) \
or target.startswith(':%s:' % code):
if code == self.site.language():
# link to our wiki, but with the lang prefix
target = target[(len(code)+1):]
if target.startswith(':'):
target = target[1:]
else:
pywikibot.output(
u'NOTE: Ignoring %s which is a redirect to %s:'
% (entry.title, code))
target = None
break
# if the redirect does not link to another wiki
if target:
source = entry.title.replace(' ', '_')
target = target.replace(' ', '_')
# remove leading and trailing whitespace
target = target.strip('_')
# capitalize the first letter
                    if not self.site.nocapitalize:
source = source[:1].upper() + source[1:]
target = target[:1].upper() + target[1:]
if '#' in target:
target = target[:target.index('#')].rstrip("_")
if '|' in target:
pywikibot.output(
u'HINT: %s is a redirect with a pipelink.'
% entry.title)
target = target[:target.index('|')].rstrip("_")
if target: # in case preceding steps left nothing
redict[source] = target
if alsoGetPageTitles:
return redict, pageTitles
else:
return redict
def get_redirect_pageids_via_api(self):
"""Return generator that yields page IDs of Pages that are redirects."""
params = {
'action': 'query',
'list': 'allpages',
'apfilterredir': 'redirects',
'aplimit': self.api_number,
'apdir': 'ascending',
#'':'',
}
for ns in self.namespaces:
params['apnamespace'] = ns
if self.api_start:
params['apfrom'] = self.api_start
done = False
while not done:
pywikibot.output(u'\nRetrieving pages...', newline=False)
data = query.GetData(params, self.site)
if "limits" in data: # process aplimit = max
params['aplimit'] = int(data['limits']['allpages'])
for x in data['query']['allpages']:
done = self.api_until and x['title'] >= self.api_until
if done: break
yield x['pageid']
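                # follow the API's query-continue pointer to the next batch
                # of redirect page ids, unless the -until title was reached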
if not done and 'query-continue' in data:
params['apfrom'] = data['query-continue']['allpages']['apfrom']
else:
break
def _next_redirect_group(self):
"""
Return a generator that retrieves pageids from the API 500 at a time
and yields them as a list
"""
apiQ = []
for pageid in self.get_redirect_pageids_via_api():
apiQ.append(pageid)
if len(apiQ) >= 500:
yield apiQ
apiQ = []
if apiQ:
yield apiQ
def get_redirects_via_api(self, maxlen=8):
"""
Return a generator that yields tuples of data about redirect Pages:
0 - page title of a redirect page
1 - type of redirect:
0 - broken redirect, target page title missing
1 - normal redirect, target page exists and is not a
redirect
2..maxlen - start of a redirect chain of that many redirects
(currently, the API seems not to return sufficient
data to make these return values possible, but
that may change)
maxlen+1 - start of an even longer chain, or a loop
(currently, the API seems not to return sufficient
                     data to allow this return value, but that may
change)
None - start of a redirect chain of unknown length, or loop
2 - target page title of the redirect, or chain (may not exist)
3 - target page of the redirect, or end of chain, or page title where
                chain or loop detection was halted, or None if unknown
"""
params = {
'action':'query',
'redirects':1,
#'':'',
}
for apiQ in self._next_redirect_group():
params['pageids'] = apiQ
pywikibot.output(u'.', newline=False)
data = query.GetData(params, self.site)
if 'error' in data:
raise RuntimeError("API query error: %s" % data)
if data == [] or 'query' not in data:
raise RuntimeError("No results given.")
            pages = {}
            # map each redirect title to its immediate target, as reported
            # by the API's 'redirects' list
            redirects = dict((x['from'], x['to'])
                             for x in data['query']['redirects'])
for pagetitle in data['query']['pages'].values():
if 'missing' in pagetitle and 'pageid' not in pagetitle:
pages[pagetitle['title']] = False
else:
pages[pagetitle['title']] = True
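            # classify each redirect by following its chain of targets, up to
            # maxlen hops (see the return value description in the docstring)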
for redirect in redirects:
target = redirects[redirect]
result = 0
final = None
try:
if pages[target]:
final = target
try:
while result <= maxlen:
result += 1
final = redirects[final]
# result = None
except KeyError:
pass
except KeyError:
                    result = None
yield (redirect, result, target, final)
def retrieve_broken_redirects(self):
if self.use_api:
count = 0
            for (pagetitle, redir_type, target, final) \
                    in self.get_redirects_via_api(maxlen=2):
                if redir_type == 0:
yield pagetitle
if self.api_number:
count += 1
if count >= self.api_number:
break
        elif self.xmlFilename is None:
# retrieve information from the live wiki's maintenance page
# broken redirect maintenance page's URL
path = self.site.broken_redirects_address(default_limit=False)
pywikibot.output(u'Retrieving special page...')
maintenance_txt = self.site.getUrl(path)
# regular expression which finds redirects which point to a
# non-existing page inside the HTML
            Rredir = re.compile(r'<li><a href=".+?" title="(.*?)"')
redir_names = Rredir.findall(maintenance_txt)
pywikibot.output(u'Retrieved %d redirects from special page.\n'
% len(redir_names))
for redir_name in redir_names:
yield redir_name
else:
# retrieve information from XML dump
pywikibot.output(
u'Getting a list of all redirects and of all page titles...')
redirs, pageTitles = self.get_redirects_from_dump(
alsoGetPageTitles=True)
for (key, value) in redirs.iteritems():
if value not in pageTitles:
yield key
def retrieve_double_redirects(self):
if self.use_api and not self.use_move_log:
count = 0
            for (pagetitle, redir_type, target, final) \
                    in self.get_redirects_via_api(maxlen=2):
                if redir_type != 0 and redir_type != 1:
yield pagetitle
if self.api_number:
count += 1
if count >= self.api_number:
break
        elif self.xmlFilename is None:
if self.use_move_log:
if self.use_api:
gen = self.get_moved_pages_redirects()
else:
gen = self.get_moved_pages_redirects_old()
for redir_page in gen:
yield redir_page.title()
return
# retrieve information from the live wiki's maintenance page
# double redirect maintenance page's URL
# pywikibot.config.special_page_limit = 1000
path = self.site.double_redirects_address(default_limit = False)
pywikibot.output(u'Retrieving special page...')
maintenance_txt = self.site.getUrl(path)
# regular expression which finds redirects which point to
# another redirect inside the HTML
            Rredir = re.compile(r'<li><a href=".+?" title="(.*?)">')
redir_names = Rredir.findall(maintenance_txt)
pywikibot.output(u'Retrieved %i redirects from special page.\n'
% len(redir_names))
for redir_name in redir_names:
yield redir_name
else:
redict = self.get_redirects_from_dump()
num = 0
for (key, value) in redict.iteritems():
num += 1
# check if the value - that is, the redirect target - is a
# redirect as well
if num > self.offset and value in redict:
yield key
pywikibot.output(u'\nChecking redirect %i of %i...'
% (num + 1, len(redict)))
def get_moved_pages_redirects(self):
'''generate redirects to recently-moved pages'''
# this will run forever, until user interrupts it
if self.offset <= 0:
self.offset = 1
start = datetime.datetime.utcnow() \
- datetime.timedelta(0, self.offset*3600)
# self.offset hours ago
offset_time = start.strftime("%Y%m%d%H%M%S")
pywikibot.output(u'Retrieving %d moved pages via API...'
% self.api_number)
if pywikibot.verbose:
pywikibot.output(u"[%s]" % offset_time)
for moved_page,u,t,c in self.site.logpages(number=self.api_number,
mode='move',
start=offset_time):
try:
if not moved_page.isRedirectPage():
continue
except pywikibot.BadTitle:
continue
except pywikibot.ServerError:
continue
# moved_page is now a redirect, so any redirects pointing
# to it need to be changed
try:
for page in moved_page.getReferences(follow_redirects=True,
redirectsOnly=True):
yield page
except pywikibot.NoPage:
# original title must have been deleted after move
continue
def get_moved_pages_redirects_old(self):
move_regex = re.compile(
r'moved <a href.*?>(.*?)</a> to <a href=.*?>.*?</a>.*?</li>')
if self.offset <= 0:
self.offset = 1
offsetpattern = re.compile(
r"""\(<a href="/w/index\.php\?title=Special:Log&offset=(\d+)"""
r"""&limit=500&type=move" title="Special:Log" rel="next">"""
r"""older 500</a>\)""")
start = datetime.datetime.utcnow() \
- datetime.timedelta(0, self.offset*3600)
# self.offset hours ago
offset_time = start.strftime("%Y%m%d%H%M%S")
while True:
move_url = \
self.site.path() + "?title=Special:Log&limit=500&offset=%s&type=move"\
% offset_time
try:
move_list = self.site.getUrl(move_url)
if pywikibot.verbose:
pywikibot.output(u"[%s]" % offset_time)
            except Exception:
import traceback
pywikibot.output(unicode(traceback.format_exc()))
return
g = move_regex.findall(move_list)
if pywikibot.verbose:
pywikibot.output(u"%s moved pages" % len(g))
for moved_title in g:
moved_page = pywikibot.Page(self.site, moved_title)
try:
if not moved_page.isRedirectPage():
continue
except pywikibot.BadTitle:
continue
except pywikibot.ServerError:
continue
# moved_page is now a redirect, so any redirects pointing
# to it need to be changed
try:
for page in moved_page.getReferences(follow_redirects=True,
redirectsOnly=True):
yield page
except pywikibot.NoPage:
# original title must have been deleted after move
continue
m = offsetpattern.search(move_list)
if not m:
break
offset_time = m.group(1)
class RedirectRobot:
def __init__(self, action, generator, always=False, number=None):
self.site = pywikibot.getSite()
self.action = action
self.generator = generator
self.always = always
self.number = number
self.exiting = False
def prompt(self, question):
if not self.always:
choice = pywikibot.inputChoice(question,
['Yes', 'No', 'All', 'Quit'],
['y', 'N', 'a', 'q'], 'N')
if choice == 'n':
return False
elif choice == 'q':
self.exiting = True
return False
elif choice == 'a':
self.always = True
return True
def delete_broken_redirects(self):
# get reason for deletion text
reason = pywikibot.translate(self.site, reason_broken)
for redir_name in self.generator.retrieve_broken_redirects():
            self.delete_1_broken_redirect(redir_name, reason)
if self.exiting:
break
def delete_1_broken_redirect(self, redir_name, reason):
redir_page = pywikibot.Page(self.site, redir_name)
# Show the title of the page we're working on.
# Highlight the title in purple.
pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
% redir_page.title())
try:
targetPage = redir_page.getRedirectTarget()
except pywikibot.IsNotRedirectPage:
pywikibot.output(u'%s is not a redirect.' % redir_page.title())
except pywikibot.NoPage:
pywikibot.output(u'%s doesn\'t exist.' % redir_page.title())
else:
try:
targetPage.get()
except pywikibot.NoPage:
if self.prompt(
u'Redirect target %s does not exist. Do you want to delete %s?'
% (targetPage.aslink(),
redir_page.aslink())):
try:
redir_page.delete(reason, prompt = False)
except pywikibot.NoUsername:
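                        # not logged in with sysop rights: fall back to
                        # tagging the redirect for speedy deletion instead
                        # of deleting it directly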
if targetPage.site().lang in sd_template \
and targetPage.site().lang in reason_broken:
pywikibot.output(
u"No sysop in user-config.py, put page to speedy deletion.")
content = redir_page.get(get_redirect=True)
content = pywikibot.translate(
targetPage.site().lang,
sd_template)+"\n"+content
summary = pywikibot.translate(
targetPage.site().lang,
reason_broken)
redir_page.put(content, summary)
except pywikibot.IsRedirectPage:
pywikibot.output(
u'Redirect target %s is also a redirect! Won\'t delete anything.'
% targetPage.aslink())
else:
                # we successfully got the target page, meaning that
                # it exists and is not a redirect: no reason to touch it.
pywikibot.output(
u'Redirect target %s does exist! Won\'t delete anything.'
% targetPage.aslink())
pywikibot.output(u'')
def fix_double_redirects(self):
for redir_name in self.generator.retrieve_double_redirects():
self.fix_1_double_redirect(redir_name)
if self.exiting:
break
def fix_1_double_redirect(self, redir_name):
redir = pywikibot.Page(self.site, redir_name)
# Show the title of the page we're working on.
# Highlight the title in purple.
pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
% redir.title())
newRedir = redir
redirList = [] # bookkeeping to detect loops
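        # walk the chain of redirects until the final target is found, a
        # loop is detected, or an error ends the traversal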
while True:
redirList.append(u'%s:%s' % (newRedir.site().lang,
newRedir.sectionFreeTitle()))
try:
targetPage = newRedir.getRedirectTarget()
except pywikibot.IsNotRedirectPage:
if len(redirList) == 1:
pywikibot.output(u'Skipping: Page %s is not a redirect.'
% redir.aslink())
break #do nothing
elif len(redirList) == 2:
pywikibot.output(
u'Skipping: Redirect target %s is not a redirect.'
% newRedir.aslink())
break # do nothing
except pywikibot.SectionError:
pywikibot.output(
u'Warning: Redirect target section %s doesn\'t exist.'
% newRedir.aslink())
except pywikibot.BadTitle, e:
# str(e) is in the format 'BadTitle: [[Foo]]'
pywikibot.output(
u'Warning: Redirect target %s is not a valid page title.'
% str(e)[10:])
            # Sometimes this error occurs: invalid title starting with a '#'.
except pywikibot.InvalidTitle, err:
pywikibot.output(u'Warning: %s' % err)
break
except pywikibot.NoPage:
if len(redirList) == 1:
pywikibot.output(u'Skipping: Page %s does not exist.'
% redir.aslink())
break
else:
if self.always:
pywikibot.output(
u"Skipping: Redirect target %s doesn't exist."
% newRedir.aslink())
break # skip if automatic
else:
pywikibot.output(
u"Warning: Redirect target %s doesn't exist."
% newRedir.aslink())
except pywikibot.ServerError:
pywikibot.output(u'Skipping: Server Error')
break
else:
pywikibot.output(
u' Links to: %s.'
% targetPage.aslink())
if targetPage.site().sitename() == 'wikipedia:en' \
and targetPage.title() == 'Target page name':
pywikibot.output(u"Skipping: Redirect source is vandalized.")
break
if targetPage.site() != self.site:
pywikibot.output(
u'Warning: redirect target (%s) is on a different site.'
% (targetPage.aslink()))
if self.always:
break # skip if automatic
# watch out for redirect loops
if redirList.count(u'%s:%s'
% (targetPage.site().lang,
targetPage.sectionFreeTitle())
) > 0:
pywikibot.output(
u'Warning: Redirect target %s forms a redirect loop.'
% targetPage.aslink())
break ###xqt doesn't work. edits twice!
try:
content = targetPage.get(get_redirect=True)
except pywikibot.SectionError:
content = pywikibot.Page(
targetPage.site(),
targetPage.sectionFreeTitle()
).get(get_redirect=True)
if targetPage.site().lang in sd_template \
and targetPage.site().lang in sd_tagging_sum:
pywikibot.output(u"Tagging redirect for deletion")
# Delete the two redirects
content = pywikibot.translate(
targetPage.site().lang,
sd_template)+"\n"+content
summ = pywikibot.translate(targetPage.site().lang,
sd_tagging_sum)
targetPage.put(content, summ)
redir.put(content, summ)
break # TODO Better implement loop redirect
else:
newRedir = targetPage
continue
try:
oldText = redir.get(get_redirect=True)
except pywikibot.BadTitle:
pywikibot.output(u"Bad Title Error")
break
text = self.site.redirectRegex().sub(
'#%s %s' %
(self.site.redirect( True ),
targetPage.aslink()),
oldText)
if text == oldText:
break
summary = pywikibot.translate(self.site, msg_double)\
% targetPage.aslink()
pywikibot.showDiff(oldText, text)
if self.prompt(u'Do you want to accept the changes?'):
try:
redir.put(text, summary)
except pywikibot.LockedPage:
pywikibot.output(u'%s is locked.' % redir.title())
except pywikibot.SpamfilterError, error:
pywikibot.output(
u"Saving page [[%s]] prevented by spam filter: %s"
% (redir.title(), error.url))
except pywikibot.PageNotSaved, error:
pywikibot.output(u"Saving page [[%s]] failed: %s"
% (redir.title(), error))
except pywikibot.NoUsername:
pywikibot.output(
u"Page [[%s]] not saved; sysop privileges required."
% redir.title())
except pywikibot.Error, error:
pywikibot.output(
u"Unexpected error occurred trying to save [[%s]]: %s"
% (redir.title(), error))
break
def fix_double_or_delete_broken_redirects(self):
# TODO: part of this should be moved to generator, the rest merged into self.run()
# get reason for deletion text
delete_reason = pywikibot.translate(self.site, reason_broken)
count = 0
for (redir_name, code, target, final)\
in self.generator.get_redirects_via_api(maxlen=2):
if code == 1:
continue
elif code == 0:
self.delete_1_broken_redirect(redir_name, delete_reason)
count += 1
else:
self.fix_1_double_redirect(redir_name)
count += 1
if self.exiting or (self.number and count >= self.number):
break
def run(self):
# TODO: make all generators return a redirect type indicator,
# thus make them usable with 'both'
if self.action == 'double':
self.fix_double_redirects()
elif self.action == 'broken':
self.delete_broken_redirects()
elif self.action == 'both':
self.fix_double_or_delete_broken_redirects()
def main(*args):
# read command line parameters
# what the bot should do (either resolve double redirs, or delete broken
# redirs)
action = None
# where the bot should get his infos from (either None to load the
# maintenance special page from the live wiki, or the filename of a
# local XML dump file)
xmlFilename = None
    # Which namespaces should be processed when using an XML dump or the API;
    # an empty list means all namespaces with -xml, and only the main
    # namespace with -api
    namespaces = []
# at which redirect shall we start searching double redirects again
# (only with dump); default to -1 which means all redirects are checked
offset = -1
moved_pages = False
api = False
start = ''
until = ''
number = None
always = False
for arg in pywikibot.handleArgs(*args):
if arg == 'double' or arg == 'do':
action = 'double'
elif arg == 'broken' or arg == 'br':
action = 'broken'
elif arg == 'both':
action = 'both'
elif arg == '-api':
api = True
elif arg.startswith('-xml'):
if len(arg) == 4:
xmlFilename = pywikibot.input(
u'Please enter the XML dump\'s filename: ')
else:
xmlFilename = arg[5:]
elif arg.startswith('-moves'):
moved_pages = True
elif arg.startswith('-namespace:'):
ns = arg[11:]
if ns == '':
## "-namespace:" does NOT yield -namespace:0 further down the road!
ns = pywikibot.input(
u'Please enter a namespace by its number: ')
# u'Please enter a namespace by its name or number: ')
# TODO! at least for some generators.
if ns == '':
ns = '0'
try:
ns = int(ns)
except ValueError:
#-namespace:all Process all namespaces. Works only with the API read interface.
pass
            if ns not in namespaces:
namespaces.append(ns)
elif arg.startswith('-offset:'):
offset = int(arg[8:])
elif arg.startswith('-start:'):
start = arg[7:]
elif arg.startswith('-until:'):
until = arg[7:]
elif arg.startswith('-number:'):
number = int(arg[8:])
elif arg == '-always':
always = True
else:
pywikibot.output(u'Unknown argument: %s' % arg)
if not action or (xmlFilename and moved_pages)\
or (api and xmlFilename):
pywikibot.showHelp('redirect')
else:
gen = RedirectGenerator(xmlFilename, namespaces, offset, moved_pages,
api, start, until, number)
bot = RedirectRobot(action, gen, always, number)
bot.run()
if __name__ == '__main__':
try:
main()
finally:
pywikibot.stopme()