#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Scripts to manage categories.
Syntax: python category.py action [-option]
where action can be one of these:
* add - mass-add a category to a list of pages
* remove - remove category tag from all pages in a category
* move - move all pages in a category to another category
* tidy - tidy up a category by moving its articles into subcategories
* tree - show a tree of subcategories of a given category
* listify - make a list of all of the articles that are in a category
and option can be one of these:
* -person - sort persons by their last name (for action 'add')
* -rebuild - reset the database
* -from: - The category to move from (for the move option)
Also, the category to remove from in the remove option
Also, the category to make a list of in the listify option
* -to: - The category to move to (for the move option)
Also, the name of the list to make in the listify option
NOTE: If the category names have spaces in them you may need to use
a special syntax in your shell so that the names aren't treated as
separate parameters. For instance, in BASH, use single quotes,
e.g. -from:'Polar bears'
* -batch - Don't prompt to delete emptied categories (do it
automatically).
* -summary: - Pick a custom edit summary for the bot.
* -inplace - Use this flag to change categories in place rather than
rearranging them.
* -nodelsum - An option for remove, this specifies not to use the custom
edit summary as the deletion reason. Instead, it uses the
default deletion reason for the language, which is "Category
was disbanded" in English.
* -overwrite - An option for listify, this overwrites the current page with
the list even if something is already there.
* -showimages - An option for listify, this displays images rather than
linking them in the list.
* -talkpages - An option for listify, this outputs the links to talk pages
of the pages to be listified in addition to the pages
themselves.
* -recurse - Recurse through all subcategories of categories.
* -match - Only work on pages whose titles match the given regex (for
move and remove actions).
* -create - An option for add: if a page doesn't exist, do not skip it,
create it instead
If action is "add", the following options are supported:
¶ms;
For the actions tidy and tree, the bot will store the category structure
locally in category.dump. This saves time and server load, but if it uses
these data later, they may be outdated; use the -rebuild parameter in this
case.
For example, to create a new category from a list of persons, type:
python category.py add -person
and follow the on-screen instructions.
Or to do it all from the command-line, use the following syntax:
python category.py move -from:US -to:'United States'
This will move all pages in the category US to the category United States.
"""
#
# (C) Rob W.W. Hooft, 2004
# (C) Daniel Herding, 2004
# (C) Anreas J Schwab, 2007
# (C) Pywikipedia team, 2008-2009
#
__version__ = '$Id: category.py 8185 2010-05-17 19:48:08Z xqt $'
#
# Distributed under the terms of the MIT license.
#
import os, re, pickle, bz2
import wikipedia as pywikibot
import catlib, config, pagegenerators
# This is required for the text that is shown when you run this script
# with the parameter -help.
# The key is a placeholder that also appears on a line of its own in the
# module docstring; the value is the parameter help shared by all scripts
# that use the pagegenerators module.
# NOTE(review): the substitution itself is presumably performed by
# pywikibot.showHelp() - confirm. The key must stay identical to the
# placeholder text in the module docstring.
docuReplacements = {
'¶ms;': pagegenerators.parameterHelp
}
# Summary messages
#
# Edit summaries used by add_category(), keyed by language code. Each entry
# contains exactly one %s placeholder, which add_category() fills with the
# title of the category being added (entered without namespace).
msg_add={
'ar':u': [[:%s]]',
'bat-smg':u'Robots: Pridedama [[Kateguorj:%s]]',
'be-x-old':u': [[:%s]]',
'ca':u'Robot: Afegint [[Categoria:%s]]',
'cs':u'Robot pidal [[Kategorie:%s]]',
'da':u'Robot: Tilfjer [[Kategori:%s]]',
'de':u'Bot: Ergnze [[Kategorie:%s]]',
'en':u'Robot: Adding [[Category:%s]]',
'es':u'Bot: Aadida [[Categora:%s]]',
'id':u'Bot: Menambahkan [[Kategori:%s]]',
'fa':u': [[:%s]]',
'fi':u'Botti lissi luokkaan [[Luokka:%s]]',
'fr':u'Robot : ajoute [[Catgorie:%s]]',
'he':u': [[:%s]]',
'ia':u'Robot: Addition de [[Categoria:%s]]',
'is':u'Vlmenni: Bti vi [[Flokkur:%s]]',
'it':u'Bot: Aggiungo [[Categoria:%s]]',
'ja':u': [[Category:%s]]',
'kk':u': [[:%s]] ',
'ko': u': [[:%s]] ',
'ksh':u'Bot: [[Saachjropp:%s]] erinjedonn',
'lb': u'Bot: Derbi setzen [[Kategorie:%s]]',
'lt':u'robotas: Pridedama [[Kategorija:%s]]',
'nds':u'Kat-Bot: [[Kategorie:%s]] rin',
'nds-nl':u'bot: [[kattegerie:%s]] derbie edaon',
'nl':u'Bot: [[categorie:%s]] toegevoegd',
'no':u'Robot: Legger til [[Kategori:%s]]',
'nn':u'robot: la til [[Kategori:%s]]',
'pl':u'Robot dodaje [[Kategoria:%s]]',
'pt':u'Bot: Adicionando [[Categoria:%s]]',
'ru':u': [[:%s]]',
'sk':u'Robot pridal [[Kategria:%s]]',
'sr':u': [[:%s]]',
'sv':u'Robot: Lgger till [[Kategori:%s]]',
'szl':u'Bot dodowo: [[Kategoria:%s]]',
'uk':u': [[:%s]]',
'zh':u': [[Category:%s]]',
}
# Edit summaries used when a category tag is changed, keyed by language code.
# CategoryMoveRobot.run() first formats the entry with the 2-tuple
# (old category title, new category title) and falls back to the old title
# alone on TypeError; CategoryTidyRobot formats it with a single title.
# Most entries contain one %s placeholder, but 'ja' and 'zh' contain two, so
# they can only be formatted with a 2-tuple.
# NOTE(review): the 'sk' text is identical to the 'sk' entry of msg_add -
# possibly a copy/paste slip; confirm with a speaker of the language.
msg_change={
'ar':u': %s',
'be-x-old':u': %s',
'ca':u'Robot: Canviant %s',
'cs':u'Robot zmnil %s',
'da':u'Robot: ndrer %s',
'de':u'Bot: ndere %s',
'en':u'Robot: Changing %s',
'es':u'Bot: Cambiada %s',
'id':u'Bot: Mengganti %s',
'fa':u': %s',
'fi':u'Botti muutti luokan %s',
'fr':u'Robot : modifie [[%s]]',
'he':u': %s',
'ia':u'Robot: Modification de %s',
'is':u'Vlmenni: Breyti flokknum [[%s]]',
'it':u'Bot: Modifico %s',
'lt':u'robotas: Keiiama %s',
'ja':u': [[%s]][[%s]]',
'kk':u': %s ',
'ko': u': %s ',
'ksh':u'Bot: %s ujewelt',
'nds':u'Kat-Bot: %s utwesselt',
'nds-nl':u'bot: wieziging %s',
'nl':u'Bot: wijziging %s',
'no':u'Robot: Endrer %s',
'nn':u'robot: endra %s',
'pt':u'Bot: Modificando [[%s]]',
'pl':u'Robot przenosi %s',
'ru':u': %s',
'sk':u'Robot pridal [[Kategria:%s]]',
'sr':u': %s',
'sv':u'Robot: ndrar %s',
'uk':u': [[:%s]]',
'zh':u': [[%s]][[%s]]',
}
# Edit summaries used by CategoryTidyRobot when an article is moved from one
# category to another, keyed by language code. Every entry is formatted with
# a mapping that provides the keys 'oldcat' and 'newcat', so each placeholder
# must be written as %(name)s - the trailing 's' conversion is mandatory.
msg_replace = {
    'ar': u': %(oldcat)s %(newcat)s',
    'cs': u'Robot nahradil kategorii %(oldcat)s za %(newcat)s',
    'de': u'Bot: Ersetze Kategorie %(oldcat)s durch %(newcat)s',
    'en': u'Robot: Replacing category %(oldcat)s with %(newcat)s',
    # The placeholders of this entry used to lack the 's' conversion, so the
    # category names were never inserted into the summary.
    'fa': u' %(oldcat)s %(newcat)s',
    'sk': u'Robot nahradil kategriu %(oldcat)s za %(newcat)s',
}
# Deletion reasons used by CategoryMoveRobot once the old category has been
# moved, keyed by language code. Callers always format an entry with the
# 2-tuple (new category title, new category title), so every entry must
# contain exactly two %s placeholders (link target and link label).
deletion_reason_move = {
    'ar': u': [[::%s|%s]]',
    'bat-smg': u'Robots: Kateguorj bova parvadnta i [[:Kateguorj:%s|%s]]',
    'be-x-old': u': [[::%s|%s]]',
    'ca': u'Robot: La categoria s\'ha mogut a [[:Categoria:%s|%s]]',
    'cs': u'Kategorie pesunuta na [[:Kategorie:%s|%s]]',
    'da': u'Robot: Kategori flyttet til [[:Category:%s|%s]]',
    'de': u'Bot: Kategorie wurde nach [[:Category:%s|%s]] verschoben',
    'en': u'Robot: Category was moved to [[:Category:%s|%s]]',
    'es': u'Robot: La categora ha sido movida a [[:Category:%s|%s]]',
    'fa': u': [[::%s|%s]] ',
    'fi': u'Botti siirsi luokan nimelle [[:Luokka:%s|%s]]',
    'fr': u'Robot : catgorie dplace sur [[:Category:%s|%s]]',
    'he': u': [[::%s|%s]]',
    'ia': u'Robot: Categoria transferite a [[:Category:%s|%s]]',
    'id': u'Bot: Kategori dipindahkan ke [[:Category:%s|%s]]',
    'it': u'Bot: La categoria stata sostituita da [[:Categoria:%s|%s]]',
    # This entry used to contain a single %s, which made formatting with the
    # 2-tuple raise "TypeError: not all arguments converted".
    'ja': u': [[:Category:%s|%s]]',
    'kk': u': [[::%s|%s]] ',
    'ko': u': [[::%s|%s]] ',
    'ksh': u'Bot: Saachjropp noh [[:Category:%s|%s]] jeschovve',
    'lb': u'Bot: Kategorie gouf grckelt: Nei [[:Kategorie:%s|%s]]',
    'lt': u'robotas: Kategorija pervadinta [[:Category:%s|%s]]',
    'nds': u'Kat-Bot: Kategorie na [[:Category:%s|%s]] schaven',
    'nds-nl': u'Bot: kattegerie is herneumd naor [[:Kattegerie:%s|%s]]',
    'nl': u'Bot: Categorie is hernoemd naar [[:Category:%s|%s]]',
    'no': u'Robot: Kategorien ble flyttet til [[:Category:%s|%s]]',
    'nn': u'robot: kategorien blei flytta til [[:Kategori:%s|%s]]',
    'pt': u'Bot: Categoria [[:Category:%s|%s]] foi movida',
    'pl': u'Robot przenosi kategori do [[:Category:%s|%s]]',
    'ru': u': [[::%s|%s]]',
    'sk': u'Kategria bola presunut na [[:Kategria:%s|%s]]',
    'sr': u': [[:Category:%s|%s]]',
    'sv': u'Robot: Kategori flyttades till [[:Category:%s|%s]]',
    'uk': u': [[:%s|%s]]',
    'zh': u': [[:Category:%s|%s]]',
}
# Template names grouped by wiki family and then by language code.
# CategoryMoveRobot.run() passes the list selected for the current site to
# Category.copyAndKeep() when it copies the old category page.
# NOTE(review): presumably these are "categories for discussion" style
# templates that must not be carried over to the new page - confirm against
# catlib.Category.copyAndKeep().
cfd_templates = {
'wikipedia' : {
'en':[u'cfd', u'cfr', u'cfru', u'cfr-speedy', u'cfm', u'cfdu'],
'fi':[u'roskaa', u'poistettava', u'korjattava/nimi', u'yhdistettvLuokka'],
'he':[u' ', u''],
'nl':[u'categorieweg', u'catweg', u'wegcat', u'weg2']
},
'commons' : {
'commons':[u'cfd', u'move']
}
}
class CategoryDatabase:
    '''
    This is a temporary knowledge base saving for each category the contained
    subcategories and articles, so that category pages do not need to
    be loaded over and over again.

    Two dictionaries are kept:
    * catContentDB - maps a category to the 2-tuple
                     (list of subcategories, list of articles)
    * superclassDB - maps a category to the list of its supercategories

    Both can be written to and restored from a bz2-compressed pickle file.
    '''

    def __init__(self, rebuild = False, filename = 'category.dump.bz2'):
        '''
        Parameters:
            rebuild  - if True, start with empty dictionaries and do not
                       read the dump file.
            filename - name of the dump file. A relative name is resolved
                       with pywikibot.config.datafilepath().

        If the dump cannot be read for any reason, the database silently
        starts empty.
        '''
        if rebuild:
            self.rebuild()
            return
        try:
            if not os.path.isabs(filename):
                filename = pywikibot.config.datafilepath(filename)
            f = bz2.BZ2File(filename, 'r')
            try:
                pywikibot.output(u'Reading dump from %s'
                                 % pywikibot.config.shortpath(filename))
                # NOTE: unpickling can execute arbitrary code, so the dump
                # file must only ever be one written by dump() below.
                databases = pickle.load(f)
            finally:
                # close the file even when unpickling fails
                f.close()
            # keys are categories, values are 2-tuples with lists as entries.
            self.catContentDB = databases['catContentDB']
            # like the above, but for supercategories
            self.superclassDB = databases['superclassDB']
        except Exception:
            # If something goes wrong, just rebuild the database.
            # KeyboardInterrupt and SystemExit are deliberately not caught.
            self.rebuild()

    def rebuild(self):
        '''Forget everything that is known about the category structure.'''
        self.catContentDB={}
        self.superclassDB={}

    def _getContents(self, cat):
        '''
        Return the (subcategories, articles) tuple for cat, loading both
        lists from the category object on first use.
        '''
        if cat not in self.catContentDB:
            subcatlist = cat.subcategoriesList()
            articlelist = cat.articlesList()
            # add to dictionary
            self.catContentDB[cat] = (subcatlist, articlelist)
        return self.catContentDB[cat]

    def getSubcats(self, supercat):
        '''
        For a given supercategory, return a list of Categorys for all its
        subcategories.
        Saves this list in a temporary database so that it won't be loaded
        from the server next time it's required.
        '''
        return self._getContents(supercat)[0]

    def getArticles(self, cat):
        '''
        For a given category, return a list of Pages for all its articles.
        Saves this list in a temporary database so that it won't be loaded
        from the server next time it's required.
        '''
        return self._getContents(cat)[1]

    def getSupercats(self, subcat):
        '''
        For a given subcategory, return a list of Categorys for all its
        supercategories.
        Saves this list in a temporary database so that it won't be loaded
        from the server next time it's required.
        '''
        # only ask the category if we don't know its supercategories yet
        if subcat not in self.superclassDB:
            self.superclassDB[subcat] = subcat.supercategoriesList()
        return self.superclassDB[subcat]

    def dump(self, filename = 'category.dump.bz2'):
        '''
        Saves the contents of the dictionaries superclassDB and catContentDB
        to disk.

        A relative filename is resolved with
        pywikibot.config.datafilepath(). A pickling failure is reported but
        does not raise, because the dump is only a cache.
        '''
        if not os.path.isabs(filename):
            filename = pywikibot.config.datafilepath(filename)
        pywikibot.output(u'Dumping to %s, please wait...'
                         % pywikibot.config.shortpath(filename))
        databases = {
            'catContentDB': self.catContentDB,
            'superclassDB': self.superclassDB
        }
        f = bz2.BZ2File(filename, 'w')
        # store dump to disk in binary format
        try:
            try:
                pickle.dump(databases, f, protocol=pickle.HIGHEST_PROTOCOL)
            except pickle.PicklingError as error:
                pywikibot.output(u'WARNING: could not dump the category '
                                 u'database: %s' % error)
        finally:
            # always release the file handle, whatever pickle raised
            f.close()
def sorted_by_last_name(catlink, pagelink):
    '''Return a category link with a key that sorts persons by last name.

    Parameters: catlink  - The Category to be linked
                pagelink - the Page to be placed in the category

    Returns a pywikibot.Page on the site of pagelink whose title is the
    title of catlink, followed by '|' and the sort key when the page name
    consists of more than one word.

    Trailing words in brackets will be removed. Example: If catlink is
    [[Category:Author]] and pagelink is a Page to [[Alexandre Dumas
    (senior)]], this function will return a page for:
    [[Category:Author|Dumas, Alexandre]]
    '''
    page_name = pagelink.title()
    site = pagelink.site()
    # regular expression that matches a name followed by a space and
    # disambiguation brackets. Group 1 is the name without the rest.
    # A raw string keeps the backslashes for the regex engine.
    match_object = re.match(r'(.*) \(.+?\)', page_name)
    if match_object:
        page_name = match_object.group(1)
    if ' ' not in page_name:
        # a single word needs no explicit sort key
        return pywikibot.Page(site, catlink.title())
    # pull last part of the name to the beginning, and append the
    # rest after a comma; e.g., "John von Neumann" becomes
    # "Neumann, John von"
    first_names, last_name = page_name.rsplit(' ', 1)
    sorted_key = last_name + ', ' + first_names
    # give explicit sort key
    return pywikibot.Page(site, catlink.title() + '|' + sorted_key)
def add_category(sort_by_last_name = False, create_pages = False):
    '''A robot to mass-add a category to a list of pages.

    The pages are taken from the module-level generator "gen"; nothing is
    done when it is not set. The user is asked once for the category title
    and then, for every page, whether it should be tagged ("All" stops
    asking after a confirmation).

    Parameters:
        sort_by_last_name - if True, the category link gets a sort key
                            built by sorted_by_last_name()
        create_pages      - if True, pages that do not exist yet are created
                            instead of being skipped
    '''
    site = pywikibot.getSite()
    if not gen:
        return

    newcatTitle = pywikibot.input(u'Category to add (do not give namespace):')
    if not site.nocapitalize:
        newcatTitle = newcatTitle[:1].capitalize() + newcatTitle[1:]
    # edit summary used for every page that gets saved
    editSummary = pywikibot.translate(site, msg_add) % newcatTitle
    cat_namespace = site.category_namespaces()[0]

    answer = ''
    for page in gen:
        # an earlier "All" stays valid; any other answer is asked again
        if answer != 'a':
            answer = ''
        while answer not in ('y', 'n', 'a'):
            answer = pywikibot.inputChoice(u'%s' % (page.aslink()),
                                           ['Yes', 'No', 'All'],
                                           ['y', 'n', 'a'], 'n')
            if answer == 'a':
                confirm = pywikibot.inputChoice(
                    u"This should be used if and only if you are sure "
                    u"that your links are correct!\n"
                    u"Are you sure?",
                    ['Yes', 'No'], ['y', 'n'], 'n')
                if confirm == 'n':
                    answer = ''
        if answer not in ('y', 'a'):
            continue

        try:
            text = page.get()
        except pywikibot.NoPage:
            if not create_pages:
                pywikibot.output(u"%s doesn't exist yet. Ignoring."
                                 % (page.title()))
                continue
            pywikibot.output(u"%s doesn't exist yet. Creating."
                             % (page.title()))
            text = ''
        except pywikibot.IsRedirectPage as arg:
            redirTarget = pywikibot.Page(site, arg.args[0])
            pywikibot.output(
                u"WARNING: %s is redirect to %s. Ignoring."
                % (page.title(), redirTarget.title()))
            continue

        cats = page.categories()
        # Show the title of the page we're working on.
        # Highlight the title in purple.
        pywikibot.output(
            u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
            % page.title())
        pywikibot.output(u"Current categories:")
        for existing in cats:
            pywikibot.output(u"* %s" % existing.title())

        catpl = pywikibot.Page(site, cat_namespace + ':' + newcatTitle)
        if sort_by_last_name:
            catpl = sorted_by_last_name(catpl, page)
        if catpl in cats:
            pywikibot.output(u"%s is already in %s."
                             % (page.title(), catpl.title()))
            continue

        pywikibot.output(u'Adding %s' % catpl.aslink())
        cats.append(catpl)
        text = pywikibot.replaceCategoryLinks(text, cats)
        try:
            page.put(text, comment = editSummary)
        except pywikibot.EditConflict:
            pywikibot.output(
                u'Skipping %s because of edit conflict'
                % (page.title()))
class CategoryMoveRobot:
    """Robot to move pages from one category to another.

    Parameters of the constructor:
    * oldCatTitle          - title of the category to move from
    * newCatTitle          - title of the category to move to
    * batchMode            - if True, delete the emptied category and its
                             talk page without asking for confirmation
    * editSummary          - custom edit summary; a translated default is
                             used when it is empty
    * inPlace              - passed on to catlib.change_category()
    * moveCatPage          - if True, copy the old category page to the new
                             title and move its talk page
    * deleteEmptySourceCat - if true, delete the old category page after a
                             successful copy, provided it is empty
    * titleRegex           - if given, only pages and subcategories whose
                             title matches this regular expression are
                             recategorized
    """

    def __init__(self, oldCatTitle, newCatTitle, batchMode=False,
                 editSummary='', inPlace=False, moveCatPage=True,
                 deleteEmptySourceCat=True, titleRegex=None):
        site = pywikibot.getSite()
        self.editSummary = editSummary
        self.oldCat = catlib.Category(site, oldCatTitle)
        self.newCatTitle = newCatTitle
        self.inPlace = inPlace
        self.moveCatPage = moveCatPage
        self.batchMode = batchMode
        self.deleteEmptySourceCat = deleteEmptySourceCat
        self.titleRegex = titleRegex

    def _deletionReason(self, site):
        """Return the translated reason that points to the new category."""
        return pywikibot.translate(site, deletion_reason_move) \
               % (self.newCatTitle, self.newCatTitle)

    def _moveTalkPage(self, site, newCat):
        """Move the talk page of the old category next to newCat.

        Returns the old talk page if it was moved, otherwise None.
        """
        oldTalk = self.oldCat.toggleTalkPage()
        if not oldTalk.exists():
            return None
        newTalkTitle = newCat.toggleTalkPage().title()
        try:
            talkMoved = oldTalk.move(newTalkTitle,
                                     self._deletionReason(site))
        except (pywikibot.NoPage, pywikibot.PageNotSaved) as e:
            #in order :
            #Source talk does not exist, or
            #Target talk already exists
            pywikibot.output(e.message)
            return None
        if talkMoved:
            return oldTalk
        return None

    def _recategorize(self, generator, newCat):
        """Replace the old category by newCat on every matching page."""
        for page in pagegenerators.PreloadingGenerator(generator):
            if not self.titleRegex or re.search(self.titleRegex,
                                                page.title()):
                catlib.change_category(page, self.oldCat, newCat,
                                       comment=self.editSummary,
                                       inPlace=self.inPlace)

    def run(self):
        """Copy the category page, recategorize all members and delete the
        emptied source category."""
        site = pywikibot.getSite()
        newCat = catlib.Category(site, self.newCatTitle)
        # set edit summary message
        if not self.editSummary:
            template = pywikibot.translate(site, msg_change)
            try:
                # some translations name both the old and the new category
                self.editSummary = template % (self.oldCat.title(),
                                               newCat.title())
            except TypeError:
                # most translations only have room for the old category
                self.editSummary = template % self.oldCat.title()

        # Copy the category contents to the new category page
        copied = False
        oldMovedTalk = None
        if self.oldCat.exists() and self.moveCatPage:
            copied = self.oldCat.copyAndKeep(
                self.newCatTitle,
                pywikibot.translate(site, cfd_templates))
            # Also move the talk page
            if copied:
                oldMovedTalk = self._moveTalkPage(site, newCat)

        # Move articles
        self._recategorize(
            pagegenerators.CategorizedPageGenerator(self.oldCat,
                                                    recurse=False),
            newCat)
        # Move subcategories
        self._recategorize(
            pagegenerators.SubCategoriesPageGenerator(self.oldCat,
                                                      recurse=False),
            newCat)

        # Delete the old category and its moved talk page
        if not (copied and self.deleteEmptySourceCat):
            return
        if not self.oldCat.isEmpty():
            pywikibot.output('Couldn\'t delete %s - not empty.'
                             % self.oldCat.title())
            return
        reason = self._deletionReason(site)
        confirm = not self.batchMode
        self.oldCat.delete(reason, confirm, mark = True)
        if oldMovedTalk is not None:
            oldMovedTalk.delete(reason, confirm, mark = True)
class CategoryListifyRobot:
    '''
    Writes a wiki list of all members of a category to a page.

    Every member becomes one bullet line. Images (unless showImages is set)
    and categories are linked with a leading colon. With talkPages set, a
    link to the talk page is appended to every member that is not a talk
    page itself. An existing list page is only replaced if overwrite is set.
    '''

    # Default edit summaries by language code; filled with the category
    # title and the number of listed entries.
    listify_msg={
        'ar':u': %s (%d )',
        'ca':u'Robot: Llistant de %s (%d entrades)',
        'en':u'Robot: Listifying from %s (%d entries)',
        'fa':u': %s(%d )',
        'fi':u'Botti listasi luokan %s (%d jsent)',
        'he':u': %s (%d )',
        'kk':u': %s (%d ) ',
        'nds-nl':u'Bot: lieste van %s (%d pagina\'s)',
        'nl':u'Bot: Lijst van %s (%d pagina\'s)',
        'pl':u'Robot: listuje kategori %s (%d stron)',
        'sv':u'Robot: Skapar en lista frn %s (%d)',
        'pt':u'Bot: Listando de %s (%d entradas)',
        'zh':u': %s(%d)',
    }

    def __init__(self, catTitle, listTitle, editSummary, overwrite = False, showImages = False, subCats = False, talkPages = False, recurse = False):
        site = pywikibot.getSite()
        self.cat = catlib.Category(site, 'Category:' + catTitle)
        self.list = pywikibot.Page(pywikibot.getSite(), listTitle)
        self.editSummary = editSummary
        self.overwrite = overwrite
        self.showImages = showImages
        self.subCats = subCats
        self.talkPages = talkPages
        self.recurse = recurse

    def run(self):
        '''Build the list and save it to the list page.'''
        members = self.cat.articlesList(recurse = self.recurse)
        if self.subCats:
            members += self.cat.subcategoriesList()
        if not self.editSummary:
            template = pywikibot.translate(pywikibot.getSite(),
                                           self.listify_msg)
            self.editSummary = template % (self.cat.title(), len(members))

        lines = []
        for member in members:
            # images and categories need a leading colon to be linked
            # instead of being displayed or used as a category tag
            plainLink = ((not member.isImage() or self.showImages)
                         and not member.isCategory())
            if self.talkPages and not member.isTalkPage():
                if plainLink:
                    template = "*[[%s]] -- [[%s|talk]]\n"
                else:
                    template = "*[[:%s]] -- [[%s|talk]]\n"
                lines.append(template % (member.title(),
                                         member.toggleTalkPage().title()))
            else:
                if plainLink:
                    template = "*[[%s]]\n"
                else:
                    template = "*[[:%s]]\n"
                lines.append(template % member.title())
        listString = "".join(lines)

        if self.overwrite or not self.list.exists():
            self.list.put(listString, comment=self.editSummary)
        else:
            pywikibot.output(u'Page %s already exists, aborting.' % self.list.title())
class CategoryRemoveRobot:
    '''
    Removes the category tag from all pages in a given category and from
    the category pages of all subcategories, without prompting.
    Does not remove category tags pointing at subcategories.

    Afterwards the category page and its talk page are deleted if the
    category exists and is empty.

    Parameters of the constructor:
    * catTitle              - title of the category to disband
    * batchMode             - if True, do not ask before deleting pages
    * editSummary           - custom edit summary; a translated default is
                              used when it is empty
    * useSummaryForDeletion - if True, the edit summary is also used as the
                              deletion reason
    * titleRegex            - if given, only articles whose title matches
                              this regular expression are changed
                              (subcategories are always changed)
    * inPlace               - passed on to catlib.change_category()
    '''

    # Default deletion reasons by language code (no placeholders).
    deletion_reason_remove = {
        'ar':u': ',
        'be-x-old':u': ',
        'ca':u'Robot: La categoria s\'ha eliminat',
        'da':u'Robot: Kategorien blev oplst',
        'de':u'Bot: Kategorie wurde aufgelst',
        'en':u'Robot: Category was disbanded',
        'es':u'Robot: La categora ha sido eliminada',
        'fi':u'Botti tyhjensi luokan',
        'he':u': ',
        'ia':u'Robot: Categoria esseva dissolvite',
        'kk':u': ',
        'ksh':u'Bot: de Saachjropp is nu opjel',
        'nds':u'Kat-Bot: Kategorie is nu oplst',
        'nds-nl':u'Bot: kattegerie besteet neet meer',
        'nl':u'Bot: Categorie is opgeheven',
        'no':u'Robot: Kategorien ble opplst',
        'nn':u'robot: kategorien blei lyst opp',
        'pl':u'Robot: Kategoria zostaa usunita',
        'pt':u'Bot: Categoria foi unida',
        'ru':u': ',
        'sv':u'Robot: Kategorin upplstes',
        'uk':u': ',
        'zh':u':',
    }

    # Default edit summaries by language code; %s is the category title.
    msg_remove={
        'ar':u': %s',
        'bat-smg':u'Robots: Trnama %s',
        'be-x-old':u': [[%s]]',
        'ca':u'Robot: Eliminant de %s',
        'da':u'Robot: Fjerner fra %s',
        'de':u'Bot: Entferne aus %s',
        'en':u'Robot: Removing from %s',
        'es':u'Bot: Eliminada de la %s',
        'fa':u': %s',
        'fi':u'Botti poisti luokasta %s',
        'fr':u'Robot : Retir depuis %s',
        'he':u': %s',
        'ia':u'Robot: Eliminate de %s',
        'is':u'Vlmenni: Fjarlgi [[%s]]',
        'ja':u':[[%s]]',
        'kk':u': %s ',
        'ksh':u'Bot: u de %s ujedraare',
        'lb': u'Bot: Ewech huele vun %s',
        'nds':u'Kat-Bot: rut ut %s',
        'nds-nl':u'Bot: vort-ehaold uut %s',
        'nl':u'Bot: Verwijderd uit %s',
        'no':u'Robot: Fjerner ifra %s',
        'nn':u'robot: fjerna ifr %s',
        'pl':u'Robot: Usuwa z kategorii %s',
        'pt':u'Bot: Removendo [[%s]]',
        'ru':u': %s',
        'sr':u': [[%s]]',
        'sv':u'Robot: Tar bort frn %s',
        'uk':u': %s',
        'zh':u': [[%s]]',
    }

    def __init__(self, catTitle, batchMode = False, editSummary = '', useSummaryForDeletion = True, titleRegex = None, inPlace = False):
        self.editSummary = editSummary
        self.cat = catlib.Category(pywikibot.getSite(), catTitle)
        self.useSummaryForDeletion = useSummaryForDeletion
        self.batchMode = batchMode
        self.titleRegex = titleRegex
        self.inPlace = inPlace
        # get the default edit summary message if none was given
        if not self.editSummary:
            self.editSummary = pywikibot.translate(pywikibot.getSite(), self.msg_remove) % self.cat.title()

    def _untag(self, page):
        '''Remove the category tag from a single page.'''
        catlib.change_category(page, self.cat, None,
                               comment = self.editSummary,
                               inPlace = self.inPlace)

    def run(self):
        '''Untag all members, then delete the emptied category page.'''
        articles = self.cat.articlesList(recurse = 0)
        if not articles:
            pywikibot.output(u'There are no articles in category %s' % self.cat.title())
        for article in articles:
            if not self.titleRegex or re.search(self.titleRegex,
                                                article.title()):
                self._untag(article)

        # Also removes the category tag from subcategories' pages
        subcategories = self.cat.subcategoriesList(recurse = 0)
        if not subcategories:
            pywikibot.output(u'There are no subcategories in category %s' % self.cat.title())
        for subcategory in subcategories:
            self._untag(subcategory)

        # Deletes the category page
        if not (self.cat.exists() and self.cat.isEmpty()):
            return
        if self.useSummaryForDeletion and self.editSummary:
            reason = self.editSummary
        else:
            reason = pywikibot.translate(pywikibot.getSite(), self.deletion_reason_remove)
        talkPage = self.cat.toggleTalkPage()
        try:
            self.cat.delete(reason, not self.batchMode)
        except pywikibot.NoUsername:
            # The message has no placeholder, so it must not be combined
            # with the % operator (that raised a TypeError here before).
            pywikibot.output(u'You\'re not setup sysop info, category will not delete.')
            return
        if talkPage.exists():
            talkPage.delete(reason=reason, prompt=not self.batchMode)
class CategoryTidyRobot:
    """
    Script to help a human to tidy up a category by moving its articles into
    subcategories

    Specify the category name on the command line. The program will pick up the
    page, and look for all subcategories and supercategories, and show them with
    a number adjacent to them. It will then automatically loop over all pages
    in the category. It will ask you to type the number of the appropriate
    replacement, and perform the change robotically.

    If you don't want to move the article to a subcategory or supercategory, but to
    another category, you can use the 'j' (jump) command.

    Typing 's' will leave the complete page unchanged.

    Typing 'm' or '?' will show you more of the beginning of the current
    page, helping you to find out what the article is about and in which
    other categories it currently is.

    Important:
    * this bot is written to work with the MonoBook skin, so make sure your bot
    account uses this skin
    """

    def __init__(self, catTitle, catDB):
        """
        Parameters:
        * catTitle - title (without namespace) of the category to tidy up
        * catDB    - a CategoryDatabase object used to look up sub- and
                     supercategories
        """
        self.catTitle = catTitle
        self.catDB = catDB
        template = pywikibot.translate(pywikibot.getSite(), msg_change)
        try:
            self.editSummary = template % catTitle
        except TypeError:
            # Some translations of msg_change expect two category titles
            # and cannot be filled with one; use the English text instead
            # of failing before the bot has even started.
            self.editSummary = msg_change['en'] % catTitle

    def _pick(self, candidates, number):
        """Return candidates[int(number)], or None if number is not a valid
        index into candidates (not a number, negative or too large)."""
        try:
            index = int(number)
        except ValueError:
            return None
        if 0 <= index < len(candidates):
            return candidates[index]
        return None

    def move_to_category(self, article, original_cat, current_cat):
        '''
        Given an article which is in category original_cat, ask the user if
        it should be moved to one of original_cat's subcategories.
        Recursively run through subcategories' subcategories.
        NOTE: current_cat is only used for internal recursion. You should
        always use current_cat = original_cat.
        '''
        pywikibot.output(u'')
        # Show the title of the page where the link was found.
        # Highlight the title in purple.
        pywikibot.output(u'Treating page \03{lightpurple}%s\03{default}, currently in \03{lightpurple}%s\03{default}' % (article.title(), current_cat.title()))

        # Determine a reasonable amount of context to print
        try:
            full_text = article.get(get_redirect = True)
        except pywikibot.NoPage:
            pywikibot.output(u'Page %s not found.' % article.title())
            return
        try:
            contextLength = full_text.index('\n\n')
        except ValueError: # substring not found
            contextLength = 500
        if full_text.startswith(u'[['): # probably an image
            # Add extra paragraph.
            contextLength = full_text.find('\n\n', contextLength+2)
        if contextLength > 1000 or contextLength < 0:
            contextLength = 500
        pywikibot.output(u'')
        pywikibot.output(full_text[:contextLength])
        pywikibot.output(u'')

        subcatlist = self.catDB.getSubcats(current_cat)
        supercatlist = self.catDB.getSupercats(current_cat)
        alternatives = u'\n'
        if not subcatlist:
            alternatives += u'This category has no subcategories.\n\n'
        if not supercatlist:
            alternatives += u'This category has no supercategories.\n\n'
        # show super- and subcategories as possible choices (with numbers)
        for i, supercat in enumerate(supercatlist):
            # layout: we don't expect a cat to have more than 10 supercats
            alternatives += (u"u%d - Move up to %s\n" % (i, supercat.title()))
        for i, subcat in enumerate(subcatlist):
            # layout: we don't expect a cat to have more than 100 subcats
            alternatives += (u"%2d - Move down to %s\n" % (i, subcat.title()))
        alternatives += u" j - Jump to another category\n"
        alternatives += u" s - Skip this article\n"
        alternatives += u" r - Remove this category tag\n"
        alternatives += u" l - list these options again\n"
        alternatives += u" m - more context\n"
        alternatives += (u"Enter - Save category as %s\n" % current_cat.title())

        # Keep asking until the user gives a command that finishes the
        # treatment of this article; every such command returns.
        longchoice = True
        while True:
            if longchoice:
                longchoice = False
                pywikibot.output(alternatives)
                choice = pywikibot.input(u"Option:")
            else:
                choice = pywikibot.input(u"Option (#, [j]ump, [s]kip, [r]emove, [l]ist, [m]ore context, [RETURN]):")
            if choice in ['s', 'S']:
                return
            elif choice == '':
                pywikibot.output(u'Saving category as %s' % current_cat.title())
                if current_cat == original_cat:
                    pywikibot.output(u'No changes necessary.')
                else:
                    newcat = u'[[:%s|%s]]' % (current_cat.title(savetitle=True, decode=True), current_cat.titleWithoutNamespace())
                    editsum = pywikibot.translate(pywikibot.getSite(), msg_replace) % {'oldcat': original_cat.titleWithoutNamespace(), 'newcat': newcat}
                    catlib.change_category(article, original_cat, current_cat, comment = editsum)
                return
            elif choice in ['j', 'J']:
                newCatTitle = pywikibot.input(u'Please enter the category the article should be moved to:')
                newCat = catlib.Category(pywikibot.getSite(), 'Category:' + newCatTitle)
                # recurse into chosen category
                self.move_to_category(article, original_cat, newCat)
                return
            elif choice in ['r', 'R']:
                # remove the category tag
                catlib.change_category(article, original_cat, None, comment = self.editSummary)
                return
            elif choice in ['l', 'L']:
                longchoice = True
            elif choice in ['m', 'M', '?']:
                contextLength += 500
                pywikibot.output(u'')
                pywikibot.output(full_text[:contextLength])
                pywikibot.output(u'')
                # if categories possibly weren't visible, show them additionally
                # (maybe this should always be shown?)
                if len(full_text) > contextLength:
                    pywikibot.output(u'')
                    pywikibot.output(u'Original categories: ')
                    for cat in article.categories():
                        pywikibot.output(u'* %s' % cat.title())
            else:
                # "u<number>" selects a supercategory, a plain number a
                # subcategory
                if choice[0] == 'u':
                    target = self._pick(supercatlist, choice[1:])
                else:
                    target = self._pick(subcatlist, choice)
                if target is None:
                    # unknown command or number out of range: ask again
                    continue
                # recurse into the chosen category
                self.move_to_category(article, original_cat, target)
                return

    def run(self):
        """Ask the user what to do with every article of the category."""
        cat = catlib.Category(pywikibot.getSite(), 'Category:' + self.catTitle)
        articles = cat.articlesList(recurse = False)
        if not articles:
            # self.catTitle: a bare "catTitle" is not defined in this method
            pywikibot.output(u'There are no articles in category ' + self.catTitle)
            return
        preloadingGen = pagegenerators.PreloadingGenerator(iter(articles))
        for article in preloadingGen:
            pywikibot.output(u'\n===================================================================')
            self.move_to_category(article, cat, cat)
class CategoryTreeRobot:
    '''
    Robot to create tree overviews of the category structure.

    Parameters:
        * catTitle - The category which will be the tree's root.
        * catDB    - A CategoryDatabase object
        * maxDepth - The limit beyond which no subcategories will be listed.
                     This also guarantees that loops in the category structure
                     won't be a problem.
        * filename - The textfile where the tree should be saved; None to print
                     the tree to stdout.
    '''

    # Translations to say that the current category is in more categories than
    # the one we're coming from
    also_in_cats = {
        'ar': u'( %s)',
        'be-x-old': u'( %s)',
        'ca': u'(tamb a %s)',
        'da': u'(ogs i %s)',
        'de': u'(auch in %s)',
        'en': u'(also in %s)',
        'es': u'(tambin en %s)',
        'fa': u'( %s)',
        'fi': u'(mys luokassa %s)',
        'fr': u'(galement dans %s)',
        'he': u'( %s)',
        'ia': u'(equalmente in %s)',
        'is': u'(einnig %s)',
        'kk': u'( %s )',
        'nds-nl': u'(oek in %s)',
        'nl': u'(ook in %s)',
        'no': u'(ogs i %s)',
        'nn': u'(g i %s)',
        'pl': u'(rwnie w %s)',
        'pt': u'(tambm em %s)',
        'ru': u'( %s)',
        'sv': u'(ocks i %s)',
        # NOTE(review): the language code of this entry is empty, so it can
        # never be selected - the intended code needs to be restored.
        '': u'( %s)',
        'zh': u'( %s)',
    }

    def __init__(self, catTitle, catDB, filename = None, maxDepth = 10):
        self.catTitle = catTitle
        self.catDB = catDB
        if filename and not os.path.isabs(filename):
            filename = pywikibot.config.datafilepath(filename)
        self.filename = filename
        # TODO: make maxDepth changeable with a parameter or config file entry
        self.maxDepth = maxDepth

    def _link(self, cat):
        '''Return a wiki link to cat that shows its title without namespace.'''
        title = cat.title()
        return '[[:%s|%s]]' % (title, title.split(':', 1)[1])

    def treeview(self, cat, currentDepth = 0, parent = None):
        '''
        Returns a multi-line string which contains a tree view of all subcategories
        of cat, up to level maxDepth. Recursively calls itself.

        Parameters:
            * cat - the Category of the node we're currently opening
            * currentDepth - the current level in the tree (for recursion)
            * parent - the Category of the category we're coming from
        '''
        result = u'#' * currentDepth
        result += self._link(cat)
        result += ' (%d)' % len(self.catDB.getArticles(cat))
        # We will remove an element of this array, but will need the original array
        # later, so we create a shallow copy with [:]
        supercats = self.catDB.getSupercats(cat)[:]
        # Find out which other cats are supercats of the current cat
        if parent is not None:
            try:
                supercats.remove(parent)
            except ValueError:
                # parent is not listed as a supercategory; nothing to hide
                pass
        if supercats:
            # create a list of wiki links to the supercategories
            supercat_names = [self._link(supercat) for supercat in supercats]
            # print this list, separated with commas, using translations given in also_in_cats
            result += ' ' + pywikibot.translate(pywikibot.getSite(), self.also_in_cats) % ', '.join(supercat_names)
        result += '\n'
        if currentDepth < self.maxDepth:
            for subcat in self.catDB.getSubcats(cat):
                # recurse into subdirectories
                result += self.treeview(subcat, currentDepth + 1, parent = cat)
        elif self.catDB.getSubcats(cat):
            # show that there are more categories beyond the depth limit
            result += '#' * (currentDepth + 1) + '[...]\n'
        return result

    def run(self):
        """
        Prints the multi-line string generated by treeview or saves it to a file.

        The tree starts at the category named by self.catTitle. If
        self.filename is set, the tree is appended to that file (UTF-8);
        otherwise it is written to stdout.
        """
        cat = catlib.Category(pywikibot.getSite(), 'Category:' + self.catTitle)
        tree = self.treeview(cat)
        if self.filename:
            pywikibot.output(u'Saving results in %s' % self.filename)
            import codecs
            f = codecs.open(self.filename, 'a', 'utf-8')
            try:
                f.write(tree)
            finally:
                # close the file even if writing fails
                f.close()
        else:
            pywikibot.output(tree, toStdout = True)
# Command line entry point: parse the arguments, run the requested action
# and always save the category database and shut pywikibot down afterwards.
if __name__ == "__main__":
    fromGiven = False
    toGiven = False
    batchMode = False
    editSummary = ''
    inPlace = False
    overwrite = False
    showImages = False
    talkPages = False
    recurse = False
    titleRegex = None

    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    # The generator gives the pages that should be worked upon.
    # It has to stay a module-level name because add_category() reads it.
    gen = None

    # If this is set to true then the custom edit summary given for removing
    # categories from articles will also be used as the deletion reason.
    useSummaryForDeletion = True
    try:
        catDB = CategoryDatabase()
        # The inner try only starts once catDB exists, so the dump in its
        # finally clause can never fail on an undefined name.
        try:
            action = None
            sort_by_last_name = False
            create_pages = False
            for arg in pywikibot.handleArgs():
                if arg in ('add', 'remove', 'move', 'tidy', 'tree',
                           'listify'):
                    action = arg
                elif arg == '-person':
                    sort_by_last_name = True
                elif arg == '-rebuild':
                    catDB.rebuild()
                elif arg.startswith('-from:'):
                    oldCatTitle = arg[len('-from:'):].replace('_', ' ')
                    fromGiven = True
                elif arg.startswith('-to:'):
                    newCatTitle = arg[len('-to:'):].replace('_', ' ')
                    toGiven = True
                elif arg == '-batch':
                    batchMode = True
                elif arg == '-inplace':
                    inPlace = True
                elif arg == '-delsum':
                    # This parameter is kept for historical reasons, as it was not previously the default option.
                    pass
                elif arg == '-nodelsum':
                    useSummaryForDeletion = False
                elif arg == '-overwrite':
                    overwrite = True
                elif arg == '-showimages':
                    showImages = True
                elif arg.startswith('-summary:'):
                    editSummary = arg[len('-summary:'):]
                elif arg.startswith('-match'):
                    if len(arg) == len('-match'):
                        titleRegex = pywikibot.input(u'Which regular expression should affected objects match?')
                    else:
                        titleRegex = arg[len('-match:'):]
                elif arg == '-talkpages':
                    talkPages = True
                elif arg == '-recurse':
                    recurse = True
                elif arg == '-create':
                    create_pages = True
                else:
                    genFactory.handleArg(arg)

            if action == 'add':
                # Note that the add functionality is the only bot that actually uses the
                # the generator factory. Every other bot creates its own generator exclusively
                # from the command-line arguments that category.py understands.
                if not gen:
                    gen = genFactory.getCombinedGenerator()
                if not gen:
                    genFactory.handleArg('-links') #default for backwards compatibility
                # The preloading generator is responsible for downloading multiple
                # pages from the wiki simultaneously.
                gen = pagegenerators.PreloadingGenerator(genFactory.getCombinedGenerator())
                add_category(sort_by_last_name, create_pages)
            elif action == 'remove':
                if not fromGiven:
                    oldCatTitle = pywikibot.input(u'Please enter the name of the category that should be removed:')
                # titleRegex is passed on so that -match is honoured for
                # the remove action as described in the usage text.
                bot = CategoryRemoveRobot(oldCatTitle, batchMode, editSummary, useSummaryForDeletion, titleRegex = titleRegex, inPlace = inPlace)
                bot.run()
            elif action == 'move':
                if not fromGiven:
                    oldCatTitle = pywikibot.input(u'Please enter the old name of the category:')
                if not toGiven:
                    newCatTitle = pywikibot.input(u'Please enter the new name of the category:')
                bot = CategoryMoveRobot(oldCatTitle, newCatTitle, batchMode, editSummary, inPlace, titleRegex = titleRegex)
                bot.run()
            elif action == 'tidy':
                catTitle = pywikibot.input(u'Which category do you want to tidy up?')
                bot = CategoryTidyRobot(catTitle, catDB)
                bot.run()
            elif action == 'tree':
                catTitle = pywikibot.input(u'For which category do you want to create a tree view?')
                filename = pywikibot.input(u'Please enter the name of the file where the tree should be saved, or press enter to simply show the tree:')
                bot = CategoryTreeRobot(catTitle, catDB, filename)
                bot.run()
            elif action == 'listify':
                if not fromGiven:
                    oldCatTitle = pywikibot.input(u'Please enter the name of the category to listify:')
                if not toGiven:
                    newCatTitle = pywikibot.input(u'Please enter the name of the list to create:')
                bot = CategoryListifyRobot(oldCatTitle, newCatTitle, editSummary, overwrite, showImages, subCats = True, talkPages = talkPages, recurse = recurse)
                bot.run()
            else:
                pywikibot.showHelp('category')
        finally:
            catDB.dump()
    finally:
        # runs even if the database could not be created or dumped
        pywikibot.stopme()
|