#!/usr/bin/python
#coding: utf-8
"""
This bot takes its input from a file that contains a number of
pages to be put on the wiki. The pages should all have the same
begin and end text (which may not overlap).
By default the text should have the intended title of the page
as the first text in bold (that is, between ''' and '''),
you can modify this behavior with command line options.
The default is not to include the begin and
end text in the page; if you want to include that text, use
the -include option.
Specific arguments:
-start:xxx Specify the text that marks the beginning of a page
-end:xxx Specify the text that marks the end of a page
-file:xxx Give the filename we are getting our material from
-include The beginning and end markers should be included
in the page.
-titlestart:xxx Use xxx in place of ''' for identifying the
beginning of page title
-titleend:xxx Use xxx in place of ''' for identifying the
end of page title
-notitle do not include the title, including titlestart, and
titleend, in the page
-summary:xxx Use xxx as the edit summary for the upload - if
a page exists, standard messages are appended
after xxx for appending, prepending, or replacement
-autosummary Use MediaWiki's autosummary when creating a new page,
overrides -summary in this case
-minor set minor edit flag on page edits
-dry Do not really upload pages, just check and report
messages
If the page to be uploaded already exists:
-safe do nothing (default)
-appendtop add the text to the top of it
-appendbottom add the text to the bottom of it
-force overwrite the existing page
"""
#
# (C) Andre Engels, 2004
# (C) Pywikipedia bot team, 2005-2010
#
# Distributed under the terms of the MIT license.
#
# Revision identifier string; the '$Id: ... $' form looks like a
# version-control keyword expansion -- NOTE(review): confirm before editing by hand.
__version__='$Id: pagefromfile.py 8174 2010-05-14 21:33:28Z amir $'
import re, codecs
import wikipedia, config
class NoTitle(Exception):
    """Raised when a page was located but no title could be found in it.

    The ``offset`` attribute holds the value handed to the constructor.
    ``PageFromFileReader.findpage`` passes the end position of the page
    match, and ``PageFromFileReader.run`` adds it to its read position so
    that the title-less page is skipped.
    """

    def __init__(self, offset):
        # Keep the offset so the handler can advance past the bad page.
        self.offset = offset
class PageFromFileRobot:
    """
    Responsible for writing pages to the wiki, with the titles and contents
    given by a PageFromFileReader.
    """

    # NOTE(review): several of the translations below look as if their
    # non-ASCII characters were lost at some point (e.g. the 'de', 'pl' and
    # 'pt' entries, and the near-empty 'ar'/'fa'/'he'/'ja'/'zh' entries).
    # They are kept exactly as found -- verify against a known-good copy.

    # Default edit summary, used when no -summary was given.
    msg = {
        'ar': u' ',
        'de': u'Automatischer Import von Artikeln',
        'en': u'Automated import of articles',
        'fa': u': ',
        'fr': u'Import automatique',
        'he': u' ',
        'ia': u'Importation automatic de articulos',
        'id': u'Impor artikel automatis',
        'it': u'Caricamento automatico',
        'ja': u'',
        'ksh': u'Bot: automatesch huhjelaade',
        'nl': u'Geautomatiseerde import',
        'no': u'bot: Automatisk import',
        'pl': u'Automatyczny import artykuw',
        'pt': u'Importao automtica de artigos',
        'zh': u': ',
    }

    # The following messages are added to the summary when the page
    # already exists.
    msg_top = {
        'ar': u' ',
        'de': u'ergnze am Anfang',
        'en': u'append on top',
        'fa': u' ',
        'he': u' ',
        'fr': u'rajout en haut',
        'id': u'ditambahkan di atas',
        'it': u'aggiungo in cima',
        'ja': u'',
        'ksh': u'un dofrjesaz',
        'nl': u'bovenaan toegevoegd',
        'no': u'legger til verst',
        'pl': u'dodaj na grze',
        'pt': u'adicionado no topo',
        'zh': u': ',
    }

    msg_bottom = {
        'ar': u' ',
        'de': u'ergnze am Ende',
        'en': u'append on bottom',
        'fa': u' ',
        'he': u' ',
        'fr': u'rajout en bas',
        'id': u'ditambahkan di bawah',
        'it': u'aggiungo in fondo',
        'ja': u'',
        'ksh': u'un aanjehange',
        'nl': u'onderaan toegevoegd',
        'no': u'legger til nederst',
        'pl': u'dodaj na dole',
        'pt': u'adicionando no fim',
        'zh': u': ',
    }

    msg_force = {
        'ar': u' ',
        'de': u'bestehender Text berschrieben',
        'en': u'existing text overwritten',
        'fa': u' ',
        'he': u' ',
        'fr': u'texte existant cras',
        'id': u'menimpa teks yang ada',
        'it': u'sovrascritto il testo esistente',
        'ja': u'',
        'ksh': u'un komplt ujetuusch',
        'nl': u'bestaande tekst overschreven',
        'no': u'erstatter eksisterende tekst',
        'pl': u'aktualny tekst nadpisany',
        'pt': u'sobrescrever texto',
        'zh': u': ',
    }

    def __init__(self, reader, force, append, summary, minor, autosummary, debug):
        """Store the reader and the upload options.

        reader      -- object whose run() yields (title, contents) pairs
        force       -- overwrite a page that already exists
        append      -- "Top" or "Bottom" to add to an existing page; any
                       other value disables appending
        summary     -- edit summary; a translated default is used if false
        minor       -- passed to page.put() as minorEdit
        autosummary -- use an empty summary when creating a new page
        debug       -- dry run: report what would be saved, save nothing
        """
        self.reader = reader
        self.force = force
        self.append = append
        self.summary = summary
        self.minor = minor
        self.autosummary = autosummary
        self.dry = debug

    def run(self):
        """Upload every (title, contents) pair produced by the reader."""
        for title, contents in self.reader.run():
            self.put(title, contents)

    def put(self, title, contents):
        """Save contents to the page called title, honouring the options.

        An existing page is prepended to, appended to, overwritten or left
        alone depending on self.append and self.force. In dry mode the
        title, contents and summary are printed and nothing is saved.
        Locked pages, edit conflicts and spam-filter rejections are
        reported and skipped.
        """
        mysite = wikipedia.getSite()
        page = wikipedia.Page(mysite, title)
        # Show the title of the page we're working on.
        # Highlight the title in purple.
        wikipedia.output(u">>> \03{lightpurple}%s\03{default} <<<" % page.title())

        if self.summary:
            comment = self.summary
        else:
            comment = wikipedia.translate(mysite, self.msg)
        comment_top = comment + " - " + wikipedia.translate(mysite, self.msg_top)
        comment_bottom = comment + " - " + wikipedia.translate(mysite, self.msg_bottom)
        comment_force = comment + " *** " + wikipedia.translate(mysite, self.msg_force) + " ***"

        # Remove leading newlines (cause troubles when creating redirects)
        contents = contents.lstrip('\r\n')

        if page.exists():
            if self.append == "Top":
                wikipedia.output(u"Page %s already exists, appending on top!" % title)
                contents = contents + page.get()
                comment = comment_top
            elif self.append == "Bottom":
                wikipedia.output(u"Page %s already exists, appending on bottom!" % title)
                contents = page.get() + contents
                comment = comment_bottom
            elif self.force:
                wikipedia.output(u"Page %s already exists, ***overwriting!" % title)
                comment = comment_force
            else:
                wikipedia.output(u"Page %s already exists, not adding!" % title)
                return
        elif self.autosummary:
            # New page: clear both the comment and the global action so
            # that no explicit summary is sent with the edit.
            comment = ''
            wikipedia.setAction('')

        if self.dry:
            wikipedia.output("*** Dry mode ***\n" +
                             "\03{lightpurple}title\03{default}: " + title + "\n" +
                             "\03{lightpurple}contents\03{default}:\n" + contents + "\n"
                             "\03{lightpurple}comment\03{default}: " + comment + "\n")
            return

        try:
            page.put(contents, comment = comment, minorEdit = self.minor)
        except wikipedia.LockedPage:
            wikipedia.output(u"Page %s is locked; skipping." % title)
        except wikipedia.EditConflict:
            wikipedia.output(u'Skipping %s because of edit conflict' % title)
        except wikipedia.SpamfilterError as error:
            wikipedia.output(u'Cannot change %s because of spam blacklist entry %s' % (title, error.url))
class PageFromFileReader:
    """
    Responsible for reading the file.

    The run() method yields a (title, contents) tuple for each found page.
    """

    def __init__(self, filename, pageStartMarker, pageEndMarker, titleStartMarker, titleEndMarker, include, notitle):
        """Store the file name and the marker/option settings.

        filename         -- path of the text file to read
        pageStartMarker  -- literal text that opens each page
        pageEndMarker    -- literal text that closes each page
        titleStartMarker -- literal text that precedes the title
        titleEndMarker   -- literal text that follows the title
        include          -- keep the page markers in the page contents
        notitle          -- strip the title (and its markers) from the contents
        """
        self.filename = filename
        self.pageStartMarker = pageStartMarker
        self.pageEndMarker = pageEndMarker
        self.titleStartMarker = titleStartMarker
        self.titleEndMarker = titleEndMarker
        self.include = include
        self.notitle = notitle

    def run(self):
        """Read the file and yield (title, contents) for every page in it.

        If the file cannot be opened the error is printed and nothing is
        yielded. Pages without a title are reported and skipped.
        """
        wikipedia.output('Reading \'%s\'...' % self.filename)
        try:
            f = codecs.open(self.filename, 'r', encoding = config.textfile_encoding)
        except IOError as err:
            print(err)
            return
        # Close the handle as soon as the text is in memory.
        try:
            text = f.read()
        finally:
            f.close()

        position = 0
        # True once any page has matched, with or without a title; selects
        # the message printed when no further page is found.
        matched_any = False
        while True:
            try:
                length, title, contents = self.findpage(text[position:])
            except AttributeError:
                # findpage() signals "no further page" with AttributeError.
                if not matched_any:
                    wikipedia.output(u'\nStart or end marker not found.')
                else:
                    wikipedia.output(u'End of file.')
                break
            except NoTitle as err:
                wikipedia.output(u'\nNo title found - skipping a page.')
                matched_any = True
                position += err.offset
                continue
            matched_any = True
            position += length
            yield title, contents

    def findpage(self, text):
        """Locate the first page in text.

        Returns a tuple (end, title, contents) where end is the offset in
        text just past the page's end marker.

        The markers are matched as literal text: they are escaped before
        being placed in the regular expressions, so characters such as
        '(', '[', '*' or '.' in a marker have no special meaning.

        Raises AttributeError if no start/end marker pair is found, and
        NoTitle (carrying the end offset) if the page has no title.
        """
        pageR = re.compile(re.escape(self.pageStartMarker) + "(.*?)" + re.escape(self.pageEndMarker), re.DOTALL)
        titleR = re.compile(re.escape(self.titleStartMarker) + "(.*?)" + re.escape(self.titleEndMarker))

        location = pageR.search(text)
        if location is None:
            # Callers (see run()) rely on AttributeError for "no page".
            raise AttributeError("no page delimited by the start and end markers was found")

        if self.include:
            contents = location.group()
        else:
            contents = location.group(1)

        title_match = titleR.search(contents)
        if title_match is None:
            raise NoTitle(location.end())
        title = title_match.group(1)
        if self.notitle:
            # Remove title (to allow creation of redirects)
            contents = titleR.sub('', contents, count = 1)
        return location.end(), title, contents
def main():
    """Read the command-line options, then build and run the upload bot.

    Options of the form ``-name:value`` set a string setting; bare switches
    set one or more flags. Anything else is reported and ignored.
    """
    # Adapt these to the file you are using. 'pageStartMarker' and
    # 'pageEndMarker' are the beginning and end of each entry. Take text
    # that should be included and does not occur elsewhere in the text.
    # TODO: make config variables for these.
    settings = {
        "filename": "dict.txt",
        "pageStartMarker": "{{-start-}}",
        "pageEndMarker": "{{-stop-}}",
        "titleStartMarker": u"'''",
        "titleEndMarker": u"'''",
        "include": False,
        "force": False,
        "append": None,
        "notitle": False,
        "summary": None,
        "minor": False,
        "autosummary": False,
        "dry": False,
    }

    # "-name:value" options: option name -> setting that receives the value.
    valued_options = {
        "-start": "pageStartMarker",
        "-end": "pageEndMarker",
        "-file": "filename",
        "-titlestart": "titleStartMarker",
        "-titleend": "titleEndMarker",
        "-summary": "summary",
    }

    # Bare switches: exact argument -> settings to overwrite.
    switches = {
        "-include": {"include": True},
        "-appendtop": {"append": "Top"},
        "-appendbottom": {"append": "Bottom"},
        "-force": {"force": True},
        "-dry": {"dry": True},
        "-safe": {"force": False, "append": None},
        "-notitle": {"notitle": True},
        "-minor": {"minor": True},
        "-autosummary": {"autosummary": True},
    }

    for arg in wikipedia.handleArgs():
        # Split at the first colon only, so values may contain colons.
        name, colon, value = arg.partition(":")
        if colon and name in valued_options:
            settings[valued_options[name]] = value
        elif arg in switches:
            settings.update(switches[arg])
        else:
            wikipedia.output(u"Disregarding unknown argument %s." % arg)

    reader = PageFromFileReader(
        settings["filename"],
        settings["pageStartMarker"],
        settings["pageEndMarker"],
        settings["titleStartMarker"],
        settings["titleEndMarker"],
        settings["include"],
        settings["notitle"],
    )
    bot = PageFromFileRobot(
        reader,
        settings["force"],
        settings["append"],
        settings["summary"],
        settings["minor"],
        settings["autosummary"],
        settings["dry"],
    )
    bot.run()
# Run the bot only when this file is executed as a script.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Call wikipedia.stopme() whether main() returned or raised.
        wikipedia.stopme()
|