# -*- coding: utf-8  -*-
"""File containing all standard fixes."""


# (C) Pywikipedia team, 2008-2010
__version__ = '$Id: 8228 2010-05-28 17:32:37Z xqt $'
# Distributed under the terms of the MIT license.

# Help text listing the predefined fixes by name, one bullet per fix.
# The literal was left unterminated, which swallowed the remainder of the
# module into the string; the closing delimiter below ends it after the note.
# NOTE(review): the text itself is reproduced unchanged, but it looks
# truncated in places (e.g. the 'yu-tld' entry ends with "see" and the
# closing note has a gap before "You may use") - compare with a pristine copy.
help = u"""
                  * HTML        - Convert HTML tags to wiki syntax, and
                                  fix XHTML.
                                    **) NOTE below
                  * isbn        - Fix badly formatted ISBNs.
                                    **) NOTE below
                  * syntax      - Try to fix bad wiki markup. Do not run
                                  this in automatic mode, as the bot may
                                  make mistakes.
                  * syntax-safe - Like syntax, but less risky, so you can
                                  run this in automatic mode.
                                    **) NOTE below
                  * case-de     - fix upper/lower case errors in German
                  * grammar-de  - fix grammar and typography in German
                  * vonbis      - Ersetze Binde-/Gedankenstrich durch "bis"
                                  in German
                  * music       - Links auf Begriffsklrungen in German
                  * datum       - specific date formats in German
                  * correct-ar  - Corrections for Arabic Wikipedia and any
                                  Arabic wiki.
                  * yu-tld      - the yu top-level domain will soon be
                                  disabled, see
                  * fckeditor   - Try to convert FCKeditor HTML tags to wiki

                                    **) NOTE: these fixes are part of the
                               You may use
                                        that script instead.
"""

# Table of predefined fixes, keyed by fix name. As far as this file shows,
# an entry carries 'regex' (True where the patterns are regular expressions),
# 'msg' (text per language code, presumably the edit summary),
# 'replacements' (a list of (old, new) pairs) and optionally 'exceptions'.
# NOTE(review): in this copy the closing brackets of the nested literals are
# not present and the non-ASCII text of several messages appears to be
# missing - restore both from a pristine copy before relying on this table.
fixes = {
    # These replacements will convert HTML to wiki syntax where possible, and
    # make remaining tags XHTML compliant.
    'HTML': {
        'regex': True,
        'msg': {
            'ar':u': / HTML',
            'be':u':  HTML',
            'cs':u'pevod/oprava HTML',
            'en':u'Robot: Converting/fixing HTML',
            'eo':u'Bot: koredtado de HTMLa teksto',
            'fa':u':/  ',
            'de':u'Bot: konvertiere/korrigiere HTML',
            'fr':u'Robot: convertit/fixe HTML',
            'he':u': / HTML',
            'ja':u': HTML',
            'ksh':u'Bot: vun HTML en Wikikood wandelle',
            'ia':u'Robot: conversion/reparation de HTML',
            'lt':u'robotas: konvertuojamas/taisomas HTML',
            'nl':u'Bot: conversie/reparatie HTML',
            'pl':u'Robot konwertuje/naprawia HTML',
            'pt':u'Bot: Corrigindo HTML',
            'ru':u':  HTML',
            'sr':u':  HTML-',
            'sv':u'Bot: Konverterar/korrigerar HTML',
            'uk':u': i HTML',
            'zh':u': HTML',
        'replacements': [
            # Everything case-insensitive (?i)
            # Keep in mind that MediaWiki automatically converts <br> to <br />
            # when rendering pages, so you might comment the next two lines out
            # to save some time/edits.
            #(r'(?i)<br>',                      r'<br />'),
            # linebreak with attributes
            #(r'(?i)<br ([^>/]+?)>',            r'<br \1 />'),
            # bold / italic tags become wiki quote markup; the non-greedy
            # group keeps each match to the nearest closing tag
            (r'(?i)<b>(.*?)</b>',              r"'''\1'''"),
            (r'(?i)<strong>(.*?)</strong>',    r"'''\1'''"),
            (r'(?i)<i>(.*?)</i>',              r"''\1''"),
            (r'(?i)<em>(.*?)</em>',            r"''\1''"),
            # horizontal line without attributes in a single line
            (r'(?i)([\r\n])<hr[ /]*>([\r\n])', r'\1----\2'),
            # horizontal line without attributes with more text in the same line
            #(r'(?i) +<hr[ /]*> +',             r'\r\n----\r\n'),
            # horizontal line with attributes; can't be done with wiki syntax
            # so we only make it XHTML compliant
            (r'(?i)<hr ([^>/]+?)>',            r'<hr \1 />'),
            # a header where only spaces are in the same line; the captured
            # line breaks before and after are put back unchanged
            (r'(?i)([\r\n]) *<h1> *([^<]+?) *</h1> *([\r\n])',  r"\1= \2 =\3"),
            (r'(?i)([\r\n]) *<h2> *([^<]+?) *</h2> *([\r\n])',  r"\1== \2 ==\3"),
            (r'(?i)([\r\n]) *<h3> *([^<]+?) *</h3> *([\r\n])',  r"\1=== \2 ===\3"),
            (r'(?i)([\r\n]) *<h4> *([^<]+?) *</h4> *([\r\n])',  r"\1==== \2 ====\3"),
            (r'(?i)([\r\n]) *<h5> *([^<]+?) *</h5> *([\r\n])',  r"\1===== \2 =====\3"),
            (r'(?i)([\r\n]) *<h6> *([^<]+?) *</h6> *([\r\n])',  r"\1====== \2 ======\3"),
            # TODO: maybe we can make the bot replace <p> tags with \r\n's.
        'exceptions': {
            'inside-tags': [
                # NOTE(review): no entries follow in this copy - confirm which
                # tags were meant to be excluded.

    # Grammar fixes for German language
    # Do NOT run this automatically!
    # NOTE(review): several patterns below contain empty alternatives ("||")
    # or an empty character class ("[]"), and the closing brackets of the
    # nested literals are absent - non-ASCII characters and bracket lines
    # appear to have been lost from this copy; verify against a pristine one.
    'grammar-de': {
        'regex': True,
        'msg': {
            'de':u'Bot: korrigiere Grammatik',
        'replacements': [
            #(u'([Ss]owohl) ([^,\.]+?), als auch',                                                            r'\1 \2 als auch'),
            #(u'([Ww]eder) ([^,\.]+?), noch', r'\1 \2 noch'),
            # Careful with nouns, e.g. "3-Jaehriger"!
            (u'(\d+)(mintig|stndig|tgig|wchig|jhrig|mintlich|stndlich|tglich|wchentlich|jhrlich|fach|mal|malig|kpfig|teilig|gliedrig|geteilt|elementig|dimensional|bndig|eckig|farbig|stimmig)', r'\1-\2'),
            # compound word: the hyphen is carried through (number-unit-word)
            (u'(?<!\w)(\d+|\d+[\.,]\d+)(\$||DM|||mg|g|kg|ml|cl|l|t|ms|min|m|mm|cm|dm|m|km|ha|C|kB|MB|GB|TB|W|kW|MW|GW|PS|Nm|eV|kcal|mA|mV|kV||Hz|kHz|MHz|GHz|mol|Pa|Bq|Sv|mSv)([]?-[\w\[])',           r'\1-\2\3'),
            # quantity without a space before the unit
            # omitted because of many false positives: s, A, V, C, S, J, %
            (u'(?<!\w)(\d+|\d+[\.,]\d+)(\$||DM|||mg|g|kg|ml|cl|l|t|ms|min|m|mm|cm|dm|m|km|ha|C|kB|MB|GB|TB|W|kW|MW|GW|PS|Nm|eV|kcal|mA|mV|kV||Hz|kHz|MHz|GHz|mol|Pa|Bq|Sv|mSv)(?=\W|||$)',          r'\1 \2'),
            # temperature with a misplaced space
            (u'(?<!\w)(\d+|\d+[\.,]\d+) C(?=\W|||$)',          ur'\1 C'),
            # no space after a comma
            (u'([a-z](\]\])?,)((\[\[)?[a-zA-Z])',                                                                          r'\1 \3'),
            # space and comma swapped
            (u'([a-z](\]\])?) ,((\[\[)?[a-zA-Z])',                                                                          r'\1, \3'),
            # "Plenk" (i.e. a space before the comma/period/exclamation mark/question mark as well)
            # Careful with French:
            # a space before a colon/semicolon can be correct there, according to some citation standard.
            (u'([a-z](\]\])?) ([,\.!\?]) ((\[\[)?[a-zA-Z])',                                                                          r'\1\3 \4'),
            #(u'([a-z]\.)([A-Z])',                                                                             r'\1 \2'),
        'exceptions': {
            'inside-tags': [
                'pre',           # because of code examples
                'source',        # because of code examples
                'startspace',    # because of code examples
                'hyperlink',     # e.g. commas in URLs
                'gallery',       # because of filenames
            'text-contains': [
                r'',     # Swiss news site (NOTE(review): pattern is empty in this copy)
            'inside': [
                r'<code>.*</code>', # because of code examples
                ur'{{\|.*?}}',  # section of a law
                ur' ?\d+[a-z]',  # section of a law
                r'Ju 52/1m', # aircraft designation
                r'Ju 52/3m', # aircraft designation
                r'AH-1W',    # helicopter designation
                r'ZPG-3W',   # airship designation
                r'8mm',      # film title
                r'802.11g',  # WLAN standard
                r'DOS/4GW',  # software
                r'ntfs-3g',  # file system driver
                r'/\w(,\w)*/',     # enumeration of sounds in linguistics
                r'[xyz](,[xyz])+', # variables in mathematics (unclear whether spaces are mandatory here)
                r'(?m)^;(.*?)$', # definition lists, which often have intentional spaces before colons
                r'\d+h( |&nbsp;)\d+m', # notation for durations, mainly in film infoboxes. Not correct, but nice and short.
                r'(?i)\[\[(Bild|Image|Media):.+?\|', # skip file names
                r'{{bgc\|.*?}}',  # background colour
                r'<sup>\d+m</sup>',                   # in chemical formulas
                r'\([A-Z][A-Za-z]*(,[A-Z][A-Za-z]*(<sup>.*?</sup>|<sub>.*?</sub>|))+\)' # chemical formula, e.g. AuPb(Pb,Sb,Bi)Te. No spaces should go after the commas here.
            'title': [
                r'Arsen',  # chemical formula

    # Do NOT run this automatically!
    # Recommendation: First run syntax-safe automatically, afterwards
    # run syntax manually, carefully checking that you're not breaking
    # anything.
    # NOTE(review): the closing brackets of the nested literals are absent in
    # this copy and several message / pattern strings look truncated or empty;
    # verify against a pristine copy.
    'syntax': {
        'regex': True,
        'msg': {
            'ar':u':   ',
            'be':u':  ii-ii',
            'cs':u'Oprava wikisyntaxe',
            'de':u'Bot: Korrigiere Wiki-Syntax',
            'en':u'Bot: Fixing wiki syntax',
            'eo':u'Bot: Korektado de vikia sintakso',
            'fa':u':  ',
            'fr':u'Bot: Corrige wiki-syntaxe',
            'he':u':   ',
            'ia':u'Robot: Reparation de syntaxe wiki',
            'ja':u': wiki',
            'lt':u'robotas: Taisoma wiki sintaks',
            'nl':u'Bot: reparatie wikisyntaxis',
            'pl':u'Robot poprawia wiki-skadni',
            'pt':u'Bot: Corrigindo sintaxe wiki',
            'ru':u':   ',
            'sr':u':   ',
            'uk':u': i ii-',
            'zh':u': wiki',
        'replacements': [
            # external link in double brackets
            (r'\[\[(?P<url>https?://[^\]]+?)\]\]',   r'[\g<url>]'),
            # external link starting with double bracket
            (r'\[\[(?P<url>https?://.+?)\]',   r'[\g<url>]'),
            # external link with forgotten closing bracket
            #(r'\[(?P<url>https?://[^\]\s]+)\r\n',  r'[\g<url>]\r\n'),
            # external link ending with double bracket.
            # do not change weblinks that contain wiki links
            # inside the description
            (r'\[(?P<url>https?://[^\[\]]+?)\]\](?!\])',   r'[\g<url>]'),
            # external link and description separated by a pipe character
            # ('|'; the original comment called it a "dash").
            # ATTENTION: while this is a mistake in most cases, there are some
            # valid URLs that contain this character!
            (r'\[(?P<url>https?://[^\|\]\s]+?) *\| *(?P<label>[^\|\]]+?)\]', r'[\g<url> \g<label>]'),
            # wiki link closed by single bracket.
            # ATTENTION: There are some false positives, for example
            # Brainfuck code examples or MS-DOS parameter instructions.
            # There are also sometimes better ways to fix it than
            # just putting an additional ] after the link.
            (r'\[\[([^\[\]]+?)\](?!\])',  r'[[\1]]'),
            # wiki link opened by single bracket.
            # ATTENTION: same as above.
            (r'(?<!\[)\[([^\[\]]+?)\]\](?!\])',  r'[[\1]]'),
            # template closed by single bracket
            # ATTENTION: There are some false positives, especially in
            # mathematical context or program code.
            (r'{{([^{}]+?)}(?!})',       r'{{\1}}'),
        'exceptions': {
            'inside-tags': [
                'source',        # because of code examples
                'startspace',    # because of code examples
            'text-contains': [
                r'http://.*?object=tx\|',               # regular dash in URL
                r'http://.*?allmusic\.com',             # regular dash in URL
                r'http://.*?allmovie\.com',             # regular dash in URL
                # NOTE(review): the next three patterns are empty strings in
                # this copy - the URLs they named appear to be missing.
                r'',            # regular dash in URL
                r'', # regular dash in URL
                r'',   # regular dash in URL
                r'&object=med',                         # regular dash in URL
                r'\[CDATA\['                            # lots of brackets

    # The same as syntax, but restricted to replacements that should
    # be safe to run automatically.
    # NOTE(review): the closing brackets of the nested literals are absent in
    # this copy and several message strings look truncated; verify against a
    # pristine copy.
    'syntax-safe': {
        'regex': True,
        'msg': {
            'ar':u':   ',
            'be':u':  ii-ii',
            'cs':u'Oprava wikisyntaxe',
            'de':u'Bot: Korrigiere Wiki-Syntax',
            'en':u'Bot: Fixing wiki syntax',
            'eo':u'Bot: Korektado de vikia sintakso',
            'fa':u':  ',
            'fr':u'Bot: Corrige wiki-syntaxe',
            'he':u':   ',
            'ia':u'Robot: Reparation de syntaxe wiki',
            'ja':u': wiki',
            'lt':u'robotas: Taisoma wiki sintaks',
            'nl':u'Bot: reparatie wikisyntaxis',
            'pl':u'Robot poprawia wiki-skadni',
            'pt':u'Bot: Corrigindo sintaxe wiki',
            'ru':u':   ',
            'sr':u':   ',
            'uk':u': i ii-',
            'zh':u': wiki',
        'replacements': [
            # external link in double brackets
            (r'\[\[(?P<url>https?://[^\]]+?)\]\]',   r'[\g<url>]'),
            # external link starting with double bracket
            (r'\[\[(?P<url>https?://.+?)\]',   r'[\g<url>]'),
            # external link with forgotten closing bracket
            #(r'\[(?P<url>https?://[^\]\s]+)\r\n',   r'[\g<url>]\r\n'),
            # external link and description separated by a pipe character
            # ('|'; the original comment called it a "dash"), with
            # whitespace in front of it, so that it is clear that
            # the character is not a legitimate part of the URL.
            (r'\[(?P<url>https?://[^\|\] \r\n]+?) +\| *(?P<label>[^\|\]]+?)\]', r'[\g<url> \g<label>]'),
            # pipe character in external link, where the correct end of the
            # URL can be detected from the file extension. It is very unlikely
            # that this will cause mistakes.
            (r'\[(?P<url>https?://[^\|\] ]+?(\.pdf|\.html|\.htm|\.php|\.asp|\.aspx|\.jsp)) *\| *(?P<label>[^\|\]]+?)\]', r'[\g<url> \g<label>]'),
        'exceptions': {
            'inside-tags': [
                'source',        # because of code examples
                'startspace',    # because of code examples

    # Each pair capitalises the adjective of a fixed German proper name; the
    # optional ending captured by the group is re-inserted via \1.
    # NOTE(review): closing brackets are absent in this copy, some words have
    # evidently lost their non-ASCII letters, and both exception lists are
    # empty here - verify against a pristine copy.
    'case-de': { # German upper / lower case issues
        'regex': True,
        'msg': {
            'de':u'Bot: Korrigiere Gro-/Kleinschreibung',
        'replacements': [
            (r'\batlantische(r|n|) Ozean', r'Atlantische\1 Ozean'),
            (r'\bdeutsche(r|n|) Bundestag\b', r'Deutsche\1 Bundestag'),
            (r'\bdeutschen Bundestags\b', r'Deutschen Bundestags'), # Careful, e.g. 'deutsche Bundestagswahl'
            (r'\bdeutsche(r|n|) Reich\b', r'Deutsche\1 Reich'),
            (r'\bdeutschen Reichs\b', r'Deutschen Reichs'), # Careful, e.g. 'deutsche Reichsgrenzen'
            (r'\bdritte(n|) Welt(?!krieg)', r'Dritte\1 Welt'),
            (r'\bdreiigjhrige(r|n|) Krieg', r'Dreiigjhrige\1 Krieg'),
            (r'\beuropische(n|) Gemeinschaft', r'Europische\1 Gemeinschaft'),
            (r'\beuropische(n|) Kommission', r'Europische\1 Kommission'),
            (r'\beuropische(n|) Parlament', r'Europische\1 Parlament'),
            (r'\beuropische(n|) Union', r'Europische\1 Union'),
            (r'\berste(r|n|) Weltkrieg', r'Erste\1 Weltkrieg'),
            (r'\bkalte(r|n|) Krieg', r'Kalte\1 Krieg'),
            (r'\bpazifische(r|n|) Ozean', r'Pazifische\1 Ozean'),
            (r'Tag der deutschen Einheit', r'Tag der Deutschen Einheit'),
            (r'\bzweite(r|n|) Weltkrieg', r'Zweite\1 Weltkrieg'),
        'exceptions': {
            'inside-tags': [
            'text-contains': [

    # Replaces the separator between two 3-4 digit numbers following "von"
    # with the word "bis" (see the German edit message below).
    # NOTE(review): closing brackets are absent in this copy and the
    # alternation contains empty alternatives where dash characters appear to
    # have been lost - verify against a pristine copy.
    'vonbis': {
        'regex': True,
        'msg': {
            'de':u'Bot: Ersetze Binde-/Gedankenstrich durch "bis"',
        'replacements': [
            # hyphen, en dash, em dash
            (u'(von \d{3,4}) *(-|&ndash;||&mdash;|) *(\d{3,4})', r'\1 bis \3'),

    # some disambiguation stuff for de:
    # python -fix:music -subcat:Album
    # 'regex' is False here: the pairs are plain link texts, not patterns
    # (the square brackets are meant literally).
    # NOTE(review): closing brackets are absent in this copy and the
    # 'inside-tags' list has no entries - verify against a pristine copy.
    'music': {
        'regex': False,
        'msg': {
            'de':u'Bot: korrigiere Links auf Begriffsklrungen',
        'replacements': [
            (u'[[CD]]', u'[[Audio-CD|CD]]'),
            (u'[[LP]]', u'[[Langspielplatte|LP]]'),
            (u'[[EP]]', u'[[Extended Play|EP]]'),
            (u'[[MC]]', u'[[Musikkassette|MC]]'),
            (u'[[Single]]', u'[[Single (Musik)|Single]]'),
        'exceptions': {
            'inside-tags': [

    # format of dates of birth and death, for de:
    # python -fix:datum -ref:Vorlage:Personendaten
    # NOTE(review): closing brackets are absent in this copy and the month
    # alternations contain "Mrz", which looks like a month name that lost a
    # non-ASCII letter - verify against a pristine copy.
    'datum': {
        'regex': True,
        'msg': {
            'de': u'Bot: Korrigiere Datumsformat',
        'replacements': [
            # space after birth sign w/ year
            #(u'\(\*(\d{3,4})', u'(* \\1'),
            ## space after death sign w/ year
            #(u'(\d{3,4})', u' \\1'),
            #(u'&dagger;(\d{3,4})', u' \\1'),
            ## space after birth sign w/ linked date
            #(u'\(\*\[\[(\d)', u'(* [[\\1'),
            ## space after death sign w/ linked date
            #(u'\[\[(\d)', u' [[\\1'),
            #(u'&dagger;\[\[(\d)', u' [[\\1'),
            # a single link around "day. month year" is split into one link
            # for day and month and one for the year
            (u'\[\[(\d+\. (?:Januar|Februar|Mrz|April|Mai|Juni|Juli|August|September|Oktober|November|Dezember)) (\d{1,4})\]\]', u'[[\\1]] [[\\2]]'),
            # No leading zero in the date (for now only where a space is missing as well)
            (u'0(\d+)\.(Januar|Februar|Mrz|April|Mai|Juni|Juli|August|September|Oktober|November|Dezember)', r'\1. \2'),
            # No space between day and month
            (u'(\d+)\.(Januar|Februar|Mrz|April|Mai|Juni|Juli|August|September|Oktober|November|Dezember)', r'\1. \2'),
            # No period before the year
            (u'(\d+)\. (Januar|Februar|Mrz|April|Mai|Juni|Juli|August|September|Oktober|November|Dezember)\.(\d{1,4})', r'\1. \2 \3'),
        'exceptions': {
            'inside': [
                r'\[\[20. Juli 1944\]\]', # assassination attempt on Hitler
                r'\[\[17. Juni 1953\]\]', # East Berlin uprising
                r'\[\[1. April 2000\]\]', # film
                r'\[\[11. September 2001\]\]', # attacks in the USA
                r'\[\[7. Juli 2005\]\]',  # terrorist attacks in Spain (translated as written)

    # Normalises the notation of ISBNs: drops a colon or a superfluous word
    # after "ISBN" and rewrites the separators between the digit groups.
    # NOTE(review): closing brackets are absent in this copy, the
    # 'inside-tags' list has no entries, and the separator character classes
    # and the first alternative in the last pattern look like they lost
    # non-ASCII characters - verify against a pristine copy.
    'isbn': {
        'regex': True,
        'msg': {
            'ar': u':   ISBN',
            'be': u':  ISBN ',
            'cs': u'Oprava formtu ISBN',
            'de': u'Bot: Korrigiere ISBN-Format',
            'en': u'Robot: Fixing ISBN format',
            'es': u'Arreglando formato ISBN',
            'eo': u'Bot: Korekto de teksto en ISBN-formato',
            'fa': u': ',
            'he': u':   ISBN',
            'ja': u': ISBN',
            'ru': u':  ISBN ',
            'uk': u':  ISBN ',
            'zh': u': ISBN',
        'replacements': [
            # colon
            (r'ISBN: (\d+)', r'ISBN \1'),
            # superfluous word "number"
            (r'ISBN( number| no\.?| No\.?|-Nummer|-Nr\.):? (\d+)', r'ISBN \2'),
            # Space, minus, dot, hyphen, en dash, em dash, etc. instead of
            # hyphen-minus as separator, or spaces between digits and separators.
            # Note that these regular expressions also match valid ISBNs, but
            # these won't be changed.
            (ur'ISBN (978|979) *[\- \.-] *(\d+) *[\- \.-] *(\d+) *[\- \.-] *(\d+) *[\- \.-] *(\d)(?!\d)', r'ISBN \1-\2-\3-\4-\5'), # ISBN-13
            (ur'ISBN (\d+) *[\- \.-] *(\d+) *[\- \.-] *(\d+) *[\- \.-] *(\d|X|x)(?!\d)', r'ISBN \1-\2-\3-\4'), # ISBN-10
            # missing space before ISBN-10 or before ISBN-13,
            # or non-breaking space.
            (r'ISBN(|&nbsp;| )((\d(-?)){12}\d|(\d(-?)){9}[\dXx])', r'ISBN \2'),
        'exceptions': {
            'inside-tags': [
            'inside': [
                r'ISBN (\d(-?)){12}\d',    # matches valid ISBN-13s
                r'ISBN (\d(-?)){9}[\dXx]', # matches valid ISBN-10s

    #Corrections for Arabic Wikipedia and any Arabic wiki.
    #python -always -start:! -fix:correct-ar
    # NOTE(review): in this copy every pattern below is empty between its \b
    # anchors (or consists only of empty alternatives) and every replacement
    # is empty or just a group reference - the Arabic text appears to have
    # been lost, as have the closing brackets. Do not use this entry until it
    # has been restored from a pristine copy.

    'correct-ar': {
        'regex': True,
        'msg': {
            'ar':u' . 528   .',
        'replacements': [
            #(u' ,', u' '), #FIXME: Do not replace comma in non-Arabic text, interwiki, image links or <math> syntax.
            (ur'\b\b', u''),
            (ur'\b\b', ur''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'==[ ]? [ ]?==', u'==   =='),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            #(ur'\b\b', u''), #FIXME: Do not replace this (and all others) in interwiki links. This is an Arabic typo, but it is correct in Farsi.
            (ur'\b(|)\b', ur'\1'),
            (ur'\b(|)\b', ur'\1'),
            (ur'\b(|)\b', ur'\1'),
            (ur'\b(|)\b', ur'\1'),
            (ur'\b(|)\b', ur'\1'),
            (ur'\b(|)\b', ur'\1'),
            (ur'\b(|)\b', ur'\1'),
            (ur'\b(|)\b', ur'\1'),
            (ur'\b(|)\b', ur'\1'),
            (ur'\b(|)\b', ur'\1'),
            (ur'\b(|)\b', ur'\1'),
            (ur'\b(|)\b', ur'\1'),
            (ur'\b(|)\b', ur'\1'),
            (ur'\b(|)\b', ur'\1'),
            (ur'\b(|)\b', ur'\1'),
            (ur'\b(|)\b', ur'\1'),
            (ur'\b(|)\b', ur'\1'),
            (ur'\b(|)\b', ur'\1'),
            (ur'\b(|)\b', ur'\1'),
            (ur'\b(|)\b', ur'\1'),
            (ur'\b(|)\b', ur'\1'),
            (ur'\b(|)\b', ur'\1'),
            (ur'\b(|)\b', ur'\1'),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b\b', u''),
            (ur'\b(|)(||)(|||||||||)\b', ur'\1\2\3'),
            (ur'\b(|)(|)(||||||||||)\b', ur'\1\2\3'),
            (ur'\b(|)(|||||)(|||||||||)\b', ur'\1\2\3'),
            (ur'\b(|)(||||)(|||||||||)\b', ur'\1\2\3'),
            (ur'\b(|)(|)(|)(|||||||||)\b', ur'\1\4'),
            (ur'\b(||||||||||||)(|||)\b', ur'\1\2'),
            (ur'\b(|||||||||)\b', ur'\1'),
    # Rewrites all-lowercase special page names to their CamelCase spelling.
    # 'regex' is False, so the pairs are plain strings rather than patterns.
    # NOTE(review): the closing brackets of the nested literals are absent in
    # this copy - verify against a pristine copy.
    'specialpages': {
        'regex': False,
        'msg': {
            'en': u'Robot: Fixing special page capitalisation',
        'replacements': [
            (u'Special:Allpages',        u'Special:AllPages'),
            (u'Special:Blockip',         u'Special:BlockIP'),
            (u'Special:Blankpage',       u'Special:BlankPage'),
            (u'Special:Filepath',        u'Special:FilePath'),
            (u'Special:Globalusers',     u'Special:GlobalUsers'),
            (u'Special:Imagelist',       u'Special:ImageList'),
            (u'Special:Ipblocklist',     u'Special:IPBlockList'),
            (u'Special:Listgrouprights', u'Special:ListGroupRights'),
            (u'Special:Listusers',       u'Special:ListUsers'),
            (u'Special:Newimages',       u'Special:NewImages'),
            (u'Special:Prefixindex',     u'Special:PrefixIndex'),
            (u'Special:Protectedpages',  u'Special:ProtectedPages'),
            (u'Special:Recentchanges',   u'Special:RecentChanges'),
            (u'Special:Specialpages',    u'Special:SpecialPages'),
            (u'Special:Unlockdb',        u'Special:UnlockDB'),
            (u'Special:Userlogin',       u'Special:UserLogin'),
            (u'Special:Userlogout',      u'Special:UserLogout'),
            (u'Special:Whatlinkshere',   u'Special:WhatLinksHere'),
    # yu top-level domain will soon be disabled,
    # see (NOTE(review): the reference is missing in this copy)
    # The following are domains that are often-used.
    # 'nocase' is True here, presumably to match the domains regardless of
    # letter case - confirm against the code that consumes this table.
    # NOTE(review): almost all search and replacement strings below are empty
    # in this copy (the domain names appear to have been lost), and the
    # closing brackets are absent. Do not use this entry until it has been
    # restored from a pristine copy.
    'yu-tld': {
        'regex': False,
        'nocase': True,
        'msg': {
            'de': u'Bot: Ersetze Links auf .yu-Domains',
            'en': u'Robot: Replacing links to .yu domains',
            'fr': u'Robot: Correction des liens pointant vers le domaine .yu, qui expire en 2009',
            'ksh': u'Bot: de ahle .yu-Domains loufe us, drm ujetuusch',
         'replacements': [
            (u'',             u''),
            (u'',                  u''),
            (u'',              u''),
            (u'',          u''),
            (u'',               u''),
            (u'www.nbs.yu',                  u''),
            (u'',        u''),
            (u'eunet.yu',                    u''),
            (u'',      u''),
            (u'',        u''),
            # (u'',             u''), # Archive links don't seem to work
            (u'',             u''),
            (u'',              u''),
            (u'',             u''),
            (u'',   u''),
            (u'',      u''),
            (u'',           u''),
            (u'',    u''),
            (u'',              u''),
            (u'',          u''),
            (u'',  u''),
            (u'',   u''),
            (u'',           u''),
            (u'',        u''),
            (u'', u''),
            (u'', u''),
            (u'',        u''),
            (u'',     u''),
            (u'',          u''),
            (u'',       u''),
            (u'www.spc.yu/sr',               u''),
            (u'',                u''),
            (u'',         u''),
            (u'',          u''),
            (u'',             u''),
            (u'',     u''),
            (u'',              u''),
            (u'',          u''),
            (u'',               u''),
            (u'',          u''),
            (u'',              u''),
            (u'', u''),
    # These replacements will convert HTML tag from FCK-editor to wiki syntax.
    # NOTE(review): the closing brackets of the nested literals (and of the
    # enclosing table) are absent in this copy - verify against a pristine
    # copy.
    'fckeditor': {
        'regex': True,
        'msg': {
            'en': u'Robot: Fixing rich-editor html',
         'replacements': [
            # replace <br> with a new line
            (r'(?i)<br>',                      r'\n'),
            # replace &nbsp; with a space
            (r'(?i)&nbsp;',                      r' '),

# Load the user fixes file.
#
# The file is run with execfile() (Python 2), i.e. in this module's own
# namespace, so it can presumably add to or override entries of `fixes`.
# The file is optional: an IOError raised while loading it is ignored.
# NOTE(review): the file name argument is an empty string in this copy;
# confirm the intended name of the user fixes file.

import config

try:
    execfile(config.datafilepath(config.base_dir, ""))
except IOError:
    # No readable user fixes file - keep the standard fixes only.
    pass
