titletranslate.py :  » Network » Python-Wikipedia-Robot-Framework » pywikipedia » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » Network » Python Wikipedia Robot Framework 
Python Wikipedia Robot Framework » pywikipedia » titletranslate.py
# -*- coding: utf-8  -*-
#
# (C) Rob W.W. Hooft, 2003
# (C) Yuri Astrakhan, 2005
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id: titletranslate.py 7565 2009-10-29 15:33:46Z xqt $'
#
import re

import wikipedia as pywikibot
import date

# Pattern for the parenthesised part of a title that is replaced by a single
# space when removebrackets is set.  Spelled with doubled backslashes rather
# than a ur'' literal; the pattern text itself is unchanged.
_BRACKETS_RE = re.compile(u"\\W*?\\(.*?\\)\\W*?", re.UNICODE)


def _exceeds_max_year(dictName, lang, value):
    """Return True if value is a year beyond the limit recorded for lang.

    Only the 'yearsBC' and 'yearsAD' dictionaries have limits (looked up in
    date.maxyearBC and date.maxyearAD).  Any other dictName, or a language
    without an entry in the relevant table, is never out of range.
    """
    if dictName == 'yearsBC':
        limits = date.maxyearBC
    elif dictName == 'yearsAD':
        limits = date.maxyearAD
    else:
        return False
    return lang in limits and value > limits[lang]


def translate(page, hints = None, auto = True, removebrackets = False):
    """
    Return a list of candidate pages in other languages for the given page.

    Candidates are collected from two sources, in this order, without
    duplicates:

    hints -- iterable of strings of the form 'codes' or 'codes:title'.
             'codes' may be
               * a number N: the first N entries of
                 site.family.languages_by_size,
               * 'all': every entry of site.family.languages_by_size,
               * a key of site.family.language_groups,
               * a comma separated list of language codes.
             If the title part is missing or empty, the title of page is
             used (with the family's '_default' namespace name when page is
             not in the main namespace).  Codes unknown to the site are
             ignored; the language of page itself is skipped.
    auto  -- if true, the title is passed to date.getAutoFormat(); when it is
             recognized as a date, the formatted title for every other
             language in date.formats is added.  Years beyond the limit in
             date.maxyearBC / date.maxyearAD are left out, both for the
             language of page and for each target language.
    removebrackets -- if true, a parenthesised part of a title that was
             derived from page (not one given explicitly in a hint) is
             replaced by a single space.

    Returns a list of pywikibot.Page objects.
    """
    result = []
    site = page.site()
    if hints:
        for h in hints:
            if ':' not in h:
                # argument given as -hint:xy where xy is a language code
                codes = h
                newname = ''
            else:
                codes, newname = h.split(':', 1)
            if newname == '':
                # if given as -hint:xy or -hint:xy:, assume that there should
                # be a page in language xy with the same title as the page
                # we're currently working on ...
                ns = page.namespace()
                if ns:
                    newname = u'%s:%s' % (site.family.namespace('_default', ns), page.titleWithoutNamespace())
                else:
                    # article in the main namespace
                    newname = page.title()
                # ... optionally without its bracketed part
                if removebrackets:
                    newname = _BRACKETS_RE.sub(u" ", newname)
            try:
                # a numeric hint selects that many leading entries
                number = int(codes)
                codes = site.family.languages_by_size[:number]
            except ValueError:
                if codes == 'all':
                    codes = site.family.languages_by_size
                elif codes in site.family.language_groups:
                    codes = site.family.language_groups[codes]
                else:
                    codes = codes.split(',')
            for newcode in codes:
                if newcode in site.languages():
                    if newcode != site.language():
                        x = pywikibot.Page(site.getSite(code=newcode), newname)
                        if x not in result:
                            result.append(x)
                else:
                    if pywikibot.verbose:
                        pywikibot.output(u"Ignoring unknown language code %s"%newcode)

    # Autotranslate dates into all other languages, the rest will come from
    # existing interwiki links.
    if auto:
        sitelang = page.site().language()
        # search inside all dictionaries for this link
        dictName, value = date.getAutoFormat(sitelang, page.title())
        # Do nothing when the title is not a date, or when it is a year that
        # is out of range for the language of the page itself.
        if dictName and not _exceeds_max_year(dictName, sitelang, value):
            pywikibot.output(u'TitleTranslate: %s was recognized as %s with value %d' % (page.title(),dictName,value))
            for entryLang, entry in date.formats[dictName].iteritems():
                if entryLang == sitelang:
                    continue
                if _exceeds_max_year(dictName, entryLang, value):
                    # year is out of range for the target language
                    continue
                newname = entry(value)
                x = pywikibot.Page( pywikibot.getSite(code=entryLang, fam=site.family), newname )
                if x not in result:
                    result.append(x) # add new page
    return result

# Link templates, filled in with the numeric date value, that
# getPoisonedLinks() adds to its result for any page whose title is
# recognized as one of the dictionaries listed in date.bcFormats.
bcDateErrors = [u'[[ko:%d]]']

def appendFormatedDates( result, dictName, value ):
    """Append one interwiki link per language of date.formats[dictName].

    Each formatter stored in date.formats[dictName] is called with value and
    the outcome is added to result as u'[[code:title]]'.  The list is
    modified in place; nothing is returned.
    """
    link = u'[[%s:%s]]'
    formatters = date.formats[dictName]
    for code, makeTitle in formatters.iteritems():
        title = makeTitle(value)
        result.append(link % (code, title))

def getPoisonedLinks(pl):
    """Return the list of known corrupted links for the page pl.

    The title of pl is run through date.getAutoFormat().  If it is not
    recognized as a date the list is empty.  Otherwise the list holds
    interwiki link strings that should be removed if seen on the page:
    the bcDateErrors templates for BC formats, plus the links produced by
    appendFormatedDates() for the dictionaries paired with the recognized
    one below.
    """
    poisoned = []

    pywikibot.output( u'getting poisoned links for %s' % pl.title() )

    dictName, value = date.getAutoFormat( pl.site().language(), pl.title() )
    if dictName is None:
        # title is not a date: nothing is known to be wrong
        return poisoned

    pywikibot.output( u'date found in %s' % dictName )

    # errors in year BC
    if dictName in date.bcFormats:
        for template in bcDateErrors:
            poisoned.append( template % value )

    # special case: links for 399 BC are poisoned on the 398 BC page
    if dictName == 'yearsBC' and value == 398:
        appendFormatedDates( poisoned, dictName, 399 )

    # Pick the dictionaries (and the value to format with) whose links are
    # treated as poisoned for the recognized dictionary.
    if dictName == 'yearsBC':
        targets = [('decadesBC', value), ('yearsAD', value)]
    elif dictName == 'yearsAD':
        targets = [('decadesAD', value), ('yearsBC', value)]
    elif dictName == 'centuriesBC':
        targets = [('decadesBC', value*100+1)]
    elif dictName == 'centuriesAD':
        targets = [('decadesAD', value*100+1)]
    else:
        targets = []

    for otherDict, otherValue in targets:
        appendFormatedDates( poisoned, otherDict, otherValue )

    return poisoned
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.