throttle.py :  » Network » Python-Wikipedia-Robot-Framework » pywikipedia » pywikibot » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » Network » Python Wikipedia Robot Framework 
Python Wikipedia Robot Framework » pywikipedia » pywikibot » throttle.py
# -*- coding: utf-8  -*-
"""
Mechanics to slow down wiki read and/or write rate.
"""
#
# (C) Pywikipedia bot team, 2008
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id: throttle.py 8035 2010-03-22 14:49:07Z xqt $'

import wikipedia as pywikibot
import config

import math
import threading
import time

pid = False     # global process identifier
                # when the first Throttle is instantiated, it will set this
                # variable to a positive integer, which will apply to all
                # throttle objects created by this process.


class Throttle(object):
    """Control rate of access to wiki server

    Calling this object blocks the calling thread until at least 'delay'
    seconds have passed since the previous call.

    The framework initiates two Throttle objects: get_throttle to control
    the rate of read access, and put_throttle to control the rate of write
    access.

    """
    def __init__(self, mindelay=None, maxdelay=None, writedelay=None,
                 multiplydelay=True, verbosedelay=False, write=False):
        self.lock = threading.RLock()
        self.mysite = None
        self.ctrlfilename = config.datafilepath('pywikibot', 'throttle.ctrl')
        self.mindelay = mindelay
        if self.mindelay is None:
            self.mindelay = config.minthrottle
        self.maxdelay = maxdelay
        if self.maxdelay is None:
            self.maxdelay = config.maxthrottle
        self.writedelay = writedelay
        if self.writedelay is None:
            self.writedelay = config.put_throttle
        self.last_read = 0
        self.last_write = 0
        self.next_multiplicity = 1.0
        self.checkdelay = 120  # Check logfile again after this many seconds
        self.dropdelay = 360   # Ignore processes that have not made
                               # a check in this many seconds
        self.releasepid = 1200 # Free the process id after this many seconds
        self.lastwait = 0.0
        self.delay = 0
        self.checktime = 0
        self.verbosedelay = verbosedelay
        self.multiplydelay = multiplydelay
        if self.multiplydelay:
            self.checkMultiplicity()
        self.setDelay()
        self.write = write

    def checkMultiplicity(self):
        """Count running processes for site and set process_multiplicity."""
        global pid
        self.lock.acquire()
        mysite = self.mysite = str(pywikibot.getSite())
        if pywikibot.verbose:
            pywikibot.output(u"Checking multiplicity: pid = %(pid)s" % globals())
        try:
            processes = []
            my_pid = pid or 1  # start at 1 if global pid not yet set
            count = 1
            # open throttle.log
            try:
                f = open(self.ctrlfilename, 'r')
            except IOError:
                if not pid:
                    pass
                else:
                    raise
            else:
                now = time.time()
                for line in f.readlines():
                    # parse line; format is "pid timestamp site"
                    try:
                        line = line.split(' ')
                        this_pid = int(line[0])
                        ptime = int(line[1].split('.')[0])
                        this_site = line[2].rstrip()
                    except (IndexError, ValueError):
                        continue    # Sometimes the file gets corrupted
                                    # ignore that line
                    if now - ptime > self.releasepid:
                        continue    # process has expired, drop from file
                    if now - ptime <= self.dropdelay \
                       and this_site == mysite \
                       and this_pid != pid:
                        count += 1
                    if this_site != self.mysite or this_pid != pid:
                        processes.append({'pid': this_pid,
                                          'time': ptime,
                                          'site': this_site})
                    if not pid and this_pid >= my_pid:
                        my_pid = this_pid+1 # next unused process id

            if not pid:
                pid = my_pid
            self.checktime = time.time()
            processes.append({'pid': pid,
                              'time': self.checktime,
                              'site': mysite})
            processes.sort(key=lambda p:(p['pid'], p['site']))
            try:
                f = open(self.ctrlfilename, 'w')
                for p in processes:
                    f.write("%(pid)s %(time)s %(site)s\n" % p)
            except IOError:
                pass
            f.close()
            self.process_multiplicity = count
            if self.verbosedelay or pywikibot.verbose:
                pywikibot.output(
                    u"Found %(count)s %(mysite)s processes running, including this one."
                    % locals())
        finally:
            self.lock.release()

    def setDelay(self, delay=None, writedelay=None, absolute=False):
        """Set the nominal delays in seconds. Defaults to config values."""
        self.lock.acquire()
        try:
            maxdelay = self.maxdelay
            if delay is None:
                delay = self.mindelay
            if writedelay is None:
                writedelay = config.put_throttle
            if absolute:
                self.maxdelay = delay
                self.mindelay = delay
            self.delay = delay
            self.writedelay = min(max(self.mindelay, writedelay),
                                  self.maxdelay)
            # Start the delay count now, not at the next check
            self.last_read = self.last_write = time.time()
        finally:
            self.lock.release()

    def getDelay(self, write=False):
        """Return the actual delay, accounting for multiple processes.

        This value is the maximum wait between reads/writes, not taking
        account of how much time has elapsed since the last access.

        """
        if write:
            thisdelay = self.writedelay
        else:
            thisdelay = self.delay
        if self.multiplydelay: # We're checking for multiple processes
            if time.time() > self.checktime + self.checkdelay:
                self.checkMultiplicity()
            if thisdelay < (self.mindelay * self.next_multiplicity):
                thisdelay = self.mindelay * self.next_multiplicity
            elif thisdelay > self.maxdelay:
                thisdelay = self.maxdelay
            thisdelay *= self.process_multiplicity
        return thisdelay

    def waittime(self, write=False):
        """Return waiting time in seconds if a query would be made right now"""
        # Take the previous requestsize in account calculating the desired
        # delay this time
        thisdelay = self.getDelay(write=write)
        now = time.time()
        if write:
            ago = now - self.last_write
        else:
            ago = now - self.last_read
        if ago < thisdelay:
            delta = thisdelay - ago
            return delta
        else:
            return 0.0

    def drop(self):
        """Remove me from the list of running bot processes."""
        # drop all throttles with this process's pid, regardless of site
        self.checktime = 0
        processes = []
        try:
            f = open(self.ctrlfilename, 'r')
        except IOError:
            return
        else:
            now = time.time()
            for line in f.readlines():
                try:
                    line = line.split(' ')
                    this_pid = int(line[0])
                    ptime = int(line[1].split('.')[0])
                    this_site = line[2].rstrip()
                except (IndexError,ValueError):
                    continue    # Sometimes the file gets corrupted
                                # ignore that line
                if now - ptime <= self.releasepid \
                   and this_pid != pid:
                    processes.append({'pid': this_pid,
                                      'time': ptime,
                                      'site': this_site})
        processes.sort(key=lambda p:p['pid'])
        try:
            f = open(self.ctrlfilename, 'w')
            for p in processes:
                f.write("%(pid)s %(time)s %(site)s\n" % p)
        except IOError:
            pass
        f.close()

    def __call__(self, requestsize=1, write=False):
        """Block the calling program if the throttle time has not expired.

        Parameter requestsize is the number of Pages to be read/written;
        multiply delay time by an appropriate factor.

        Because this seizes the throttle lock, it will prevent any other
        thread from writing to the same site the script started with
        until the wait expires.

        """
        self.lock.acquire()
        try:
            wait = self.waittime(write=write or self.write)
            # Calculate the multiplicity of the next delay based on how
            # big the request is that is being posted now.
            # We want to add "one delay" for each factor of two in the
            # size of the request. Getting 64 pages at once allows 6 times
            # the delay time for the server.
            self.next_multiplicity = math.log(1+requestsize)/math.log(2.0)
            # Announce the delay if it exceeds a preset limit
            if wait > 0:
                if wait > config.noisysleep or pywikibot.verbose:
                    pywikibot.output(
                        u"Sleeping for %(wait).1f seconds, %(now)s"
                        % {'wait': wait,
                           'now' : time.strftime("%Y-%m-%d %H:%M:%S",
                                                 time.localtime())
                        } )
                time.sleep(wait)
            if write or self.write:
                self.last_write = time.time()
            else:
                self.last_read = time.time()
        finally:
            self.lock.release()

    def lag(self, lagtime):
        """Seize the throttle lock due to server lag.

        This will prevent any thread from accessing this site.

        """
        started = time.time()
        self.lock.acquire()
        try:
            # start at 1/2 the current server lag time
            # wait at least 5 seconds but not more than 120 seconds
            delay = min(max(5, lagtime//2), 120)
            # account for any time we waited while acquiring the lock
            wait = delay - (time.time() - started)
            if wait > 0:
                if wait > config.noisysleep:
                    pywikibot.output(
                        u"Sleeping for %(wait).1f seconds, %(now)s"
                        % {'wait': wait,
                           'now': time.strftime("%Y-%m-%d %H:%M:%S",
                                                time.localtime())
                        } )
                time.sleep(wait)
        finally:
            self.lock.release()

www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.