#!/usr/bin/env python
# $SnapHashLicense:
#
# SnapLogic - Open source data services
#
# Copyright (C) 2009, SnapLogic, Inc. All rights reserved.
#
# See http://www.snaplogic.org for more information about
# the SnapLogic project.
#
# This program is free software, distributed under the terms of
# the GNU General Public License Version 2. See the LEGAL file
# at the top of the source tree.
#
# "SnapLogic" is a trademark of SnapLogic, Inc.
#
# $
#
# $Id: sqlite_backup.py 7363 2009-04-24 17:35:55Z pamor $
# This utility watches an sqlite DB file, using inotify capabilities.
# When changes occur, it performs a safe copy of the DB file.
# To implement specific backup capabilities, the perform_backup_1()
# and perform_backup_2() functions should be modified. The first
# of those - perform_backup_1() - is executed while the DB file is locked,
# so it should only do some very quick operations, such as creating
# a temporary copy. The more involved and expensive activities, such as
# a backup over the network, should happen in perform_backup_2().
#
# Please note that the kernel needs to be inotify enabled. Furthermore,
# the 'inotifywait' system utility needs to be in the path.
#
# Run with the --help option to get information about the usage.
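#
# Example invocation (the DB path shown here is just an illustration,
# any sqlite file path works):
#
#   ./sqlite_backup.py --verbose --delay 10 /path/to/my_database.db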
import sqlite3
import os
import threading
import time
import datetime
import getopt
import sys
# How many seconds to wait before we perform the backup operation, after
# a change has been detected.
DELAY_DEFAULT = 5
# The logging attempts to prevent rapid, repeated logging of the same text.
# This commonly occurs if additional modifications take place in a watched
# file while a backup is already scheduled: you can see many instances
# of "Detected change, but older copy process still pending." in rapid succession.
# The time interval here (in seconds) determines for how many seconds identical
# log messages should be suppressed.
AVOID_SAME_LOG_LINE_INTERVAL = 10
__copy_is_pending = False
__be_verbose = False
__backup_completed = True
__last_log_text = ""
__last_log_time = None
def perform_backup_1(fname):
"""
Do the quick backup/copy operation.
    This is where the quick, local part of the backup can be customized.
    Important: while this function executes, the DB file is locked! Thus,
    this function should NOT perform expensive operations, such as
    backing up data over the network. If a local copy is all that's
    needed then this can be done here. Otherwise, just make a local
    temporary copy as quickly as possible and return that filename. The
    more expensive work should then take place in perform_backup_2().
@param fname: Name of the sqlite DB file.
@type fname: string
@return: Some arbitrary information, which will
be useful to perform_backup_2().
For example, if we made a temporary
local copy here, then we could pass
the name of that temporary file back.
@rtype: object
"""
# Currently, we just do a simple copy in the local
# filesystem.
import shutil
backup_fname = fname+"_backup_"+str(time.time())
shutil.copyfile(fname, backup_fname)
return backup_fname
def perform_backup_2(arg):
"""
Perform the more expensive portion of the backup.
For example, copying the file across the network.
perform_backup_1() has done the initial work while the
file was locked, and has passed some information to us
(maybe the name of a temporary file it created), which
    we can now use to do the more expensive operations.
"""
# Please note! If perform_backup_1() has created a temporary
# file then we should remember to clean up after us here.
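    #
    # Illustrative sketch only (commented out), assuming perform_backup_1()
    # returned the path of the local temporary copy it created. One possible
    # implementation could move that copy to a (hypothetical) backup
    # directory, which also cleans up the temporary file:
    #
    #     import shutil
    #     backup_dir = "/var/backups/sqlite"   # hypothetical destination
    #     shutil.move(arg, os.path.join(backup_dir, os.path.basename(arg)))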
pass
# Some simple logging functions, which can output to stdout or
# stderr, with a simple time stamp. Large numbers of endlessly
# repeating log messages are avoided by detecting duplicate
# messages that appear within a short time.
def _log(f, text):
global __last_log_text, __last_log_time
now = datetime.datetime.now()
time_delta = now - __last_log_time
from snaplogic.common.snap_log import TIME_STAMP_FORMAT
if text == __last_log_text and time_delta.seconds < AVOID_SAME_LOG_LINE_INTERVAL:
return
f.write("%s: %s\n" % (time.strftime(TIME_STAMP_FORMAT), text))
f.flush()
__last_log_text = text
__last_log_time = now
def log(text):
if __be_verbose:
_log(sys.stdout, text)
def elog(text):
if __be_verbose:
_log(sys.stderr, text)
def do_sqlite_backup(fname):
"""
Make a safe copy of an sqlite DB file.
To ensure that no update operations happen WHILE we make
the copy of the DB file, we force a lock on the file.
This is done by starting a dummy transaction, performing
a straight file-system level file copy operation, and then
rolling back and closing that transaction.
@param fname: Name of the sqlite DB file.
@type fname: string
"""
global __copy_is_pending, __backup_completed
p1_successful = False
__backup_completed = False
try:
log("Starting copy process. Locking DB file '%s'..." % fname)
        c = sqlite3.connect(fname)
c.execute("BEGIN IMMEDIATE")
except Exception, e:
elog("ERROR: Cannot lock DB file. Error: '%s'." % str(e))
__backup_completed = True
return
# Call the user-defined function that performs whatever
# backup steps are desired.
log("Starting user defined copy...")
try:
ret = perform_backup_1(fname)
p1_successful = True
except Exception, e:
elog("ERROR: User-defined copy function 1 produced error '%s'." % str(e))
# We update this flag while the database is still locked.
# That way we can be sure that no change will go 'unscheduled'.
__copy_is_pending = False
c.rollback()
c.close()
log("File '%s' has been unlocked." % fname)
if p1_successful:
try:
perform_backup_2(ret)
log("User-defined copy completed.")
except Exception, e:
elog("ERROR: User-defined copy function 2 produced error '%s'." % str(e))
__backup_completed = True
def schedule_future_copy(fname, delay):
"""
Initiate a copy of the sqlite DB file some time in the future.
This is executed as its own thread, while the detector can
continue to track incoming change notifications. The point of
waiting is that we don't want to start a copy for every single
(small) write operation. Instead, we want to wait some time to
see if we can catch a couple of them. However, we never want
to wait more than some specified number of seconds.
    The detector will not initiate another thread for this
    if a previous thread has been created already and is waiting
    or still working.
@param fname: Name of the sqlite DB file.
@type fname: string
    @param delay: Number of seconds to wait before starting
the copy operation.
@type delay: integer
"""
try:
time.sleep(delay)
while not __backup_completed:
log("Attempting to start backup, but previous backup operation still in progress. Waiting...")
time.sleep(2)
do_sqlite_backup(fname)
except Exception, e:
elog("ERROR: Problem while waiting and copying. Error: '%s'." % str(e))
def change_detector(fname, delay):
"""
Watch for changes in the specified sqlite DB file and schedule a backup.
@param fname: Name of the sqlite DB file.
@type fname: string
    @param delay: Number of seconds to wait before starting
the copy operation.
@type delay: integer
"""
global __copy_is_pending
# There is an inotify interface for Python, but it seems to be
# essentially undocumented.
# Using the command line inotify utilities may be a bit clumsy, but
# at least it works and can be done in just a few lines of code.
(sin, sout) = os.popen4("inotifywait %s -m" % fname)
log("Starting to watch '%s' for modifications with copy delay of %d seconds..." % (fname, delay))
while True:
line = sout.readline()
# We get output from inotifywait that consists of a simple text
# line per event. Something like this:
#
# <filename> MODIFY
#
# There are also other events, but we are only interested in
# the 'MODIFY' one.
if line and " MODIFY" in line:
# Only schedule a new copy operation if we don't have
            # one pending already.
if not __copy_is_pending:
log("Detected change, scheduled copy in %d seconds." % delay)
__copy_is_pending = True
t = threading.Thread(target=schedule_future_copy, args=(fname, delay))
t.start()
else:
log("Detected change, but older copy process still pending.")
def usage():
"""
Print usage information about this utility.
"""
print "\nsqlite_backup.py [-d|--delay delay] [-v|--verbose] [-h|--help] fname\n"
print "\t-h, --help:"
print "\t\tShows this help screen.\n"
print "\t-v, --verbose:"
print "\t\tLogs actions to stdout and stderr.\n"
print "\t-d, --delay:"
print "\t\tDelay in seconds between detecting a change and starting a copy."
print "\t\tBy waiting, we give multiple smaller changes a chance to be handled"
print "\t\tthrough a single copy. At the same time, even if the change is just"
print "\t\tsmall, we will never wait longer than this specified number of"
print "\t\tseconds. Default: 5\n"
print "\tfname:"
print "\t\tName of the sqlite file to be watched and backed up.\n"
print "\tNote: To perform your own, specific backup actions, modify the 'perform_backup_1()'"
print "\tand perform_backup_2() functions.\n"
if __name__ == "__main__":
__last_log_time = datetime.datetime.now()
delay = DELAY_DEFAULT
try:
argv = sys.argv[1:]
if len(argv) < 1:
raise getopt.GetoptError("Not enough arguments.")
        opts, args = getopt.getopt(argv, "vhd:", ["verbose", "help", "delay="])
except getopt.GetoptError:
usage()
sys.exit(2)
for opt, arg in opts:
if opt in ("-h", "--help"):
usage()
sys.exit()
if opt in ("-v", "--verbose"):
__be_verbose = True
if opt in ("-d", "--delay"):
try:
delay = int(arg)
            except ValueError:
print "'-d, --delay' requires integer argument. Got '%s' instead." % arg
sys.exit(2)
    if not args:
        usage()
        sys.exit(2)
    fname = args[-1]
# Check that the file is there.
try:
f = open(fname, "r")
f.close()
    except IOError:
print "Cannot open file '%s'." % fname
sys.exit(2)
change_detector(fname, delay)