# Part of the A-A-P recipe executive: Access files which may be remote
# Copyright (C) 2002-2003 Stichting NLnet Labs
# Permission to copy and use this file is specified in the file COPYING.
# If this file is missing you can find it here: http://www.a-a-p.org/COPYING
#
# Access files by their URL.
# If they are remote, may download or upload the file.
# Uses the Cache to avoid up/downloading too often.
#
import os.path
import shutil
import time
from urlparse import urlparse
from urllib import urlretrieve,urlcleanup,urlopen
from Util import *
from Message import *
def is_url(name):
    """Return non-zero when "name" is a URL, zero when it's a local file."""
    # A single letter followed by ':' is an MS-Windows drive spec
    # ("c:/name"), which urlparse() would mistake for a scheme.
    if len(name) > 2 and name[1] == ':':
        return 0
    # Only the scheme part of the parse result matters here.
    return urlparse(name, '', 0)[0] != ''
def url_split3(name):
    """Split a URL into a (scheme, machine, path) tuple.

    A plain local file (including an MS-Windows "c:/name" path) yields
    ('', '', name).  A "file:" scheme is normalized to no scheme."""
    # On MS-Windows c:/name is a file, not a URL.
    if len(name) > 2 and name[1] == ':':
        return '', '', name
    scheme, mach, path, parm, query, frag = urlparse(name, '', 0)
    path = path + parm + query + frag
    # Python 2.4 understands rsync, but includes an extra slash.
    if scheme == 'rsync' and mach != '':
        # Use string methods; the old code called string.index()/string.find()
        # without ever importing the "string" module.
        i = name.index(mach) + len(mach)
        if i < len(name) and name[i] == '/':
            path = name[i+1:]
    if scheme != '' and mach == '' and path[:2] == '//':
        # urlparse doesn't handle scp://machine/path correctly
        # Python 2.3 doesn't handle rsync either.
        mach = path[2:]
        path = ''
        i = mach.find('/')
        if i > 0:
            path = mach[i + 1:]
            mach = mach[:i]
    if scheme == "file":
        scheme = ''             # A file is the same as no scheme.
    # For "ftp://mach/foobar" the file is "foobar", not "/foobar".
    if scheme == "ftp" and len(path) > 1 and path[0] == '/':
        path = path[1:]
    return scheme, mach, path
def url_time(recdict, name):
    """Obtain the timestamp in seconds (in GMT if possible) for the URL "name".
    Returns zero (very old) if the timestamp can't be obtained."""
    if not is_url(name):
        # Local file: stat it directly.
        try:
            return os.path.getmtime(name)
        except (IOError, OSError):
            return 0
    from Cache import cache_lookup
    cached = cache_lookup(recdict, name)
    if cached:
        # A cached copy exists: use its timestamp.
        return cached.timestamp()
    # No cached copy: ask the remote side for the timestamp.
    return remote_time(recdict, name)
def remote_time(recdict, name):
    """Get the timestamp of a remote file.
    Returns zero when the timestamp can't be obtained, 1 when the file
    exists but carries no date (see get_header_date())."""
    try:
        msg_info(recdict, _('getting timestamp for "%s"') % name)
        up = urlopen(name)
        t = get_header_date(up.info())
        up.close()
        if t == 1:
            msg_info(recdict, _('"%s" can be found but has no timestamp') % name)
    except Exception:
        # Narrowed from a bare "except": still best-effort for any I/O or
        # parse failure, but no longer swallows KeyboardInterrupt/SystemExit.
        msg_info(recdict, _('Could not obtain timestamp for "%s"') % name)
        t = 0
    return t
def get_header_date(headers):
    """Get the date from a MIME header "headers" (a dict-like object).
    Returns the time in seconds from "Last-Modified" or "Date" when
    available, 1 otherwise (see comment below)."""
    try:
        from rfc822 import parsedate            # Python 2
    except ImportError:
        from email.utils import parsedate       # rfc822 removed in Python 3
    # Use "in" rather than has_key(): works on dicts and Message objects
    # alike, and has_key() no longer exists in Python 3.
    if "Last-Modified" in headers:
        return time.mktime(parsedate(headers["Last-Modified"]))
    if "Date" in headers:
        return time.mktime(parsedate(headers["Date"]))
    # When a file does exist but has no timestamp return 1, so that it's
    # different from a file that does not exist.
    return 1
def get_progname_rsync(recdict):
    """
    Use $RSYNC if defined, otherwise use "rsync -p --rsh=ssh --copy-links".
    """
    # Default flags: keep permissions, tunnel over ssh, follow symlinks.
    default_flags = " -p --rsh=ssh --copy-links"
    return get_progname(recdict, "RSYNC", "rsync", default_flags)
def get_progname_scp(recdict):
    """
    Use $SCP if defined, otherwise use "scp -C -p".
    """
    # Default flags: compress the transfer and keep permissions/times.
    default_flags = " -C -p"
    return get_progname(recdict, "SCP", "scp", default_flags)
def get_progname_rcp(recdict):
    """
    Use $RCP if defined, otherwise use "rcp -p".
    """
    # Default flag: keep permissions/times.
    default_flags = " -p"
    return get_progname(recdict, "RCP", "rcp", default_flags)
def url_download(recdict, url, fname):
    """Attempt downloading file "url" to file "fname".
    Overwrite "fname" if it already exists.
    When "fname" is empty, use a temporary file.  The caller has to use
    "url_cleanup()" when done with it.
    Returns a tuple of the filename and the timestamp of the remote file
    when possible.
    Throws an IOError if downloading failed."""
    msg_info(recdict, _('Attempting download of "%s"' % url))
    rtime = 0           # timestamp of the remote file; 0 when unknown
    fscheme, fmach, fpath = url_split3(url)
    # First try using a function the user specified, this overrules our own
    # stuff.
    scope = recdict.get("_no")
    if not scope is None:
        user_func = scope.get("fetch_" + fscheme)
    else:
        user_func = None
    # These schemes are handled by an external program or ftplib; anything
    # else falls through to urlretrieve() in the "else" branch below.
    if (user_func
            or fscheme == 'rcp'
            or fscheme == 'scp'
            or fscheme == 'rsync'
            or fscheme == 'ftp'):
        if fname == '':
            # No destination given: download to a temporary file.
            from RecPython import tempfname
            resfile = tempfname()
        else:
            resfile = fname
        if user_func:
            # User-defined fetch_<scheme>() function; a false result means
            # the download failed.
            res = apply(user_func, (recdict, fmach, fpath, resfile))
            if not res:
                raise IOError, (_("fetch_%s() could not download %s")
                                                        % (fscheme, url))
        elif fscheme == 'rcp':
            # Install rcp when needed.
            from DoInstall import assert_pkg
            from Work import getrpstack
            assert_pkg(getrpstack(recdict), recdict, "rcp")
            # Build "rcp machine:path destination" and run it logged.
            cmd = '%s %s:%s %s' % (get_progname_rcp(recdict), fmach, fpath, resfile)
            logged_system(recdict, cmd)
        elif fscheme == 'scp':
            # Install scp when needed.
            from DoInstall import assert_pkg
            from Work import getrpstack
            assert_pkg(getrpstack(recdict), recdict, "scp")
            cmd = '%s %s:%s %s' % (get_progname_scp(recdict), fmach, fpath, resfile)
            if os.name != "posix":
                # Can't use "tee" and scp may prompt for a password.
                cmd = "{interactive} " + cmd
            logged_system(recdict, cmd)
        elif fscheme == 'rsync':
            # Install rsync when needed.
            from DoInstall import assert_pkg
            from Work import getrpstack
            assert_pkg(getrpstack(recdict), recdict, "rsync")
            cmd = '%s %s:%s %s' % (get_progname_rsync(recdict),
                                                      fmach, fpath, resfile)
            if os.name != "posix":
                # Can't use "tee" and ssh may prompt for a password.
                cmd = "{interactive} " + cmd
            logged_system(recdict, cmd)
        elif fscheme == 'ftp':
            # urlretrieve() doesn't work well for ftp in Python 1.5, use ftplib.
            # This also allows us to cache the connections.
            # And it avoids a bug in urllib that trying to download a file without
            # read permission results in a directory listing.
            from CopyMove import ftpConnect
            # Create the output file first (opening ftp connection may take time).
            msg = ''
            try:
                ftpfile = open(resfile, "wb")
            except StandardError, e:
                msg = _('Cannot open "%s" for writing: %s') % (resfile, e)
            if not msg:
                # Output file is open; connect (possibly reusing a cached
                # connection from ftpConnect()).
                ftp, msg = ftpConnect(fmach)
                if not msg:
                    # Invoke the ftp command.  Use a passive connection, this
                    # appears to work best.
                    import ftplib
                    ftp.set_pasv(1)
                    try:
                        ftp.retrbinary("RETR " + fpath, ftpfile.write, 8192)
                    except ftplib.all_errors, e:
                        msg = e
                # Close the output file whether the transfer worked or not.
                ftpfile.close()
                if msg:
                    # Delete an empty or truncated result file.
                    os.remove(resfile)
            if msg:
                raise IOError, msg
    else:
        # Remaining schemes (e.g. http): let urllib do the work.
        if fname == '':
            # read to temporary file
            resfile, h = urlretrieve(url)
        else:
            resfile, h = urlretrieve(url, fname)
            if resfile != fname:
                # Using a cached file, need to make a copy.
                shutil.copy2(resfile, fname)
                resfile = fname
                urlcleanup()
        # When obtaining a file through http:// an non-existing page isn't
        # noticed.  Check for a 404 error by looking in the file.  Limit the
        # search to the first 1000 bytes, an error page should not be longer,
        # while an actual file can be very long.
        f = open(resfile)
        txt = f.read(1000)
        f.close()
        import re
        if re.search("<title>\\s*404\\s*not\\s*found", txt, re.IGNORECASE):
            try_delete(resfile)
            raise IOError, (_("fetch_%s() encountered a 404 error for %s")
                                                        % (fscheme, url))
        if h:
            # The MIME headers from urlretrieve() may carry the file date.
            rtime = get_header_date(h)
    if fname == '':
        msg_info(recdict, _('Downloaded "%s"' % url))
    else:
        msg_info(recdict, _('Downloaded "%s" to "%s"' % (url, fname)))
    return resfile, rtime
def url_cleanup(scheme):
    """Cleanup after using url_download with scheme "scheme"."""
    # scp and rsync go through an external program; urllib caches nothing
    # for them, so there is nothing to clean up.
    if scheme in ['scp', 'rsync']:
        return
    urlcleanup()        # remove any cached file from urlretrieve()
def download_file(recdict, url_dl, node, use_cache):
    """Download a file according to "url_dl" and copy it over "node.name".
    Use the cache when "use_cache" is non-zero, otherwise obtain a fresh
    copy.
    Can also be used for a local file, it is copied.
    Return non-zero for success."""
    from Cache import local_name
    from VersCont import separate_scheme
    # When copying a local file invoke local_name() and copy the file, the
    # cache isn't really used, even though "use_cache" is set.
    url = url_dl["name"]
    scheme, fname = separate_scheme(url)
    if scheme == "file" or not is_url(url):
        use_cache = 1
    # When not using the cache download directly to the destination file.
    # Avoids that the cache is filled with files that are never used again.
    if not use_cache:
        if skip_commands():
            # "Skip" mode: report and pretend the download succeeded.
            msg_info(recdict, _('Skip download for "%s"') % node.short_name())
            return 1
        try:
            f, rtime = url_download(recdict, url, node.absname)
        except EnvironmentError, e:
            # A failed download is reported but not fatal; the caller checks
            # the return value.
            msg_note(recdict, _('Cannot download "%s" to "%s": %s')
                                      % (url, node.short_name(), str(e)))
            return 0
        return 1
    # Optional attribute controlling when the cached copy is refreshed.
    if url_dl.has_key("cache_update"):
        cu = url_dl["cache_update"]
    else:
        cu = None
    # This downloads the file when it's not in the cache already.
    # TODO: handle attributes (e.g., login and password)
    fname, used_cache = local_name(recdict, url, cu)
    if fname and os.path.exists(fname):
        if skip_commands():
            msg_info(recdict, _('Skip copy file: "%s"') % node.short_name())
        else:
            # copy the downloaded file over the original one.
            try:
                shutil.copyfile(fname, node.absname)
            except IOError, e:
                raise UserError, (_('Cannot copy "%s" to "%s": ')
                                              % (fname, node.name) + str(e))
            if used_cache:
                msg_info(recdict, _('Copied file from cache: "%s"')
                                                      % node.short_name())
            else:
                msg_info(recdict, _('Copied file "%s" to "%s"')
                                            % (fname, node.short_name()))
        return 1
    # local_name() produced nothing usable: failure.
    return 0
def upload_file(recdict, url_dl, nodelist):
    """Upload nodes in "nodelist" according to "url_dl".
    Return list of nodes that failed."""
    # TODO: use other attributes in url_dl, e.g. a login name.
    from CopyMove import remote_copy_move
    from VersCont import repl_file_name
    failed = []
    # Make a copy of the nodelist.  Remove items that have been done until none
    # are left.
    todolist = nodelist[:]
    while todolist:
        # Collect nodes that are in the same directory and where the source and
        # destination file names are identical.
        to_item = ''            # target dir (or file name) for this round
        fromlist = []           # source files batched for this round
        # Iterate over a copy so nodes can be removed from todolist.
        for node in todolist[:]:
            to_name = repl_file_name(url_dl["name"], node.name)
            if os.path.basename(node.absname) == os.path.basename(to_name):
                d = os.path.dirname(to_name)
                if not d:
                    d = "."
                if not to_item or d == to_item:
                    # Same target directory: batch with the others.
                    fromlist.append({"name" : node.absname})
                    todolist.remove(node)
                    to_item = d
            elif not to_item:
                # Source and target name are different, must copy this one by
                # itself.
                fromlist = [ {"name" : node.absname } ]
                todolist.remove(node)
                to_item = to_name
                break
        # When there is only one item include the file name in the to_item,
        # this avoids using the name of a directory for a file.
        # NOTE(review): "to_name" here is the value left by the last loop
        # iteration; this relies on the loop above always running at least
        # once (todolist is non-empty) — confirm.
        if len(fromlist) == 1 and to_item != to_name:
            to_item = path_join(to_item, os.path.basename(fromlist[0]["name"]))
        msg_info(recdict, 'Uploading %s to %s' % (str(map(lambda x:
                           x["name"], shorten_dictlist(fromlist))), to_item))
        flist = remote_copy_move([], recdict, 1, fromlist,
                       { "name" : to_item }, {"mkdir": 1}, 0, errmsg = 0)
        # Find the nodes for the failed file names.
        for f in flist:
            for node in nodelist:
                if node.absname == f:
                    failed.append(node)
    return failed
def remote_remove(recdict, url_dl, node):
    """Delete remote file for node "node" according to "url_dl"."""
    # Not implemented; report it and pretend success.
    target = url_dl["name"]
    msg_info(recdict, 'Removing "%s" NOT IMPLEMENTED YET' % target)
    return 1
# vim: set sw=4 et sts=4 tw=79 fo+=l:
|