# Part of the A-A-P recipe executive: Store signatures
# Copyright (C) 2002-2003 Stichting NLnet Labs
# Permission to copy and use this file is specified in the file COPYING.
# If this file is missing you can find it here: http://www.a-a-p.org/COPYING
#
# This module handles remembering signatures of targets and sources.
#
import os
import os.path
import string
# md5 is deprecated in Python 2.6, avoid a warning.
try:
import hashlib
except ImportError:
import md5
class hashlib:
md5 = md5.new
import time
from Util import *
from Message import *
from Filetype import ft_detect
import Global
# Both "signatures" dictionaries are indexed by the name of the target Node
# (file or directory).
# For non-virtual nodes the absolute name is used.
# Each entry is a dictionary indexed by the source-name@check-name and has a
# string value.
# The "@buildcheck" entry is used for the build commands.
# The "signfile" entry is used to remember the sign file that stores the
# signatures for this target.
# "old_signatures" is for the signatures when we started.
# "upd_signatures" is for the signatures of items for which the build commands
# were successfully executed and are to be stored for the next time.
# Example:
# {"/aa/bb/file.o" : {  "signfile" : "/aa/bb/AAPDIR/sign",
#                       "/aa/bb/file.c@md5" : "13a445e5",
#                       "@buildcheck" : "-O2"},
#  "/aa/bb/bar.o"  : {  "signfile" : "/aa/bb/mysign",
#                       "/aa/bb/bar-debug.c@time" : "143234",
#                       "/aa/bb/bar.h@time" : "423421"}}
old_signatures = {}
upd_signatures = {}
# "new_signatures" caches the signatures we computed this invocation.  It is a
# dictionary of dictionaries:
#       new_signatures["/path/file"]["md5"] = md5hex("/path/file")
# The key for the toplevel dictionary is the Node name.
# The key for the second level is the check name.  The target name isn't used
# here.
new_signatures = {}
# "chd_signatures" remembers which files were marked as changed with --changed
# or ":changed".
chd_signatures = {}
# Key used for the timestamp on the signature entry.  Used to find the last
# updated entry for published files.
timekey = "lastupdate"
# Name for the sign file relative to the directory of the target or the recipe.
sign_normal_fname = in_aap_dir("sign")
sign_normal_fname_len = len(sign_normal_fname)
# Remember which sign files have been read.
# Also when the file couldn't actually be read, so that we remember to write
# this file when signs have been updated.
# An entry exists when the file has been read.  Its value is non-zero when the
# file should be written back.
sign_files = {}
def get_sign_file(recdict, target, update):
    """Make sure the sign file used for "target" has been read.

    The file is read only the first time it is asked for.  A non-zero
    "update" marks the sign file as needing to be written back."""
    sign_fname = fname_fold(target.get_sign_fname())
    if sign_files.has_key(sign_fname):
        # Read before; only the "needs writing" flag may still change.
        if update:
            sign_files[sign_fname] = 1
        return
    # First request: register the file before reading it, the entry must exist
    # even when the file turns out to be unreadable.
    sign_files[sign_fname] = update
    sign_read(recdict, sign_fname)
def sign_file_dir(fname):
    """Return the directory that names stored in sign file "fname" are
    relative to, in uniform format (as produced by fname_fold())."""
    # Always drop the file name itself.
    parent = os.path.dirname(fname)
    # For the normal sign file ("AAPDIR/sign") the AAPDIR part is dropped as
    # well, names are relative to the directory that contains AAPDIR.
    tail = fname[-sign_normal_fname_len:]
    if (len(fname) >= sign_normal_fname_len
            and fname_fold(tail) == fname_fold(sign_normal_fname)):
        parent = os.path.dirname(parent)
    return fname_fold(parent)
# In the sign files, file names are stored with a leading "-" for a virtual
# node and "=" for a file name. Expand to an absolute name for non-virtual
# nodes.
def sign_expand_name(recdict, dir, name):
    """Turn "name" as stored in a sign file back into a node name.

    "dir" is the directory the sign file entries are relative to.  The first
    character of "name" is the type marker: with '-' the remainder is
    returned unchanged.  Otherwise the remainder is returned as-is when it is
    absolute, or made absolute relative to "dir"."""
    rest = name[1:]
    if name[0] == '-' or os.path.isabs(rest):
        return rest

    rest_len = len(rest)
    if rest_len > 3 and rest[:3] == "../":
        # Strip one trailing component from "dir" for every leading "../".
        # os.path.normpath() is avoided, it is a bit slow (it does more than
        # removing ".." items).
        dir_end = len(dir)
        skip = 3
        while 1:
            dir_end = string.rfind(dir, "/", 0, dir_end)
            if dir_end < 0:
                # "cannot happen": sign file corrupted?  Fall through to
                # return the name with the ".." (equivalent to sign not
                # found).
                msg_error(recdict, _('In sign file: Too many ".." in "%s" for directory "%s"') % (name, dir))
                break
            if skip + 3 < rest_len and rest[skip:skip + 3] == "../":
                skip = skip + 3
            else:
                return dir[:dir_end + 1] + rest[skip:]

    # Make a full path by joining the dir and the file name.
    return fname_fold(os.path.join(dir, rest))
def sign_reduce_name(dir, name):
    """Return "name" in the form that is stored in a sign file: '-' plus the
    name when it is not absolute, otherwise '=' plus the name shortened with
    shorten_name() for directory "dir"."""
    if not os.path.isabs(name):
        return '-' + fname_fold(name)
    return '=' + fname_fold(shorten_name(name, dir))
#
# A sign file stores the signatures for items (sources and targets) with the
# values they had when they were computed in the past.
# The format of each line is:
# =foo.o<ESC>=foo.c@md5_c=012346<ESC>...<ESC>\n
# "md5_c" can be "md5", "time", etc. Note that it's not always equal to
# the "check" attribute, both "time" and "older" use "time" here.
def sign_read(recdict, fname):
"""Read the signature file "fname" into our dictionary of signatures."""
basedir = sign_file_dir(fname)
try:
f = open(fname, "rb")
for line in f.readlines():
e = string.find(line, "\033")
if e > 0: # Only use lines with an ESC
name = sign_expand_name(recdict, basedir, line[:e])
old_signatures[name] = {"signfile" : fname_fold(fname)}
while 1:
s = e + 1
e = string.find(line, "\033", s)
if e < 1:
break
i = string.rfind(line, "=", s, e)
if i < 1:
break
old_signatures[name][sign_expand_name(recdict,
basedir, line[s:i])] = line[i + 1:e]
f.close()
except StandardError, e:
# TODO: handle errors? It's not an error if the file does not exist.
msg_note(recdict, (_('Cannot read sign file "%s": ')
% shorten_name(fname)) + str(e))
def sign_write_all(recdict):
    """Write back every sign file that was marked as needing an update."""
    # There is no locking: this assumes we are the only one updating the
    # signature files.  Sharing them with others wouldn't make sense, since
    # building would fail as well.
    for fname, needs_write in sign_files.items():
        if needs_write:
            sign_write(recdict, fname)
def sign_write(recdict, fname):
"""Write one updated signature file."""
sign_dir = os.path.dirname(fname)
if not os.path.exists(sign_dir):
try:
os.makedirs(sign_dir)
except StandardError, e:
msg_warning(recdict,
(_('Cannot create directory for signature file "%s": ')
% fname) + str(e))
try:
f = open(fname, "wb")
except StandardError, e:
msg_warning(recdict,
(_('Cannot open signature file for writing: "%s": ')
% fname) + str(e))
return
def write_sign_line(f, basedir, s, old, new):
"""Write a line to sign file "f" in directory "basedir" for item "s",
with checks from "old", using checks from "new" if they are present."""
f.write(sign_reduce_name(basedir, s) + "\033")
# Go over all old checks, write all of them, using the new value
# if it is available.
for c in old.keys():
if c != "signfile":
if new and new.has_key(c):
val = new[c]
else:
val = old[c]
f.write("%s=%s\033" % (sign_reduce_name(basedir, c), val))
# Go over all new checks, write the ones for which there is no old
# value.
if new:
for c in new.keys():
if c != "signfile" and not old.has_key(c):
f.write("%s=%s\033" % (sign_reduce_name(basedir, c),
new[c]))
f.write("\n")
basedir = sign_file_dir(fname)
try:
# Go over all old signatures, write all of them, using checks from
# upd_signatures when they are present.
# When the item is in upd_signatures, use the directory specified
# there, otherwise use the directory of old_signatures.
for s in old_signatures.keys():
if upd_signatures.has_key(s):
if upd_signatures[s]["signfile"] != fname:
continue
new = upd_signatures[s]
else:
if old_signatures[s]["signfile"] != fname:
continue
new = None
write_sign_line(f, basedir, s, old_signatures[s], new)
# Go over all updated signatures, write only the ones for which there
# is no old signature.
for s in upd_signatures.keys():
if (not old_signatures.has_key(s)
and upd_signatures[s]["signfile"] == fname):
write_sign_line(f, basedir, s, upd_signatures[s], None)
f.close()
except StandardError, e:
msg_warning(recdict, (_('Write error for signature file "%s": '),
fname) + str(e))
def hexdigest(m):
    """Turn an md5 object into a string of lower case hex characters."""
    # Use the native method when the object has one.
    if hasattr(m, "hexdigest"):
        return m.hexdigest()
    # NOTE: hexdigest() is a method in the Python 2.0 interface of the native
    # md5 module, not in Python 1.5.  Do the conversion here then; digest()
    # returns a string with one character per byte.
    r = ''
    for c in m.digest():
        r = r + ("%02x" % ord(c))
    return r
def check_md5(recdict, fname, msg = 1):
if not os.path.isfile(fname):
# A non-existing file isn't that bad, could be a virtual target that
# wasn't marked as being virtual.
if msg:
msg_note(recdict,
_('Cannot compute md5 checksum for "%s": it does not exist')
% fname)
return "unknown"
try:
f = open(fname, "rb")
m = hashlib.md5()
while 1:
# Read big blocks at a time for speed, but don't read the whole
# file at once to reduce memory usage.
data = f.read(32768)
if not data:
break
m.update(data)
f.close()
res = hexdigest(m)
except StandardError, e:
if msg:
msg_warning(recdict, (_('Cannot compute md5 checksum for "%s": ')
% fname) + str(e))
res = "unknown"
return res
def check_c_md5(recdict, fname):
"""Compute an md5 signature after filtering out irrelevant items for C
code (white space and comments)."""
try:
f = open(fname)
except StandardError, e:
# Can't open a URL here.
msg_warning(recdict, (_('Cannot compute md5 checksum for "%s": ')
% fname) + str(e))
return "unknown"
m = hashlib.md5()
inquote = 0
incomment = 0
while 1:
# Read one line at a time.
try:
data = f.readline()
except StandardError, e:
# Can't read the file.
msg_warning(recdict, (_('Cannot read "%s": ') % fname) + str(e))
return "unknown"
if not data:
break
# Filter out irrelevant changes:
# - Collapse sequences of white space into one space.
# - Remove comments.
# TODO: double-byte characters may have a backslash or double quote
# as their second byte, how to know this?
data_len = len(data) - 1
s = 0
skipwhite = 1
i = 0
while i < data_len:
if inquote:
# Only need to search for the endquote.
while i < data_len:
c = data[i]
i = i + 1
if c == '"':
inquote = 0
break
elif c == '\\':
i = i + 1
continue
if incomment:
# Only need to search for the comment end "*/".
while i < data_len:
if data[i] == '*' and data[i + 1] == '/':
incomment = 0
i = i + 2
s = i
skipwhite = 1
break
i = i + 1
continue
c = data[i]
if c == ' ' or c == '\t':
# White space after non-white: dump text.
if not skipwhite:
m.update(data[s:i] + ' ')
# Skip white space
while 1:
i = i + 1
if i == data_len:
break
c = data[i]
if c != ' ' and c != '\t':
break
s = i
skipwhite = 0
if i == data_len:
break
if c == '/' and (data[i + 1] == '/' or data[i + 1] == '*'):
# Start of // or /* comment.
if i > s:
m.update(data[s:i] + ' ')
i = i + 1
if data[i] == '/':
s = data_len
break
incomment = 1
else:
skipwhite = 0
if c == "'":
# skip '"' or '\'', not the start of a sting
if data[i + 1] == '\\':
i = i + 1
i = i + 2
elif c == '"':
inquote = 1
i = i + 1
if not (incomment or skipwhite) and s < data_len:
m.update(data[s:data_len] + ' ')
try:
f.close()
except:
# Error while closing a read file???
pass
return hexdigest(m)
def buildcheckstr2sign(str):
    """Return the signature used for the buildcheck: the md5 checksum of the
    string "str" as hex characters."""
    digest = hashlib.md5(str)
    return hexdigest(digest)
def _sign_lookup(signatures, name, key):
    """
    Return the "key" signature of item "name" in dictionary "signatures".
    An empty string is returned when the item or the key is not present.
    "name" must have gone through fname_fold().
    """
    entry = signatures.get(name)
    if entry is None:
        return ''
    return entry.get(key, '')
def sign_clear(name):
    """
    Forget the signatures computed for item "name" in this invocation.
    Only a "cleared" marker is left behind, so that it can be noticed later
    that the item was cleared.
    Used when it has been build.
    """
    new_signatures[name] = {"cleared": 1}
def get_new_sign(recdict, name, check, force = 0):
    """Return the current "check" signature of item "name" as a string (also
    for timestamps).
    "name" is the absolute name for non-virtual nodes; it can be a URL.
    The result does not depend on the target.
    Computed signatures are cached in "new_signatures".
    When "force" is non-zero a cleared signature is computed anyway (for
    --touch)."""
    name = fname_fold(name)

    # When not executing build commands and a target has been pretended to be
    # build, its signature is cleared.  Don't recompute it then, the file will
    # not be different but we do want a different signature.
    if not force and skip_commands():
        entry = new_signatures.get(name)
        if entry is not None and entry.has_key("cleared"):
            return "cleared"

    cached = _sign_lookup(new_signatures, fname_fold(name), check)
    if cached:
        return cached

    # Not cached: compute the signature now.
    if check == "time":
        from Remote import url_time
        res = str(url_time(recdict, name))
    elif check == "md5":
        res = check_md5(recdict, name)
    elif check == "c_md5":
        res = check_c_md5(recdict, name)
    # TODO: other checks, defined with actions
    else:
        res = "unknown"

    # Store the new signature to avoid recomputing it many times.
    if not new_signatures.has_key(name):
        new_signatures[name] = {}
    new_signatures[name][check] = res
    return res
def sign_clear_target(recdict, target):
    """Remove the old and the updated signatures of node "target" and mark
    its sign file as needing to be written.
    Called after successfully executing build rules for "target";
    sign_updated() should be called next for each source."""
    get_sign_file(recdict, target, 1)
    node_name = fname_fold(target.get_name())
    for sigdict in (old_signatures, upd_signatures):
        if sigdict.has_key(node_name):
            del sigdict[node_name]
def sign_clear_file(fname, recursive):
    """Mark file "fname" as changed and drop its updated signatures.
    "recursive" is stored as the value of the "changed" mark.
    Used for ":changed" and "--changed=FILE"."""
    chd_signatures[full_fname(fname)] = recursive
    # NOTE(review): "fname" is used here as it was passed in, while the other
    # functions index upd_signatures with a fname_fold()ed node name --
    # confirm that callers pass a matching name.
    try:
        del upd_signatures[fname]
    except KeyError:
        pass
def sign_clear_all():
    """Start with a clean slate: replace all signature tables and the list of
    read sign files with empty ones.  Used when starting to execute a
    toplevel recipe."""
    global sign_files
    global old_signatures, upd_signatures, new_signatures, chd_signatures

    # Every table gets its own new dictionary.
    sign_files = {}
    old_signatures, upd_signatures = {}, {}
    new_signatures, chd_signatures = {}, {}
def _sign_upd_sign(recdict, target, key, value):
    """Set signature "key" of node "target" to "value" in "upd_signatures"
    and mark the sign file of the target as needing to be written."""
    get_sign_file(recdict, target, 1)
    node_name = fname_fold(target.get_name())
    entry = upd_signatures.get(node_name)
    if entry is None:
        # First updated signature for this target: remember its sign file.
        entry = {"signfile": fname_fold(target.get_sign_fname())}
        upd_signatures[node_name] = entry
    entry[key] = value

    # Update the timestamp on the target.
    entry[timekey] = str(time.time())
def sign_updated(recdict, source, dict, target):
    """Store the current signature of node "source" for node "target".
    Called after successfully executing the build rules for "target" from
    "source".  The check to use is obtained from dictionary "dict" and the
    attributes of the source."""
    src_name = source.get_name()
    check = check_name(recdict, src_name, dict, source.attributes)
    sign = get_new_sign(recdict, src_name, check, force = 1)
    _sign_upd_sign(recdict, target, src_name + '@' + check, sign)

    # If the source file was considered changed and the recursive attribute
    # was used, the target should be considered changed as well.
    if chd_signatures.get(fname_fold(src_name)):
        chd_signatures[fname_fold(target.get_name())] = 1
def buildcheck_updated(recdict, target, value):
    """Store "value" as the new buildcheck signature of node "target".
    Called after successfully executing the build rules for "target"."""
    # The buildcheck has no source name in front of the '@'.
    key = '@buildcheck'
    _sign_upd_sign(recdict, target, key, value)
def get_old_sign(recdict, name, check, target, rootname = None):
    """Return the stored "check" signature of item "name" for target node
    "target".
    "name" must be an absolute and normalized path.
    "rootname" is used for publishing and the "--contents" option: when
    given, the most recently updated signature of any node whose name starts
    with "rootname" is used instead of the one for "target".
    Returns "changed" when the item was marked as changed and an empty
    string when there is no signature."""
    # Check if this file was marked as changed.
    name = fname_fold(name)
    if chd_signatures.has_key(name):
        return "changed"

    # May need to read the sign file for this target.
    get_sign_file(recdict, target, 0)
    key = name + '@' + check

    if rootname:
        # Go through all updated and old signatures to check if "rootname"
        # matches.  Find the entry that was updated most recently.
        root = fname_fold(rootname)
        root_len = len(root)
        best = ''
        best_time = 0
        for sigdict in [upd_signatures, old_signatures]:
            for node_name, entry in sigdict.items():
                if len(node_name) <= root_len or node_name[:root_len] != root:
                    continue
                if not (entry.has_key(key) and entry.has_key(timekey)):
                    continue
                stamp = float(entry[timekey])
                if stamp > best_time:
                    best = entry[key]
                    best_time = stamp
        return best

    # Use the updated signature if it exists, otherwise use the old one.
    target_name = fname_fold(target.get_name())
    sign = _sign_lookup(upd_signatures, target_name, key)
    if not sign:
        sign = _sign_lookup(old_signatures, target_name, key)
    return sign
def check_name(recdict, name, itemdict, altdict = None):
    """Return the name of the check to be used for item "name".
    A "check" entry in "itemdict" is used first, then one in "altdict" (the
    attributes of the node) when given.  Without such an entry the check is
    "none" for a directory and the value of $DEFAULTCHECK otherwise."""
    if itemdict.has_key("check"):
        return itemdict["check"]
    if altdict and altdict.has_key("check"):
        return altdict["check"]

    # TODO: make mapping from name or filetype to check configurable
    #if itemdict.has_key("filetype"):
    #    type = itemdict["filetype"]
    #else:
    #    type = ft_detect(itemdict["name"])
    is_dir = (itemdict.get("directory")
              or (altdict and altdict.get("directory"))
              or os.path.isdir(name))
    if is_dir:
        # default check for directories: none
        return "none"
    # default check is given with $DEFAULTCHECK
    return get_var_val_int(recdict, "DEFAULTCHECK")
# vim: set sw=4 et sts=4 tw=79 fo+=l:
|