# $SnapHashLicense:
# SnapLogic - Open source data services
# Copyright (C) 2008-2009, SnapLogic, Inc. All rights reserved.
# See http://www.snaplogic.org for more information about
# the SnapLogic project.
# This program is free software, distributed under the terms of
# the GNU General Public License Version 2. See the LEGAL file
# at the top of the source tree.
# "SnapLogic" is a trademark of SnapLogic, Inc.
# $
# $Id: snap_crypt.py 7646 2009-05-11 20:33:05Z darin $
"""
This module contains hashing and obfuscation functions.
For starters, we have some obfuscation-related functions.
Not really 'crypt', but this module seemed to be the best
place for them.
At the end then, we have the public domain implementation of
the md5crypt algorithm. We would like to fully acknowledge
Poul-Henning Kamp, the original author of the md5crypt algorithm,
as well as Michal Wallace, who has done this Python implementation
of the algorithm.
SnapLogic has done two small modifications: According to the
Python documentation, the md5 module is outdated with Python2.5.
Instead, it is recommended to use hashlib. We have done this
very small change.
Also, we have removed the test cases at the end of the module.
Other than that, the program text at the end was left entirely
intact.
"""
import os
import array
import random
import base64
import string
# Byte values used by obfuscate() for its random padding and random seed:
# the character codes of 'a'..'y' followed by 'A'..'Y'.  range() excludes
# its end point, so 'z' and 'Z' are never picked; deobfuscate() reads the
# seed back from the data, so the exact set has no effect on decoding.
# list() keeps the concatenation working where range() is a lazy object.
seed_array = list(range(ord('a'), ord('z'))) + list(range(ord('A'), ord('Z')))
# Letters from which obfuscate() draws its trailing filler character.
# Other module-level constants may be built from this string, so keep its
# value unchanged.
char_array = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
# -------------------------------------------------------------------
# We don't want to send passwords and credentials that are needed
# by our components to contact other servers across the wire in clear
# text. We don't want to store them in clear text either. However,
# without proper key management, we cannot securely encrypt them.
# Since insecure encryption is not any better than mere obfuscation,
# we are just sticking to obfuscation for now. If you want to
# secure the transfer of data between server processes, please use
# SSL. The README contains information on how to set up SnapLogic
# to run behind a proxy that uses SSL. If you want to secure the
# data on disk, please place the repository in directories with
# proper, secure access rights.
# -------------------------------------------------------------------
def obfuscate(text):
    """
    Obfuscate some unicode text.

    Multiple obfuscations of the same text should all look
    different and have different lengths. That 'looks' more
    secure, and makes trivial plain-text attacks a bit more
    difficult. But then, of course, it's not like this
    algorithm here is particularly 'secret' (not that this
    would be a good idea anyway).

    So, why bother? Well, the results are sufficiently random
    looking to allow a later drop in replacement with something
    that actually encrypts, should we ever choose to do so.

    The byte layout produced here is the one deobfuscate() reads:
    the UTF-8 bytes of the text, 3 to 17 random pad bytes, one byte
    holding (seed XOR pad count), the seed byte and finally an 8-bit
    XOR checksum over everything before it.

    @param text:    Some text.
    @type  text:    string (unicode)

    @return:        An obfuscated version of the same string, or None
                    if the text cannot be encoded as UTF-8.
    @rtype:         string

    """
    # Complicated by the fact that text may be unicode
    try:
        data = bytearray(text.encode("utf-8"))
    except (AttributeError, UnicodeError):
        return None

    # The result should have a random length, so we are
    # adding some random padding bytes
    random_pad_len = random.randint(3, 17)          # How many?
    data.extend(random.choice(seed_array) for _ in range(random_pad_len))
    last_pad = len(data) - 1                        # Position of last pad byte

    # We select a random seed. The number of padding bytes
    # is appended (obfuscated by the random seed) and also
    # the random seed itself.
    random_seed = random.choice(seed_array)
    data.append(random_seed ^ random_pad_len)
    data.append(random_seed)

    # Process all bytes backwards, starting from the last random pad:
    # every byte is XORed with the already obfuscated byte after it.
    prev = random_seed
    for i in range(last_pad, -1, -1):
        data[i] ^= prev
        prev = data[i]

    # Calculate a simple 8-bit checksum and append
    csum = 0
    for b in data:
        csum ^= b
    data.append(csum)

    # Don't like the '=' padding bytes that base64 encoding adds
    # in the end. We count and remove them and then add a character
    # in the very end that tells us how many of those we have removed.
    b64_result = base64.b64encode(bytes(data))
    if not isinstance(b64_result, str):
        # b64encode() hands back bytes where str is a text type.
        b64_result = b64_result.decode("ascii")
    stripped = b64_result.rstrip("=")
    num_b64_pads = len(b64_result) - len(stripped)
    if num_b64_pads:
        return stripped + str(num_b64_pads)
    # Nothing was removed: a random letter (never a digit) marks that.
    return b64_result + random.choice(char_array)
def deobfuscate(text):
    """
    De-obfuscates a string and returns the original.

    This is the opposite to obfuscate().

    @param text:    Some obfuscated text as produced by obfuscate().
    @type  text:    string (unicode)

    @return:        A clear-text version of the same string, or None if
                    the text is not a valid obfuscated string (bad base64,
                    wrong checksum, inconsistent lengths, ...).
    @rtype:         string

    """
    try:
        # The last character implies the number of "=" pads that
        # were removed from the base64 encoding. We are restoring
        # those first. A non-digit means that none were removed.
        marker = text[-1]
        if marker.isdigit():
            num_pads = int(marker)
        else:
            num_pads = 0
        data = bytearray(base64.b64decode(text[:-1] + "=" * num_pads))

        # We need at least the pad count, the seed and the checksum.
        if len(data) < 3:
            return None

        # Last byte is a checksum over all bytes before it
        new_csum = 0
        for b in data[:-1]:
            new_csum ^= b
        if new_csum != data[-1]:
            return None

        # Next up from the end is the random seed
        random_seed = data[-2]

        # The random pad length is next
        p = len(data) - 3
        random_pad_len = data[p] ^ random_seed

        # We can now 'decrypt' all the bytes the same way we
        # encrypted them in 'obfuscate': each byte was XORed with
        # the obfuscated byte that follows it.
        prev = random_seed
        for i in range(p - 1, -1, -1):
            new_prev = data[i]
            data[i] = new_prev ^ prev
            prev = new_prev

        orig_len = p - random_pad_len
        if orig_len < 0:
            # Claims more padding than there is data: not ours.
            return None
        return bytes(data[:orig_len]).decode("utf-8")
    except Exception:
        # Callers such as output_obfuscate() rely on a None result to
        # recognise text that was never obfuscated in the first place.
        return None
def output_obfuscate(text, n):
    """
    Produce string representation of obfuscated parameter.

    'n' determines if the entire output should be obfuscated
    (0), whether the first n characters should be shown in
    clear text (positive integer) or the last n characters
    in the clear (negative integer).

    If the entire output is to be obfuscated, it simply
    returns 8 "*". Otherwise, it returns n clear text
    characters, followed or preceded by exactly as many "*"
    as there would be remaining characters in the text.

    The function sees if it can deobfuscate the provided
    text. If that fails, it uses the text as is.

    @param text:        Possibly obfuscated text.
    @type  text:        string

    @param n:           An integer, indicating how many
                        (if any) characters should be shown
                        in clear text and whether these
                        characters are at the start or end
                        of the text.
    @type  n:           integer

    @return:            String representation of the
                        obfuscated text, using "*" to show
                        the obfuscation.
    @rtype:             string

    """
    if n == 0:
        return "********"

    # See if we can deobfuscate the text. A failure may show up either
    # as a None result or as an exception; both mean "use text as is".
    try:
        ctext = deobfuscate(text)
    except Exception:
        ctext = None
    # Only None signals failure: an empty string is a valid clear text.
    if ctext is None:
        ctext = text

    if n > 0:
        # The first 'n' characters in clear text, the rest as "*"
        return ctext[:n] + "*" * len(ctext[n:])
    # The last '-n' characters in clear text, everything before as "*"
    return "*" * len(ctext[:n]) + ctext[n:]
def make_salt():
    """
    Return a random 8 character string as a salt.

    The characters are drawn from the ASCII letters and digits only.
    string.ascii_letters is used instead of string.letters because the
    latter depends on the locale and does not exist in newer Python
    versions.

    @return:    An 8 character salt.
    @rtype:     string

    """
    alphabet = string.ascii_letters + string.digits
    # 8 bytes of salt
    return "".join(random.choice(alphabet) for _ in range(8))
# -------------------------------------------------------------
# Here now the mostly original md5crypt program text, including
# all acknowledgements.
# -------------------------------------------------------------
# md5crypt.py
# 0423.2000 by michal wallace http://www.sabren.com/
# based on perl's Crypt::PasswdMD5 by Luis Munoz (lem@cantv.net)
# based on /usr/src/libcrypt/crypt.c from FreeBSD 2.2.5-RELEASE
# Carey Evans - http://home.clear.net.nz/pages/c.evans/
# Dennis Marti - http://users.starpower.net/marti1/
# For the patches that got this thing working!
"""md5crypt.py - Provides interoperable MD5-based crypt() function
import md5crypt.py
cryptedpassword = md5crypt.md5crypt(password, salt);
unix_md5_crypt() provides a crypt()-compatible interface to the
rather new MD5-based crypt() function found in modern operating systems.
It's based on the implementation found on FreeBSD 2.2.[56]-RELEASE and
contains the following license in it:
"THE BEER-WARE LICENSE" (Revision 42):
<phk@login.dknet.dk> wrote this file. As long as you retain this notice you
can do whatever you want with this stuff. If we meet some day, and you think
this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
apache_md5_crypt() provides a function compatible with Apache's
.htpasswd files. This was contributed by Bryan Hart <bryan@eai.com>.
"""
# Prefix that marks an MD5-based crypt() hash and is mixed into the digest.
MAGIC = '$1$'                   # Magic string
# The 64 character alphabet of crypt()'s radix-64 encoding, in value order
# (used by to64()). It is spelled out in full so that the hash output cannot
# change by accident when an unrelated constant of this module is edited.
ITOA64 = "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
# Modified to use hashlib, rather than the md5 module, since that supposedly
# is not deprecated.
import hashlib
def to64(v, n):
    """
    Encode the low bits of an integer with the crypt() radix-64 alphabet.

    Six bits are consumed per output character, least significant
    group first.

    @param v:   The value to encode.
    @type  v:   integer

    @param n:   Number of characters to produce. Zero or a negative
                number yields an empty string.
    @type  n:   integer

    @return:    n characters taken from ITOA64.
    @rtype:     string

    """
    chars = []
    for _ in range(n):
        chars.append(ITOA64[v & 0x3f])
        v >>= 6
    return ''.join(chars)
def apache_md5_crypt(pw, salt):
    """
    Hash a password the way Apache's .htpasswd files expect it.

    This is unix_md5_crypt() with the magic string changed to the
    one used by Apache.

    @param pw:      The clear text password.
    @type  pw:      string

    @param salt:    The salt, optionally prefixed with '$apr1$'.
    @type  salt:    string

    @return:        The hash in the form '$apr1$<salt>$<digest>'.
    @rtype:         string

    """
    # change the Magic string to match the one used by Apache
    return unix_md5_crypt(pw, salt, '$apr1$')
def _md5crypt_bytes(value):
    # Return value as a byte string; text input is encoded as UTF-8.
    if isinstance(value, bytes):
        return value
    return value.encode("utf-8")


def unix_md5_crypt(pw, salt, magic=None):
    """
    Compute the MD5-based crypt() hash of a password.

    The salt may be passed bare or in the form found in a stored hash
    ('<magic><salt>$...'): a leading magic string is removed, anything
    from the first '$' on is dropped and the rest is cut to 8 bytes.
    The salt is expected to consist of ASCII characters.

    All digest input is handled as bytes (text arguments are encoded as
    UTF-8), because the intermediate MD5 digests are binary data.

    @param pw:      The clear text password.
    @type  pw:      string

    @param salt:    The salt, optionally with magic prefix and '$' suffix.
    @type  salt:    string

    @param magic:   The magic string to use. Defaults to MAGIC ('$1$').
    @type  magic:   string

    @return:        The hash in the form '<magic><salt>$<digest>', where
                    the digest is 22 characters of the ITOA64 alphabet.
    @rtype:         string

    """
    if magic is None:
        magic = MAGIC

    pw_b = _md5crypt_bytes(pw)
    magic_b = _md5crypt_bytes(magic)
    salt_b = _md5crypt_bytes(salt)

    # Take care of the magic string if present
    if salt_b.startswith(magic_b):
        salt_b = salt_b[len(magic_b):]

    # salt can have up to 8 characters:
    salt_b = salt_b.split(b'$', 1)[0][:8]

    ctx = pw_b + magic_b + salt_b

    final = hashlib.md5(pw_b + salt_b + pw_b).digest()

    # Add one digest byte per password byte, at most 16 per step.
    for pl in range(len(pw_b), 0, -16):
        ctx += final[:min(pl, 16)]

    # Now the 'weird' xform: for every bit of the password length, add a
    # zero byte if the bit is set and the first password byte otherwise.
    i = len(pw_b)
    while i:
        if i & 1:
            ctx += b'\x00'
        else:
            ctx += pw_b[:1]
        i >>= 1

    final = hashlib.md5(ctx).digest()

    # The following is supposed to make things run slower. The reference
    # algorithm uses exactly 1000 rounds; any other count yields hashes
    # that crypt() and Apache do not accept.
    for i in range(1000):
        odd = i & 1
        if odd:
            ctx1 = pw_b
        else:
            ctx1 = final

        if i % 3:
            ctx1 += salt_b

        if i % 7:
            ctx1 += pw_b

        if odd:
            ctx1 += final
        else:
            ctx1 += pw_b

        final = hashlib.md5(ctx1).digest()

    # Final xform: the 16 digest bytes are emitted in this fixed, shuffled
    # order, three bytes (four characters) at a time, plus a last single
    # byte (two characters).
    f = bytearray(final)            # gives integer byte values
    passwd = ''
    for hi, mid, lo in ((0, 6, 12), (1, 7, 13), (2, 8, 14),
                        (3, 9, 15), (4, 10, 5)):
        passwd += to64((f[hi] << 16) | (f[mid] << 8) | f[lo], 4)
    passwd += to64(f[11], 2)

    prefix = magic_b + salt_b + b'$'
    if not isinstance(prefix, str):
        # Return a text string where str and bytes are different types.
        prefix = prefix.decode("utf-8")
    return prefix + passwd
# Public alias: the md5crypt docstring advertises md5crypt(password, salt)
# as the entry point, so that name is bound to unix_md5_crypt. It is the
# same function object, i.e. it uses the '$1$' magic unless told otherwise.
md5crypt = unix_md5_crypt
def generate_random_string():
    """
    Generate a random value.

    30 bytes from os.urandom() are encoded with the URL-safe base64
    alphabet, which gives 40 characters without any '=' padding.

    @return:    A random 40 character string.
    @rtype:     string

    """
    token = base64.urlsafe_b64encode(os.urandom(30))
    if not isinstance(token, str):
        # urlsafe_b64encode() hands back bytes where str is a text type.
        token = token.decode("ascii")
    return token