Renovated password hashing, including the forgotten files in r17253.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@17254 bcc190cf-cafb-0310-a4f2-bffc1f526a37
2024-12-01 15:42:04 +01:00 · 2011-12-23 03:53:56 +00:00 · 2011-12-23 03:53:56 +00:00 · 90e05aaeac
commit 90e05aaeac
parent dce820ff70
4 changed files with 623 additions and 63 deletions
--- a/django/contrib/auth/hashers.py
+++ b/django/contrib/auth/hashers.py
@ -0,0 +1,362 @@
+import hashlib
+
+from django.conf import settings
+from django.utils import importlib
+from django.utils.datastructures import SortedDict
+from django.utils.encoding import smart_str
+from django.core.exceptions import ImproperlyConfigured
+from django.utils.crypto import (
+    pbkdf2, constant_time_compare, get_random_string)
+
+
+UNUSABLE_PASSWORD = '!'  # This will never be a valid encoded hash
+HASHERS = None  # lazily loaded from PASSWORD_HASHERS
+PREFERRED_HASHER = None  # defaults to first item in PASSWORD_HASHERS
+
+
+def is_password_usable(encoded):
+    return (encoded is not None and encoded != UNUSABLE_PASSWORD)
+
+
+def check_password(password, encoded, setter=None, preferred='default'):
+    """
+    Returns a boolean of whether the raw password matches the encoded
+    digest (``algorithm$...``, or a bare 32 character MD5 hexdigest).
+
+    If setter is specified, it is called with the raw password when the
+    password is correct but was not hashed by the preferred hasher.
+    """
+    if not password or not is_password_usable(encoded):
+        return False
+
+    preferred = get_hasher(preferred)
+    raw_password = password  # kept unconverted for the setter callback
+    password = smart_str(password)
+    encoded = smart_str(encoded)
+
+    if len(encoded) == 32 and '$' not in encoded:  # legacy unsalted MD5
+        hasher = get_hasher('md5')
+    else:
+        algorithm = encoded.split('$', 1)[0]
+        hasher = get_hasher(algorithm)
+
+    must_update = hasher.algorithm != preferred.algorithm
+    is_correct = hasher.verify(password, encoded)
+    if setter and is_correct and must_update:
+        setter(raw_password)
+    return is_correct
+
+
+def make_password(password, salt=None, hasher='default'):
+    """
+    Turn a plain-text password into a hash for database storage
+
+    Same as encode() but generates a new random salt.  If
+    password is None or blank then UNUSABLE_PASSWORD will be
+    returned which disallows logins.
+    """
+    if not password:
+        return UNUSABLE_PASSWORD
+
+    hasher = get_hasher(hasher)
+    password = smart_str(password)
+
+    if not salt:
+        salt = hasher.salt()
+    salt = smart_str(salt)
+
+    return hasher.encode(password, salt)
+
+
+def load_hashers():  # (re)build HASHERS and PREFERRED_HASHER from settings
+    global HASHERS
+    global PREFERRED_HASHER
+    hashers = []
+    for backend in settings.PASSWORD_HASHERS:
+        try:
+            mod_path, cls_name = backend.rsplit('.', 1)
+            mod = importlib.import_module(mod_path)
+            hasher_cls = getattr(mod, cls_name)
+        except (AttributeError, ImportError, ValueError):
+            raise ImproperlyConfigured("hasher not found: %s" % backend)
+        hasher = hasher_cls()
+        if not getattr(hasher, 'algorithm', None):  # attr may be absent
+            raise ImproperlyConfigured("hasher doesn't specify an "
+                                       "algorithm name: %s" % backend)
+        hashers.append(hasher)
+    HASHERS = dict([(hasher.algorithm, hasher) for hasher in hashers])
+    PREFERRED_HASHER = hashers[0]  # first PASSWORD_HASHERS entry
+
+
+def get_hasher(algorithm='default'):
+    """
+    Returns an instance of a loaded password hasher.
+
+    If algorithm is 'default', the default hasher will be returned.
+    This function will also lazy import hashers specified in your
+    settings file if needed.
+    """
+    if hasattr(algorithm, 'algorithm'):
+        return algorithm
+
+    elif algorithm == 'default':
+        if PREFERRED_HASHER is None:
+            load_hashers()
+        return PREFERRED_HASHER
+    else:
+        if HASHERS is None:
+            load_hashers()
+        if algorithm not in HASHERS:
+            raise ValueError("Unknown password hashing algorithm '%s'. "
+                             "Did you specify it in the PASSWORD_HASHERS "
+                             "setting?" % algorithm)
+        return HASHERS[algorithm]
+
+
+def mask_hash(hash, show=6, char="*"):
+    """
+    Returns the given hash, with only the first ``show`` number shown. The
+    rest are masked with ``char`` for security reasons.
+    """
+    masked = hash[:show]
+    masked += char * len(hash[show:])
+    return masked
+
+
+class BasePasswordHasher(object):
+    """
+    Abstract base class for password hashers
+
+    When creating your own hasher, you need to override algorithm,
+    verify(), encode() and safe_summary().
+
+    PasswordHasher objects are immutable.
+    """
+    algorithm = None
+    library = None
+
+    def _load_library(self):
+        if self.library is not None:
+            if isinstance(self.library, (tuple, list)):
+                name, mod_path = self.library
+            else:
+                name = mod_path = self.library
+            try:
+                module = importlib.import_module(mod_path)
+            except ImportError:
+                raise ValueError("Couldn't load %s password algorithm "
+                                 "library" % name)
+            return module
+        raise ValueError("Hasher '%s' doesn't specify a library attribute" %
+                         self.__class__)
+
+    def salt(self):
+        """
+        Generates a cryptographically secure nonce salt in ascii
+        """
+        return get_random_string()
+
+    def verify(self, password, encoded):
+        """
+        Checks if the given password is correct
+        """
+        raise NotImplementedError()
+
+    def encode(self, password, salt):
+        """
+        Creates an encoded database value
+
+        The result is normally formatted as "algorithm$salt$hash" and
+        must be fewer than 128 characters.
+        """
+        raise NotImplementedError()
+
+    def safe_summary(self, encoded):
+        """
+        Returns a summary of safe values
+
+        The result is a dictionary and will be used where the password field
+        must be displayed to construct a safe representation of the password.
+        """
+        raise NotImplementedError()
+
+
+class PBKDF2PasswordHasher(BasePasswordHasher):
+    """
+    Secure password hashing using the PBKDF2 algorithm (recommended)
+
+    Configured to use PBKDF2 + HMAC + SHA256 with 10000 iterations.
+    The result is a 32 byte binary string.  Iterations may be changed
+    safely but you must rename the algorithm if you change SHA256.
+    """
+    algorithm = "pbkdf2_sha256"
+    iterations = 10000
+    digest = hashlib.sha256
+
+    def encode(self, password, salt, iterations=None):
+        assert password
+        assert salt and '$' not in salt
+        if not iterations:
+            iterations = self.iterations
+        hash = pbkdf2(password, salt, iterations, digest=self.digest)
+        hash = hash.encode('base64').strip()
+        return "%s$%d$%s$%s" % (self.algorithm, iterations, salt, hash)
+
+    def verify(self, password, encoded):
+        algorithm, iterations, salt, hash = encoded.split('$', 3)
+        assert algorithm == self.algorithm
+        encoded_2 = self.encode(password, salt, int(iterations))
+        return constant_time_compare(encoded, encoded_2)
+
+    def safe_summary(self, encoded):
+        algorithm, iterations, salt, hash = encoded.split('$', 3)
+        assert algorithm == self.algorithm
+        return SortedDict([
+            ('algorithm', algorithm),
+            ('iterations', iterations),
+            ('salt', mask_hash(salt)),
+            ('hash', mask_hash(hash)),
+        ])
+
+
+class PBKDF2SHA1PasswordHasher(PBKDF2PasswordHasher):
+    """
+    Alternate PBKDF2 hasher which uses SHA1, the default PRF
+    recommended by PKCS #5. This is compatible with other
+    implementations of PBKDF2, such as openssl's
+    PKCS5_PBKDF2_HMAC_SHA1().
+    """
+    algorithm = "pbkdf2_sha1"
+    digest = hashlib.sha1
+
+
+class BCryptPasswordHasher(BasePasswordHasher):
+    """
+    Secure password hashing using the bcrypt algorithm (recommended)
+
+    This is considered by many to be the most secure algorithm but you
+    must first install the py-bcrypt library.  Please be warned that
+    this library depends on native C code and might cause portability
+    issues.
+    """
+    algorithm = "bcrypt"
+    library = ("py-bcrypt", "bcrypt")
+    rounds = 12
+
+    def salt(self):
+        bcrypt = self._load_library()
+        return bcrypt.gensalt(self.rounds)
+
+    def encode(self, password, salt):
+        bcrypt = self._load_library()
+        data = bcrypt.hashpw(password, salt)
+        return "%s$%s" % (self.algorithm, data)
+
+    def verify(self, password, encoded):
+        algorithm, data = encoded.split('$', 1)
+        assert algorithm == self.algorithm
+        bcrypt = self._load_library()
+        return constant_time_compare(data, bcrypt.hashpw(password, data))
+
+    def safe_summary(self, encoded):
+        algorithm, empty, algostr, work_factor, data = encoded.split('$', 4)
+        assert algorithm == self.algorithm
+        salt, checksum = data[:22], data[22:]
+        return SortedDict([
+            ('algorithm', algorithm),
+            ('work factor', work_factor),
+            ('salt', mask_hash(salt)),
+            ('checksum', mask_hash(checksum)),
+        ])
+
+
+class SHA1PasswordHasher(BasePasswordHasher):
+    """
+    The SHA1 password hashing algorithm (not recommended)
+    """
+    algorithm = "sha1"
+
+    def encode(self, password, salt):
+        assert password
+        assert salt and '$' not in salt
+        hash = hashlib.sha1(salt + password).hexdigest()
+        return "%s$%s$%s" % (self.algorithm, salt, hash)
+
+    def verify(self, password, encoded):
+        algorithm, salt, hash = encoded.split('$', 2)
+        assert algorithm == self.algorithm
+        encoded_2 = self.encode(password, salt)
+        return constant_time_compare(encoded, encoded_2)
+
+    def safe_summary(self, encoded):
+        algorithm, salt, hash = encoded.split('$', 2)
+        assert algorithm == self.algorithm
+        return SortedDict([
+            ('algorithm', algorithm),
+            ('salt', mask_hash(salt, show=2)),
+            ('hash', mask_hash(hash)),
+        ])
+
+
+class MD5PasswordHasher(BasePasswordHasher):
+    """
+    I am an incredibly insecure algorithm you should *never* use;
+    stores unsalted MD5 hashes without the algorithm prefix.
+
+    This class is implemented because Django used to store passwords
+    this way. Some older Django installs still have these values
+    lingering around so we need to handle and upgrade them properly.
+    """
+    algorithm = "md5"
+
+    def salt(self):
+        return ''
+
+    def encode(self, password, salt):
+        return hashlib.md5(password).hexdigest()
+
+    def verify(self, password, encoded):
+        encoded_2 = self.encode(password, '')
+        return constant_time_compare(encoded, encoded_2)
+
+    def safe_summary(self, encoded):
+        return SortedDict([
+            ('algorithm', self.algorithm),
+            ('hash', mask_hash(encoded, show=3)),
+        ])
+
+
+class CryptPasswordHasher(BasePasswordHasher):
+    """
+    Password hashing using UNIX crypt (not recommended)
+
+    The crypt module is not supported on all platforms.
+    """
+    algorithm = "crypt"
+    library = "crypt"
+
+    def salt(self):
+        return get_random_string(2)
+
+    def encode(self, password, salt):
+        crypt = self._load_library()
+        assert len(salt) == 2
+        data = crypt.crypt(password, salt)
+        # we don't need to store the salt, but Django used to do this
+        return "%s$%s$%s" % (self.algorithm, '', data)
+
+    def verify(self, password, encoded):
+        crypt = self._load_library()
+        algorithm, salt, data = encoded.split('$', 2)
+        assert algorithm == self.algorithm
+        return constant_time_compare(data, crypt.crypt(password, data))
+
+    def safe_summary(self, encoded):
+        algorithm, salt, data = encoded.split('$', 2)
+        assert algorithm == self.algorithm
+        return SortedDict([
+            ('algorithm', algorithm),
+            ('salt', salt),
+            ('hash', mask_hash(data, show=3)),
+        ])
+
--- a/django/contrib/auth/tests/hashers.py
+++ b/django/contrib/auth/tests/hashers.py
@ -0,0 +1,128 @@
+from django.conf.global_settings import PASSWORD_HASHERS as default_hashers
+from django.contrib.auth.hashers import (is_password_usable, 
+    check_password, make_password, PBKDF2PasswordHasher, load_hashers,
+    PBKDF2SHA1PasswordHasher, get_hasher, UNUSABLE_PASSWORD)
+from django.utils import unittest
+from django.utils.unittest import skipUnless
+from django.test.utils import override_settings
+
+
+try:
+    import crypt
+except ImportError:
+    crypt = None
+
+try:
+    import bcrypt
+except ImportError:
+    bcrypt = None
+
+
+class TestUtilsHashPass(unittest.TestCase):
+    def setUp(self):
+        load_hashers()
+
+    def test_simple(self):
+        encoded = make_password('letmein')
+        self.assertTrue(encoded.startswith('pbkdf2_sha256$'))
+        self.assertTrue(is_password_usable(encoded))
+        self.assertTrue(check_password(u'letmein', encoded))
+        self.assertFalse(check_password('letmeinz', encoded))
+
+    def test_pkbdf2(self):
+        encoded = make_password('letmein', 'seasalt', 'pbkdf2_sha256')
+        self.assertEqual(encoded, 
+'pbkdf2_sha256$10000$seasalt$FQCNpiZpTb0zub+HBsH6TOwyRxJ19FwvjbweatNmK/Y=')
+        self.assertTrue(is_password_usable(encoded))
+        self.assertTrue(check_password(u'letmein', encoded))
+        self.assertFalse(check_password('letmeinz', encoded))
+
+    def test_sha1(self):
+        encoded = make_password('letmein', 'seasalt', 'sha1')
+        self.assertEqual(encoded, 
+'sha1$seasalt$fec3530984afba6bade3347b7140d1a7da7da8c7')
+        self.assertTrue(is_password_usable(encoded))
+        self.assertTrue(check_password(u'letmein', encoded))
+        self.assertFalse(check_password('letmeinz', encoded))
+
+    def test_md5(self):
+        encoded = make_password('letmein', 'seasalt', 'md5')
+        self.assertEqual(encoded, '0d107d09f5bbe40cade3de5c71e9e9b7')
+        self.assertTrue(is_password_usable(encoded))
+        self.assertTrue(check_password(u'letmein', encoded))
+        self.assertFalse(check_password('letmeinz', encoded))
+
+    @skipUnless(crypt, "no crypt module to generate password.")
+    def test_crypt(self):
+        encoded = make_password('letmein', 'ab', 'crypt')
+        self.assertEqual(encoded, 'crypt$$abN/qM.L/H8EQ')
+        self.assertTrue(is_password_usable(encoded))
+        self.assertTrue(check_password(u'letmein', encoded))
+        self.assertFalse(check_password('letmeinz', encoded))
+
+    @skipUnless(bcrypt, "py-bcrypt not installed")
+    def test_bcrypt(self):
+        encoded = make_password('letmein', hasher='bcrypt')
+        self.assertTrue(is_password_usable(encoded))
+        self.assertTrue(encoded.startswith('bcrypt$'))
+        self.assertTrue(check_password(u'letmein', encoded))
+        self.assertFalse(check_password('letmeinz', encoded))
+
+    def test_unusable(self):
+        encoded = make_password(None)
+        self.assertFalse(is_password_usable(encoded))
+        self.assertFalse(check_password(None, encoded))
+        self.assertFalse(check_password(UNUSABLE_PASSWORD, encoded))
+        self.assertFalse(check_password('', encoded))
+        self.assertFalse(check_password(u'letmein', encoded))
+        self.assertFalse(check_password('letmeinz', encoded))
+
+    def test_bad_algorithm(self):
+        def doit():
+            make_password('letmein', hasher='lolcat')
+        self.assertRaises(ValueError, doit)
+
+    def test_low_level_pkbdf2(self):
+        hasher = PBKDF2PasswordHasher()
+        encoded = hasher.encode('letmein', 'seasalt')
+        self.assertEqual(encoded, 
+'pbkdf2_sha256$10000$seasalt$FQCNpiZpTb0zub+HBsH6TOwyRxJ19FwvjbweatNmK/Y=')
+        self.assertTrue(hasher.verify('letmein', encoded))
+
+    def test_low_level_pbkdf2_sha1(self):
+        hasher = PBKDF2SHA1PasswordHasher()
+        encoded = hasher.encode('letmein', 'seasalt')
+        self.assertEqual(encoded, 
+'pbkdf2_sha1$10000$seasalt$91JiNKgwADC8j2j86Ije/cc4vfQ=')
+        self.assertTrue(hasher.verify('letmein', encoded))
+
+    def test_upgrade(self):
+        self.assertEqual('pbkdf2_sha256', get_hasher('default').algorithm)
+        for algo in ('sha1', 'md5'):
+            encoded = make_password('letmein', hasher=algo)
+            state = {'upgraded': False}
+            def setter(password):
+                state['upgraded'] = True
+            self.assertTrue(check_password('letmein', encoded, setter))
+            self.assertTrue(state['upgraded'])
+
+    def test_no_upgrade(self):
+        encoded = make_password('letmein')
+        state = {'upgraded': False}
+        def setter():
+            state['upgraded'] = True
+        self.assertFalse(check_password('WRONG', encoded, setter))
+        self.assertFalse(state['upgraded'])
+
+    def test_no_upgrade_on_incorrect_pass(self):
+        self.assertEqual('pbkdf2_sha256', get_hasher('default').algorithm)
+        for algo in ('sha1', 'md5'):
+            encoded = make_password('letmein', hasher=algo)
+            state = {'upgraded': False}
+            def setter():
+                state['upgraded'] = True
+            self.assertFalse(check_password('WRONG', encoded, setter))
+            self.assertFalse(state['upgraded'])
+
+
+TestUtilsHashPass = override_settings(PASSWORD_HASHERS=default_hashers)(TestUtilsHashPass)
--- a/django/contrib/auth/utils.py
+++ b/django/contrib/auth/utils.py
@ -1,63 +0,0 @@
-import hashlib
-from django.utils.encoding import smart_str
-from django.utils.crypto import constant_time_compare
-
-UNUSABLE_PASSWORD = '!' # This will never be a valid hash
-
-def get_hexdigest(algorithm, salt, raw_password):
-    """
-    Returns a string of the hexdigest of the given plaintext password and salt
-    using the given algorithm ('md5', 'sha1' or 'crypt').
-    """
-    raw_password, salt = smart_str(raw_password), smart_str(salt)
-    if algorithm == 'crypt':
-        try:
-            import crypt
-        except ImportError:
-            raise ValueError('"crypt" password algorithm not supported in this environment')
-        return crypt.crypt(raw_password, salt)
-
-    if algorithm == 'md5':
-        return hashlib.md5(salt + raw_password).hexdigest()
-    elif algorithm == 'sha1':
-        return hashlib.sha1(salt + raw_password).hexdigest()
-    raise ValueError("Got unknown password algorithm type in password.")
-
-def get_random_string(length=12, allowed_chars='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'):
-    """
-    Returns a random string of length characters from the set of a-z, A-Z, 0-9
-    for use as a salt.
-
-    The default length of 12 with the a-z, A-Z, 0-9 character set returns
-    a 71-bit salt. log_2((26+26+10)^12) =~ 71 bits
-    """
-    import random
-    try:
-        random = random.SystemRandom()
-    except NotImplementedError:
-        pass
-    return ''.join([random.choice(allowed_chars) for i in range(length)])
-
-def check_password(raw_password, enc_password):
-    """
-    Returns a boolean of whether the raw_password was correct. Handles
-    hashing formats behind the scenes.
-    """
-    parts = enc_password.split('$')
-    if len(parts) != 3:
-        return False
-    algo, salt, hsh = parts
-    return constant_time_compare(hsh, get_hexdigest(algo, salt, raw_password))
-
-def is_password_usable(encoded_password):
-    return encoded_password is not None and encoded_password != UNUSABLE_PASSWORD
-
-def make_password(algo, raw_password):
-    """
-    Produce a new password string in this format: algorithm$salt$hash
-    """
-    if raw_password is None:
-        return UNUSABLE_PASSWORD
-    salt = get_random_string()
-    hsh = get_hexdigest(algo, salt, raw_password)
-    return '%s$%s$%s' % (algo, salt, hsh)
--- a/tests/regressiontests/utils/crypto.py
+++ b/tests/regressiontests/utils/crypto.py
@ -0,0 +1,133 @@
+
+import math
+import timeit
+import hashlib
+
+from django.utils import unittest
+from django.utils.crypto import pbkdf2
+
+
+class TestUtilsCryptoPBKDF2(unittest.TestCase):
+
+    # http://tools.ietf.org/html/draft-josefsson-pbkdf2-test-vectors-06
+    rfc_vectors = [
+        {
+            "args": {
+                "password": "password",
+                "salt": "salt",
+                "iterations": 1,
+                "dklen": 20,
+                "digest": hashlib.sha1,
+            },
+            "result": "0c60c80f961f0e71f3a9b524af6012062fe037a6",
+        },
+        {
+            "args": {
+                "password": "password",
+                "salt": "salt",
+                "iterations": 2,
+                "dklen": 20,
+                "digest": hashlib.sha1,
+            },
+            "result": "ea6c014dc72d6f8ccd1ed92ace1d41f0d8de8957",
+        },
+        {
+            "args": {
+                "password": "password",
+                "salt": "salt",
+                "iterations": 4096,
+                "dklen": 20,
+                "digest": hashlib.sha1,
+            },
+            "result": "4b007901b765489abead49d926f721d065a429c1",
+        },
+        # # this takes way too long :(
+        # {
+        #     "args": {
+        #         "password": "password",
+        #         "salt": "salt",
+        #         "iterations": 16777216,
+        #         "dklen": 20,
+        #         "digest": hashlib.sha1,
+        #     },
+        #     "result": "eefe3d61cd4da4e4e9945b3d6ba2158c2634e984",
+        # },
+        {
+            "args": {
+                "password": "passwordPASSWORDpassword",
+                "salt": "saltSALTsaltSALTsaltSALTsaltSALTsalt",
+                "iterations": 4096,
+                "dklen": 25,
+                "digest": hashlib.sha1,
+            },
+            "result": "3d2eec4fe41c849b80c8d83662c0e44a8b291a964cf2f07038",
+        },
+        {
+            "args": {
+                "password": "pass\0word",
+                "salt": "sa\0lt",
+                "iterations": 4096,
+                "dklen": 16,
+                "digest": hashlib.sha1,
+            },
+            "result": "56fa6aa75548099dcc37d7f03425e0c3",
+        },
+    ]
+
+    regression_vectors = [
+        {
+            "args": {
+                "password": "password",
+                "salt": "salt",
+                "iterations": 1,
+                "dklen": 20,
+                "digest": hashlib.sha256,
+            },
+            "result": "120fb6cffcf8b32c43e7225256c4f837a86548c9",
+        },
+        {
+            "args": {
+                "password": "password",
+                "salt": "salt",
+                "iterations": 1,
+                "dklen": 20,
+                "digest": hashlib.sha512,
+            },
+            "result": "867f70cf1ade02cff3752599a3a53dc4af34c7a6",
+        },
+        {
+            "args": {
+                "password": "password",
+                "salt": "salt",
+                "iterations": 1000,
+                "dklen": 0,
+                "digest": hashlib.sha512,
+            },
+            "result": ("afe6c5530785b6cc6b1c6453384731bd5ee432ee"
+                       "549fd42fb6695779ad8a1c5bf59de69c48f774ef"
+                       "c4007d5298f9033c0241d5ab69305e7b64eceeb8d"
+                       "834cfec"),
+        },
+    ]
+
+    def test_public_vectors(self):
+        for vector in self.rfc_vectors:
+            result = pbkdf2(**vector['args'])
+            self.assertEqual(result.encode('hex'), vector['result'])
+
+    def test_regression_vectors(self):
+        for vector in self.regression_vectors:
+            result = pbkdf2(**vector['args'])
+            self.assertEqual(result.encode('hex'), vector['result'])
+
+    def test_performance_scalability(self):
+        """
+        Theory: If you run with 10000 iterations, it should take 100
+        times as long as running with 100 iterations.
+        """
+        n1, n2 = 100, 10000
+        elapsed = lambda f: timeit.timeit(f, number=1)
+        t1 = elapsed(lambda: pbkdf2("password", "salt", iterations=n1))
+        t2 = elapsed(lambda: pbkdf2("password", "salt", iterations=n2))
+        measured_scale_exponent = math.log(t2 / t1, n2 / n1)
+        self.assertLess(measured_scale_exponent, 1.1)