0
0
mirror of https://github.com/wagtail/wagtail.git synced 2024-12-01 11:41:20 +01:00

Use hashlib.file_digest when available and applicable

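`hashlib.file_digest` (added in Python 3.11) is faster and more efficient than our own read loop, but it can only be used on binary-mode files; for anything it rejects (e.g. text-mode files) we fall back to our existing implementation.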

Also increase the fallback read buffer size from 64k to 256k (matching `hashlib.file_digest`) to improve efficiency and performance.
This commit is contained in:
Jake Howard 2023-09-12 10:00:27 +01:00 committed by LB (Ben Johnston)
parent a1f8edc84d
commit 62f91e1b49
2 changed files with 21 additions and 11 deletions

View File

@ -553,7 +553,7 @@ class HashFileLikeTestCase(SimpleTestCase):
"""
def __init__(self):
self.iterations = 20000
self.iterations = 5000
def read(self, bytes):
self.iterations -= 1
@ -564,5 +564,5 @@ class HashFileLikeTestCase(SimpleTestCase):
self.assertEqual(
hash_filelike(FakeLargeFile()),
"187cc1db32624dccace20d042f6d631f1a483020",
"bd36f0c5a02cd6e9e34202ea3ff8db07b533e025",
)

View File

@ -1,9 +1,9 @@
from hashlib import sha1
import hashlib
from io import UnsupportedOperation
from django.utils.encoding import force_bytes
HASH_READ_SIZE = 65536 # 64k
HASH_READ_SIZE = 2**18 # 256k - matches `hashlib.file_digest`
def hash_filelike(filelike):
@ -20,13 +20,23 @@ def hash_filelike(filelike):
except (AttributeError, UnsupportedOperation):
pass
hasher = sha1()
while True:
data = filelike.read(HASH_READ_SIZE)
if not data:
break
# Use `force_bytes` to account for files opened as text
hasher.update(force_bytes(data))
hasher = None
if hasattr(hashlib, "file_digest"):
try:
hasher = hashlib.file_digest(filelike, hashlib.sha1)
except ValueError:
# If the value can't be accepted by `file_digest` (eg text-mode files), use our fallback implementation
pass
if hasher is None:
hasher = hashlib.sha1()
while True:
data = filelike.read(HASH_READ_SIZE)
if not data:
break
# Use `force_bytes` to account for files opened as text
hasher.update(force_bytes(data))
if hasattr(filelike, "seek"):
# Reset the file handler to where it was before