mirror of
https://github.com/python/cpython.git
synced 2024-11-21 12:59:38 +01:00
gh-122854: Add Py_HashBuffer() function (#122855)
This commit is contained in:
parent
3d60dfbe17
commit
d8e69b2c1b
@ -89,6 +89,25 @@ See also the :c:member:`PyTypeObject.tp_hash` member and :ref:`numeric-hash`.
|
||||
|
||||
.. versionadded:: 3.13
|
||||
|
||||
|
||||
.. c:function:: Py_hash_t Py_HashBuffer(const void *ptr, Py_ssize_t len)
|
||||
|
||||
Compute and return the hash value of a buffer of *len* bytes
|
||||
starting at address *ptr*. The hash is guaranteed to match that of
|
||||
:class:`bytes`, :class:`memoryview`, and other built-in objects
|
||||
that implement the :ref:`buffer protocol <bufferobjects>`.
|
||||
|
||||
Use this function to implement hashing for immutable objects whose
|
||||
:c:member:`~PyTypeObject.tp_richcompare` function compares to another
|
||||
object's buffer.
|
||||
|
||||
*len* must be greater than or equal to ``0``.
|
||||
|
||||
This function always succeeds.
|
||||
|
||||
.. versionadded:: 3.14
|
||||
|
||||
|
||||
.. c:function:: Py_hash_t PyObject_GenericHash(PyObject *obj)
|
||||
|
||||
Generic hashing function that is meant to be put into a type
|
||||
|
@ -489,6 +489,9 @@ New Features
|
||||
similar to ``sep.join(iterable)`` in Python.
|
||||
(Contributed by Victor Stinner in :gh:`121645`.)
|
||||
|
||||
* Add :c:func:`Py_HashBuffer` to compute and return the hash value of a buffer.
|
||||
(Contributed by Antoine Pitrou and Victor Stinner in :gh:`122854`.)
|
||||
|
||||
|
||||
Porting to Python 3.14
|
||||
----------------------
|
||||
|
@ -45,3 +45,5 @@ PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);
|
||||
|
||||
PyAPI_FUNC(Py_hash_t) Py_HashPointer(const void *ptr);
|
||||
PyAPI_FUNC(Py_hash_t) PyObject_GenericHash(PyObject *);
|
||||
|
||||
PyAPI_FUNC(Py_hash_t) Py_HashBuffer(const void *ptr, Py_ssize_t len);
|
||||
|
@ -20,9 +20,6 @@ _Py_HashPointerRaw(const void *ptr)
|
||||
return (Py_hash_t)x;
|
||||
}
|
||||
|
||||
// Export for '_datetime' shared extension
|
||||
PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t);
|
||||
|
||||
/* Hash secret
|
||||
*
|
||||
* memory layout on 64 bit systems
|
||||
|
@ -78,6 +78,16 @@ class CAPITest(unittest.TestCase):
|
||||
VOID_P_MAX = -1 & (2 ** (8 * SIZEOF_VOID_P) - 1)
|
||||
self.assertEqual(hash_pointer(VOID_P_MAX), -2)
|
||||
|
||||
def test_hash_buffer(self):
    # Py_HashBuffer(), reached through the _testcapi.hash_buffer wrapper,
    # must return the same value as the built-in hash() for bytes objects.
    hash_buffer = _testcapi.hash_buffer

    # Cover the empty buffer, a short buffer and a 1024-byte buffer.
    for data in (b'', b'abc', b'x' * 1024):
        self.assertEqual(hash_buffer(data), hash(data))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
@ -0,0 +1,2 @@
|
||||
Add :c:func:`Py_HashBuffer` to compute and return the hash value of a buffer.
|
||||
Patch by Antoine Pitrou and Victor Stinner.
|
@ -3842,7 +3842,7 @@ datetime_date_replace_impl(PyDateTime_Date *self, int year, int month,
|
||||
static Py_hash_t
|
||||
generic_hash(unsigned char *data, int len)
|
||||
{
|
||||
return _Py_HashBytes(data, len);
|
||||
return Py_HashBuffer(data, len);
|
||||
}
|
||||
|
||||
|
||||
|
@ -25,7 +25,6 @@
|
||||
#include <stdbool.h>
|
||||
#include "Python.h"
|
||||
#include "pycore_hashtable.h"
|
||||
#include "pycore_pyhash.h" // _Py_HashBytes()
|
||||
#include "pycore_strhex.h" // _Py_strhex()
|
||||
#include "hashlib.h"
|
||||
|
||||
@ -186,7 +185,7 @@ static const py_hashentry_t py_hashes[] = {
|
||||
|
||||
static Py_uhash_t
|
||||
py_hashentry_t_hash_name(const void *key) {
|
||||
return _Py_HashBytes(key, strlen((const char *)key));
|
||||
return Py_HashBuffer(key, strlen((const char *)key));
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -2944,7 +2944,7 @@ pattern_hash(PatternObject *self)
|
||||
return -1;
|
||||
}
|
||||
|
||||
hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
|
||||
hash2 = Py_HashBuffer(self->code, sizeof(self->code[0]) * self->codesize);
|
||||
hash ^= hash2;
|
||||
|
||||
hash ^= self->flags;
|
||||
|
@ -45,6 +45,14 @@ hash_getfuncdef(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args))
|
||||
}
|
||||
|
||||
|
||||
static PyObject *
|
||||
long_from_hash(Py_hash_t hash)
|
||||
{
|
||||
Py_BUILD_ASSERT(sizeof(long long) >= sizeof(hash));
|
||||
return PyLong_FromLongLong(hash);
|
||||
}
|
||||
|
||||
|
||||
static PyObject *
|
||||
hash_pointer(PyObject *Py_UNUSED(module), PyObject *arg)
|
||||
{
|
||||
@ -54,8 +62,21 @@ hash_pointer(PyObject *Py_UNUSED(module), PyObject *arg)
|
||||
}
|
||||
|
||||
Py_hash_t hash = Py_HashPointer(ptr);
|
||||
Py_BUILD_ASSERT(sizeof(long long) >= sizeof(hash));
|
||||
return PyLong_FromLongLong(hash);
|
||||
return long_from_hash(hash);
|
||||
}
|
||||
|
||||
|
||||
/* Test helper exposing Py_HashBuffer() to Python code.
 *
 * Parses a single argument with the "y#" format into a pointer/length pair,
 * hashes that buffer with Py_HashBuffer() and returns the result as a
 * Python int built by long_from_hash().  Returns NULL when argument parsing
 * fails.
 */
static PyObject *
hash_buffer(PyObject *Py_UNUSED(module), PyObject *args)
{
    char *data;
    Py_ssize_t size;

    if (PyArg_ParseTuple(args, "y#", &data, &size) == 0) {
        /* Parsing failed: hand the failure back to the caller. */
        return NULL;
    }

    return long_from_hash(Py_HashBuffer(data, size));
}
|
||||
|
||||
|
||||
@ -64,14 +85,14 @@ object_generichash(PyObject *Py_UNUSED(module), PyObject *arg)
|
||||
{
|
||||
NULLABLE(arg);
|
||||
Py_hash_t hash = PyObject_GenericHash(arg);
|
||||
Py_BUILD_ASSERT(sizeof(long long) >= sizeof(hash));
|
||||
return PyLong_FromLongLong(hash);
|
||||
return long_from_hash(hash);
|
||||
}
|
||||
|
||||
|
||||
static PyMethodDef test_methods[] = {
|
||||
{"hash_getfuncdef", hash_getfuncdef, METH_NOARGS},
|
||||
{"hash_pointer", hash_pointer, METH_O},
|
||||
{"hash_buffer", hash_buffer, METH_VARARGS},
|
||||
{"object_generichash", object_generichash, METH_O},
|
||||
{NULL},
|
||||
};
|
||||
|
@ -15,7 +15,6 @@
|
||||
#endif
|
||||
|
||||
#include <Python.h>
|
||||
#include "pycore_pyhash.h" // _Py_HashBytes()
|
||||
#include <stdlib.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
@ -45,7 +44,7 @@ static int fuzz_builtin_int(const char* data, size_t size) {
|
||||
/* Pick a random valid base. (When the fuzzed function takes extra
|
||||
parameters, it's somewhat normal to hash the input to generate those
|
||||
parameters. We want to exercise all code paths, so we do so here.) */
|
||||
int base = _Py_HashBytes(data, size) % 37;
|
||||
int base = Py_HashBuffer(data, size) % 37;
|
||||
if (base == 1) {
|
||||
// 1 is the only number between 0 and 36 that is not a valid base.
|
||||
base = 0;
|
||||
|
@ -1598,7 +1598,7 @@ _Py_COMP_DIAG_PUSH
|
||||
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
|
||||
if (a->ob_shash == -1) {
|
||||
/* Can't fail */
|
||||
a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
|
||||
a->ob_shash = Py_HashBuffer(a->ob_sval, Py_SIZE(a));
|
||||
}
|
||||
return a->ob_shash;
|
||||
_Py_COMP_DIAG_POP
|
||||
|
@ -2561,12 +2561,12 @@ hash_const(const void *key)
|
||||
if (PySlice_Check(op)) {
|
||||
PySliceObject *s = (PySliceObject *)op;
|
||||
PyObject *data[3] = { s->start, s->stop, s->step };
|
||||
return _Py_HashBytes(&data, sizeof(data));
|
||||
return Py_HashBuffer(&data, sizeof(data));
|
||||
}
|
||||
else if (PyTuple_CheckExact(op)) {
|
||||
Py_ssize_t size = PyTuple_GET_SIZE(op);
|
||||
PyObject **data = _PyTuple_ITEMS(op);
|
||||
return _Py_HashBytes(data, sizeof(PyObject *) * size);
|
||||
return Py_HashBuffer(data, sizeof(PyObject *) * size);
|
||||
}
|
||||
Py_hash_t h = PyObject_Hash(op);
|
||||
if (h == -1) {
|
||||
|
@ -3087,7 +3087,7 @@ memory_hash(PyObject *_self)
|
||||
}
|
||||
|
||||
/* Can't fail */
|
||||
self->hash = _Py_HashBytes(mem, view->len);
|
||||
self->hash = Py_HashBuffer(mem, view->len);
|
||||
|
||||
if (mem != view->buf)
|
||||
PyMem_Free(mem);
|
||||
|
@ -11688,7 +11688,7 @@ unicode_hash(PyObject *self)
|
||||
if (hash != -1) {
|
||||
return hash;
|
||||
}
|
||||
x = _Py_HashBytes(PyUnicode_DATA(self),
|
||||
x = Py_HashBuffer(PyUnicode_DATA(self),
|
||||
PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self));
|
||||
|
||||
FT_ATOMIC_STORE_SSIZE_RELAXED(_PyUnicode_HASH(self), x);
|
||||
|
@ -1174,7 +1174,7 @@ hashtable_key_from_2_strings(PyObject *str1, PyObject *str2, const char sep)
|
||||
static Py_uhash_t
|
||||
hashtable_hash_str(const void *key)
|
||||
{
|
||||
return _Py_HashBytes(key, strlen((const char *)key));
|
||||
return Py_HashBuffer(key, strlen((const char *)key));
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -22,7 +22,7 @@ extern PyHash_FuncDef PyHash_Func;
|
||||
static PyHash_FuncDef PyHash_Func;
|
||||
#endif
|
||||
|
||||
/* Count _Py_HashBytes() calls */
|
||||
/* Count Py_HashBuffer() calls */
|
||||
#ifdef Py_HASH_STATS
|
||||
#define Py_HASH_STATS_MAX 32
|
||||
static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
|
||||
@ -146,9 +146,8 @@ PyObject_GenericHash(PyObject *obj)
|
||||
}
|
||||
|
||||
Py_hash_t
|
||||
_Py_HashBytes(const void *src, Py_ssize_t len)
|
||||
Py_HashBuffer(const void *ptr, Py_ssize_t len)
|
||||
{
|
||||
Py_hash_t x;
|
||||
/*
|
||||
We make the hash of the empty string be 0, rather than using
|
||||
(prefix ^ suffix), since this slightly obfuscates the hash secret
|
||||
@ -161,11 +160,12 @@ _Py_HashBytes(const void *src, Py_ssize_t len)
|
||||
hashstats[(len <= Py_HASH_STATS_MAX) ? len : 0]++;
|
||||
#endif
|
||||
|
||||
Py_hash_t x;
|
||||
#if Py_HASH_CUTOFF > 0
|
||||
if (len < Py_HASH_CUTOFF) {
|
||||
/* Optimize hashing of very small strings with inline DJBX33A. */
|
||||
Py_uhash_t hash;
|
||||
const unsigned char *p = src;
|
||||
const unsigned char *p = ptr;
|
||||
hash = 5381; /* DJBX33A starts with 5381 */
|
||||
|
||||
switch(len) {
|
||||
@ -186,10 +186,13 @@ _Py_HashBytes(const void *src, Py_ssize_t len)
|
||||
}
|
||||
else
|
||||
#endif /* Py_HASH_CUTOFF */
|
||||
x = PyHash_Func.hash(src, len);
|
||||
{
|
||||
x = PyHash_Func.hash(ptr, len);
|
||||
}
|
||||
|
||||
if (x == -1)
|
||||
if (x == -1) {
|
||||
return -2;
|
||||
}
|
||||
return x;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user