mirror of
https://github.com/python/cpython.git
synced 2024-11-24 00:38:00 +01:00
bpo-46845: Reduce dict size when all keys are Unicode (GH-31564)
This commit is contained in:
parent
21099fc064
commit
9833bb91e4
@ -404,6 +404,11 @@ Optimizations
|
||||
larger *k*).
|
||||
(Contributed by Serhiy Storchaka in :issue:`37295`.)
|
||||
|
||||
* Dicts don't store hash values when all inserted keys are Unicode objects.
|
||||
This reduces dict size. For example, ``sys.getsizeof(dict.fromkeys("abcdefg"))``
|
||||
shrinks from 352 bytes to 272 bytes on 64-bit platforms.
|
||||
(Contributed by Inada Naoki in :issue:`46845`.)
|
||||
|
||||
|
||||
CPython bytecode changes
|
||||
========================
|
||||
|
@ -43,6 +43,11 @@ typedef struct {
|
||||
PyObject *me_value; /* This field is only meaningful for combined tables */
|
||||
} PyDictKeyEntry;
|
||||
|
||||
typedef struct {
|
||||
PyObject *me_key; /* The key must be Unicode and have hash. */
|
||||
PyObject *me_value; /* This field is only meaningful for combined tables */
|
||||
} PyDictUnicodeEntry;
|
||||
|
||||
extern PyDictKeysObject *_PyDict_NewKeysForClass(void);
|
||||
extern PyObject *_PyDict_FromKeys(PyObject *, PyObject *, PyObject *);
|
||||
|
||||
@ -70,6 +75,7 @@ extern PyObject *_PyDict_Pop_KnownHash(PyObject *, PyObject *, Py_hash_t, PyObje
|
||||
#define DKIX_EMPTY (-1)
|
||||
#define DKIX_DUMMY (-2) /* Used internally */
|
||||
#define DKIX_ERROR (-3)
|
||||
#define DKIX_KEY_CHANGED (-4) /* Used internally */
|
||||
|
||||
typedef enum {
|
||||
DICT_KEYS_GENERAL = 0,
|
||||
@ -114,7 +120,7 @@ struct _dictkeysobject {
|
||||
Dynamically sized, SIZEOF_VOID_P is minimum. */
|
||||
char dk_indices[]; /* char is required to avoid strict aliasing. */
|
||||
|
||||
/* "PyDictKeyEntry dk_entries[dk_usable];" array follows:
|
||||
/* "PyDictKeyEntry or PyDictUnicodeEntry dk_entries[USABLE_FRACTION(DK_SIZE(dk))];" array follows:
|
||||
see the DK_ENTRIES() macro */
|
||||
};
|
||||
|
||||
@ -148,13 +154,20 @@ struct _dictvalues {
|
||||
2 : sizeof(int32_t))
|
||||
#endif
|
||||
#define DK_ENTRIES(dk) \
|
||||
((PyDictKeyEntry*)(&((int8_t*)((dk)->dk_indices))[(size_t)1 << (dk)->dk_log2_index_bytes]))
|
||||
(assert(dk->dk_kind == DICT_KEYS_GENERAL), (PyDictKeyEntry*)(&((int8_t*)((dk)->dk_indices))[(size_t)1 << (dk)->dk_log2_index_bytes]))
|
||||
/* Pointer to the PyDictUnicodeEntry array of a keys object.
   Only valid when dk_kind != DICT_KEYS_GENERAL (checked by the assert).
   The array is located (1 << dk_log2_index_bytes) bytes past the start of
   dk_indices.  The argument is parenthesized at every use, including inside
   the assert, so any pointer-valued expression can be passed safely. */
#define DK_UNICODE_ENTRIES(dk) \
    (assert((dk)->dk_kind != DICT_KEYS_GENERAL), \
     (PyDictUnicodeEntry*)(&((int8_t*)((dk)->dk_indices))[(size_t)1 << (dk)->dk_log2_index_bytes]))
|
||||
#define DK_IS_UNICODE(dk) ((dk)->dk_kind != DICT_KEYS_GENERAL)
|
||||
|
||||
extern uint64_t _pydict_global_version;
|
||||
|
||||
#define DICT_NEXT_VERSION() (++_pydict_global_version)
|
||||
|
||||
extern PyObject *_PyObject_MakeDictFromInstanceAttributes(PyObject *obj, PyDictValues *values);
|
||||
extern PyObject *_PyDict_FromItems(
|
||||
PyObject *const *keys, Py_ssize_t keys_offset,
|
||||
PyObject *const *values, Py_ssize_t values_offset,
|
||||
Py_ssize_t length);
|
||||
|
||||
static inline void
|
||||
_PyDictValues_AddToInsertionOrder(PyDictValues *values, Py_ssize_t ix)
|
||||
|
@ -1346,8 +1346,12 @@ class SizeofTest(unittest.TestCase):
|
||||
check({}.__iter__, size('2P'))
|
||||
# empty dict
|
||||
check({}, size('nQ2P'))
|
||||
# dict
|
||||
check({"a": 1}, size('nQ2P') + calcsize(DICT_KEY_STRUCT_FORMAT) + 8 + (8*2//3)*calcsize('n2P'))
|
||||
# dict (string key)
|
||||
check({"a": 1}, size('nQ2P') + calcsize(DICT_KEY_STRUCT_FORMAT) + 8 + (8*2//3)*calcsize('2P'))
|
||||
longdict = {str(i): i for i in range(8)}
|
||||
check(longdict, size('nQ2P') + calcsize(DICT_KEY_STRUCT_FORMAT) + 16 + (16*2//3)*calcsize('2P'))
|
||||
# dict (non-string key)
|
||||
check({1: 1}, size('nQ2P') + calcsize(DICT_KEY_STRUCT_FORMAT) + 8 + (8*2//3)*calcsize('n2P'))
|
||||
longdict = {1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8}
|
||||
check(longdict, size('nQ2P') + calcsize(DICT_KEY_STRUCT_FORMAT) + 16 + (16*2//3)*calcsize('n2P'))
|
||||
# dictionary-keyview
|
||||
@ -1506,14 +1510,14 @@ class SizeofTest(unittest.TestCase):
|
||||
)
|
||||
class newstyleclass(object): pass
|
||||
# Separate block for PyDictKeysObject with 64 keys and 42 entries
|
||||
check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 64 + 42*calcsize("n2P"))
|
||||
check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 64 + 42*calcsize("2P"))
|
||||
# dict with shared keys
|
||||
[newstyleclass() for _ in range(100)]
|
||||
check(newstyleclass().__dict__, size('nQ2P') + self.P)
|
||||
o = newstyleclass()
|
||||
o.a = o.b = o.c = o.d = o.e = o.f = o.g = o.h = 1
|
||||
# Separate block for PyDictKeysObject with 64 keys and 42 entries
|
||||
check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 64 + 42*calcsize("n2P"))
|
||||
check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 64 + 42*calcsize("2P"))
|
||||
# dict with shared keys
|
||||
check(newstyleclass().__dict__, size('nQ2P') + self.P)
|
||||
# unicode
|
||||
|
@ -0,0 +1,3 @@
|
||||
Reduces dict size by omitting hash values from the hash table when all inserted
|
||||
keys are Unicode. For example, ``sys.getsizeof(dict.fromkeys("abcdefg"))``
|
||||
shrinks from 352 bytes to 272 bytes on 64-bit platforms.
|
@ -934,26 +934,11 @@ PyObject *
|
||||
_PyStack_AsDict(PyObject *const *values, PyObject *kwnames)
|
||||
{
|
||||
Py_ssize_t nkwargs;
|
||||
PyObject *kwdict;
|
||||
Py_ssize_t i;
|
||||
|
||||
assert(kwnames != NULL);
|
||||
nkwargs = PyTuple_GET_SIZE(kwnames);
|
||||
kwdict = _PyDict_NewPresized(nkwargs);
|
||||
if (kwdict == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (i = 0; i < nkwargs; i++) {
|
||||
PyObject *key = PyTuple_GET_ITEM(kwnames, i);
|
||||
PyObject *value = *values++;
|
||||
/* If key already exists, replace it with the new value */
|
||||
if (PyDict_SetItem(kwdict, key, value)) {
|
||||
Py_DECREF(kwdict);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
return kwdict;
|
||||
return _PyDict_FromItems(&PyTuple_GET_ITEM(kwnames, 0), 1,
|
||||
values, 1, nkwargs);
|
||||
}
|
||||
|
||||
|
||||
|
@ -70,8 +70,8 @@ A values array
|
||||
Tunable Dictionary Parameters
|
||||
-----------------------------
|
||||
|
||||
See comments for PyDict_MINSIZE_SPLIT, PyDict_MINSIZE_COMBINED,
|
||||
USABLE_FRACTION and GROWTH_RATE in dictobject.c
|
||||
See comments for PyDict_MINSIZE, USABLE_FRACTION and GROWTH_RATE in
|
||||
dictobject.c
|
||||
|
||||
Tune-ups should be measured across a broad range of applications and
|
||||
use cases. A change to any parameter will help in some situations and
|
||||
|
1211
Objects/dictobject.c
1211
Objects/dictobject.c
File diff suppressed because it is too large
Load Diff
@ -1457,7 +1457,7 @@ eval_frame_handle_pending(PyThreadState *tstate)
|
||||
LOAD_##attr_or_method); \
|
||||
assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE); \
|
||||
assert(cache0->index < dict->ma_keys->dk_nentries); \
|
||||
PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + cache0->index; \
|
||||
PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + cache0->index; \
|
||||
res = ep->me_value; \
|
||||
DEOPT_IF(res == NULL, LOAD_##attr_or_method); \
|
||||
STAT_INC(LOAD_##attr_or_method, hit); \
|
||||
@ -1595,6 +1595,19 @@ is_method(PyObject **stack_pointer, int args) {
|
||||
return PEEK(args+2) != NULL;
|
||||
}
|
||||
|
||||
/* Return the me_value field of entry number `index` in the entry array of
   `dk`.  The entry layout is chosen with DK_IS_UNICODE(dk): PyDictUnicodeEntry
   for Unicode-only keys objects, PyDictKeyEntry otherwise.
   The pointer is returned as stored: no reference count is changed and no
   bounds check is done on `index` here.  The stored value may be NULL;
   the LOAD_GLOBAL callers test the result for NULL themselves. */
static PyObject*
|
||||
dictkeys_get_value_by_index(PyDictKeysObject *dk, int index)
|
||||
{
|
||||
if (DK_IS_UNICODE(dk)) {
|
||||
PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dk) + index;
|
||||
return ep->me_value;
|
||||
}
|
||||
else {
|
||||
PyDictKeyEntry *ep = DK_ENTRIES(dk) + index;
|
||||
return ep->me_value;
|
||||
}
|
||||
}
|
||||
|
||||
#define KWNAMES_LEN() \
|
||||
(call_shape.kwnames == NULL ? 0 : ((int)PyTuple_GET_SIZE(call_shape.kwnames)))
|
||||
|
||||
@ -3030,8 +3043,7 @@ handle_eval_breaker:
|
||||
_PyLoadGlobalCache *cache = (_PyLoadGlobalCache *)next_instr;
|
||||
uint32_t version = read32(&cache->module_keys_version);
|
||||
DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL);
|
||||
PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + cache->index;
|
||||
PyObject *res = ep->me_value;
|
||||
PyObject *res = dictkeys_get_value_by_index(dict->ma_keys, cache->index);
|
||||
DEOPT_IF(res == NULL, LOAD_GLOBAL);
|
||||
JUMPBY(INLINE_CACHE_ENTRIES_LOAD_GLOBAL);
|
||||
STAT_INC(LOAD_GLOBAL, hit);
|
||||
@ -3051,8 +3063,7 @@ handle_eval_breaker:
|
||||
uint16_t bltn_version = cache->builtin_keys_version;
|
||||
DEOPT_IF(mdict->ma_keys->dk_version != mod_version, LOAD_GLOBAL);
|
||||
DEOPT_IF(bdict->ma_keys->dk_version != bltn_version, LOAD_GLOBAL);
|
||||
PyDictKeyEntry *ep = DK_ENTRIES(bdict->ma_keys) + cache->index;
|
||||
PyObject *res = ep->me_value;
|
||||
PyObject *res = dictkeys_get_value_by_index(bdict->ma_keys, cache->index);
|
||||
DEOPT_IF(res == NULL, LOAD_GLOBAL);
|
||||
JUMPBY(INLINE_CACHE_ENTRIES_LOAD_GLOBAL);
|
||||
STAT_INC(LOAD_GLOBAL, hit);
|
||||
@ -3272,20 +3283,12 @@ handle_eval_breaker:
|
||||
}
|
||||
|
||||
TARGET(BUILD_MAP) {
|
||||
Py_ssize_t i;
|
||||
PyObject *map = _PyDict_NewPresized((Py_ssize_t)oparg);
|
||||
PyObject *map = _PyDict_FromItems(
|
||||
&PEEK(2*oparg), 2,
|
||||
&PEEK(2*oparg - 1), 2,
|
||||
oparg);
|
||||
if (map == NULL)
|
||||
goto error;
|
||||
for (i = oparg; i > 0; i--) {
|
||||
int err;
|
||||
PyObject *key = PEEK(2*i);
|
||||
PyObject *value = PEEK(2*i - 1);
|
||||
err = PyDict_SetItem(map, key, value);
|
||||
if (err != 0) {
|
||||
Py_DECREF(map);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
while (oparg--) {
|
||||
Py_DECREF(POP());
|
||||
@ -3351,7 +3354,6 @@ handle_eval_breaker:
|
||||
}
|
||||
|
||||
TARGET(BUILD_CONST_KEY_MAP) {
|
||||
Py_ssize_t i;
|
||||
PyObject *map;
|
||||
PyObject *keys = TOP();
|
||||
if (!PyTuple_CheckExact(keys) ||
|
||||
@ -3360,20 +3362,12 @@ handle_eval_breaker:
|
||||
"bad BUILD_CONST_KEY_MAP keys argument");
|
||||
goto error;
|
||||
}
|
||||
map = _PyDict_NewPresized((Py_ssize_t)oparg);
|
||||
map = _PyDict_FromItems(
|
||||
&PyTuple_GET_ITEM(keys, 0), 1,
|
||||
&PEEK(oparg + 1), 1, oparg);
|
||||
if (map == NULL) {
|
||||
goto error;
|
||||
}
|
||||
for (i = oparg; i > 0; i--) {
|
||||
int err;
|
||||
PyObject *key = PyTuple_GET_ITEM(keys, oparg - i);
|
||||
PyObject *value = PEEK(i + 1);
|
||||
err = PyDict_SetItem(map, key, value);
|
||||
if (err != 0) {
|
||||
Py_DECREF(map);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
Py_DECREF(POP());
|
||||
while (oparg--) {
|
||||
@ -3538,9 +3532,16 @@ handle_eval_breaker:
|
||||
PyObject *name = GETITEM(names, cache0->original_oparg);
|
||||
uint16_t hint = cache0->index;
|
||||
DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries, LOAD_ATTR);
|
||||
PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + hint;
|
||||
DEOPT_IF(ep->me_key != name, LOAD_ATTR);
|
||||
res = ep->me_value;
|
||||
if (DK_IS_UNICODE(dict->ma_keys)) {
|
||||
PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint;
|
||||
DEOPT_IF(ep->me_key != name, LOAD_ATTR);
|
||||
res = ep->me_value;
|
||||
}
|
||||
else {
|
||||
PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + hint;
|
||||
DEOPT_IF(ep->me_key != name, LOAD_ATTR);
|
||||
res = ep->me_value;
|
||||
}
|
||||
DEOPT_IF(res == NULL, LOAD_ATTR);
|
||||
STAT_INC(LOAD_ATTR, hit);
|
||||
Py_INCREF(res);
|
||||
@ -3630,15 +3631,27 @@ handle_eval_breaker:
|
||||
PyObject *name = GETITEM(names, cache0->original_oparg);
|
||||
uint16_t hint = cache0->index;
|
||||
DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries, STORE_ATTR);
|
||||
PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + hint;
|
||||
DEOPT_IF(ep->me_key != name, STORE_ATTR);
|
||||
PyObject *old_value = ep->me_value;
|
||||
DEOPT_IF(old_value == NULL, STORE_ATTR);
|
||||
STAT_INC(STORE_ATTR, hit);
|
||||
STACK_SHRINK(1);
|
||||
PyObject *value = POP();
|
||||
ep->me_value = value;
|
||||
PyObject *value, *old_value;
|
||||
if (DK_IS_UNICODE(dict->ma_keys)) {
|
||||
PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint;
|
||||
DEOPT_IF(ep->me_key != name, STORE_ATTR);
|
||||
old_value = ep->me_value;
|
||||
DEOPT_IF(old_value == NULL, STORE_ATTR);
|
||||
STACK_SHRINK(1);
|
||||
value = POP();
|
||||
ep->me_value = value;
|
||||
}
|
||||
else {
|
||||
PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + hint;
|
||||
DEOPT_IF(ep->me_key != name, STORE_ATTR);
|
||||
old_value = ep->me_value;
|
||||
DEOPT_IF(old_value == NULL, STORE_ATTR);
|
||||
STACK_SHRINK(1);
|
||||
value = POP();
|
||||
ep->me_value = value;
|
||||
}
|
||||
Py_DECREF(old_value);
|
||||
STAT_INC(STORE_ATTR, hit);
|
||||
/* Ensure dict is GC tracked if it needs to be */
|
||||
if (!_PyObject_GC_IS_TRACKED(dict) && _PyObject_GC_MAY_BE_TRACKED(value)) {
|
||||
_PyObject_GC_TRACK(dict);
|
||||
|
@ -787,12 +787,6 @@ class PyDictObjectPtr(PyObjectPtr):
|
||||
def _get_entries(keys):
|
||||
dk_nentries = int(keys['dk_nentries'])
|
||||
dk_size = 1<<int(keys['dk_log2_size'])
|
||||
try:
|
||||
# <= Python 3.5
|
||||
return keys['dk_entries'], dk_size
|
||||
except RuntimeError:
|
||||
# >= Python 3.6
|
||||
pass
|
||||
|
||||
if dk_size <= 0xFF:
|
||||
offset = dk_size
|
||||
@ -805,7 +799,10 @@ class PyDictObjectPtr(PyObjectPtr):
|
||||
|
||||
ent_addr = keys['dk_indices'].address
|
||||
ent_addr = ent_addr.cast(_type_unsigned_char_ptr()) + offset
|
||||
ent_ptr_t = gdb.lookup_type('PyDictKeyEntry').pointer()
|
||||
if int(keys['dk_kind']) == 0: # DICT_KEYS_GENERAL
|
||||
ent_ptr_t = gdb.lookup_type('PyDictKeyEntry').pointer()
|
||||
else:
|
||||
ent_ptr_t = gdb.lookup_type('PyDictUnicodeEntry').pointer()
|
||||
ent_addr = ent_addr.cast(ent_ptr_t)
|
||||
|
||||
return ent_addr, dk_nentries
|
||||
|
Loading…
Reference in New Issue
Block a user