aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Shannon <mark@hotpy.org>2021-09-17 12:20:51 +0100
committerGitHub <noreply@github.com>2021-09-17 12:20:51 +0100
commit064464fc38269e70f7e3a34cb25fc9085ab85782 (patch)
tree685df2fa53696910d4968ef900aadaebbbf4035d
parentbpo-45217: adds note that `allow_no_value` in `configparser` is optional (GH-... (diff)
downloadcpython-064464fc38269e70f7e3a34cb25fc9085ab85782.tar.gz
cpython-064464fc38269e70f7e3a34cb25fc9085ab85782.tar.bz2
cpython-064464fc38269e70f7e3a34cb25fc9085ab85782.zip
bpo-45219: Factor dictkey indexing (GH-28389)
-rw-r--r--Include/cpython/dictobject.h4
-rw-r--r--Objects/dictobject.c126
-rw-r--r--Python/specialize.c39
3 files changed, 106 insertions, 63 deletions
diff --git a/Include/cpython/dictobject.h b/Include/cpython/dictobject.h
index de3c1160b48..7c63374c566 100644
--- a/Include/cpython/dictobject.h
+++ b/Include/cpython/dictobject.h
@@ -85,4 +85,6 @@ PyAPI_FUNC(PyObject *) _PyDictView_Intersect(PyObject* self, PyObject *other);
/* Gets a version number unique to the current state of the keys of dict, if possible.
* Returns the version number, or zero if it was not possible to get a version number. */
-uint32_t _PyDictKeys_GetVersionForCurrentState(PyDictObject *dict);
+uint32_t _PyDictKeys_GetVersionForCurrentState(PyDictKeysObject *dictkeys);
+
+Py_ssize_t _PyDictKeys_StringLookup(PyDictKeysObject* dictkeys, PyObject *key);
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index c78cbafe583..85122eca7b7 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -734,6 +734,73 @@ lookdict_index(PyDictKeysObject *k, Py_hash_t hash, Py_ssize_t index)
Py_UNREACHABLE();
}
+static Py_ssize_t
+dictkeys_stringlookup(PyDictKeysObject* dk, PyObject *key, Py_hash_t hash)
+{
+ PyDictKeyEntry *ep0 = DK_ENTRIES(dk);
+ size_t mask = DK_MASK(dk);
+ size_t perturb = hash;
+ size_t i = (size_t)hash & mask;
+ Py_ssize_t ix;
+ for (;;) {
+ ix = dictkeys_get_index(dk, i);
+ if (ix >= 0) {
+ PyDictKeyEntry *ep = &ep0[ix];
+ assert(ep->me_key != NULL);
+ assert(PyUnicode_CheckExact(ep->me_key));
+ if (ep->me_key == key ||
+ (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
+ return ix;
+ }
+ }
+ else if (ix == DKIX_EMPTY) {
+ return DKIX_EMPTY;
+ }
+ perturb >>= PERTURB_SHIFT;
+ i = mask & (i*5 + perturb + 1);
+ ix = dictkeys_get_index(dk, i);
+ if (ix >= 0) {
+ PyDictKeyEntry *ep = &ep0[ix];
+ assert(ep->me_key != NULL);
+ assert(PyUnicode_CheckExact(ep->me_key));
+ if (ep->me_key == key ||
+ (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
+ return ix;
+ }
+ }
+ else if (ix == DKIX_EMPTY) {
+ return DKIX_EMPTY;
+ }
+ perturb >>= PERTURB_SHIFT;
+ i = mask & (i*5 + perturb + 1);
+ }
+ Py_UNREACHABLE();
+}
+
+/* Lookup a string in a (all unicode) dict keys.
+ * Returns DKIX_ERROR if key is not a string,
+ * or if the dict keys is not all strings.
+ * If the keys is present then return the index of key.
+ * If the key is not present then return DKIX_EMPTY.
+ */
+Py_ssize_t
+_PyDictKeys_StringLookup(PyDictKeysObject* dk, PyObject *key)
+{
+ DictKeysKind kind = dk->dk_kind;
+ if (!PyUnicode_CheckExact(key) || kind == DICT_KEYS_GENERAL) {
+ return DKIX_ERROR;
+ }
+ Py_hash_t hash = ((PyASCIIObject *)key)->hash;
+ if (hash == -1) {
+ hash = PyUnicode_Type.tp_hash(key);
+ if (hash == -1) {
+ PyErr_Clear();
+ return DKIX_ERROR;
+ }
+ }
+ return dictkeys_stringlookup(dk, key, hash);
+}
+
/*
The basic lookup function used by all operations.
This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
@@ -756,49 +823,24 @@ _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **valu
start:
dk = mp->ma_keys;
DictKeysKind kind = dk->dk_kind;
+ if (PyUnicode_CheckExact(key) && kind != DICT_KEYS_GENERAL) {
+ Py_ssize_t ix = dictkeys_stringlookup(dk, key, hash);
+ if (ix == DKIX_EMPTY) {
+ *value_addr = NULL;
+ }
+ else if (kind == DICT_KEYS_SPLIT) {
+ *value_addr = mp->ma_values[ix];
+ }
+ else {
+ *value_addr = DK_ENTRIES(dk)[ix].me_value;
+ }
+ return ix;
+ }
PyDictKeyEntry *ep0 = DK_ENTRIES(dk);
size_t mask = DK_MASK(dk);
size_t perturb = hash;
size_t i = (size_t)hash & mask;
Py_ssize_t ix;
- if (PyUnicode_CheckExact(key) && kind != DICT_KEYS_GENERAL) {
- /* Strings only */
- for (;;) {
- ix = dictkeys_get_index(mp->ma_keys, i);
- if (ix >= 0) {
- PyDictKeyEntry *ep = &ep0[ix];
- assert(ep->me_key != NULL);
- assert(PyUnicode_CheckExact(ep->me_key));
- if (ep->me_key == key ||
- (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
- goto found;
- }
- }
- else if (ix == DKIX_EMPTY) {
- *value_addr = NULL;
- return DKIX_EMPTY;
- }
- perturb >>= PERTURB_SHIFT;
- i = mask & (i*5 + perturb + 1);
- ix = dictkeys_get_index(mp->ma_keys, i);
- if (ix >= 0) {
- PyDictKeyEntry *ep = &ep0[ix];
- assert(ep->me_key != NULL);
- assert(PyUnicode_CheckExact(ep->me_key));
- if (ep->me_key == key ||
- (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
- goto found;
- }
- }
- else if (ix == DKIX_EMPTY) {
- *value_addr = NULL;
- return DKIX_EMPTY;
- }
- perturb >>= PERTURB_SHIFT;
- i = mask & (i*5 + perturb + 1);
- }
- Py_UNREACHABLE();
- }
for (;;) {
ix = dictkeys_get_index(dk, i);
if (ix == DKIX_EMPTY) {
@@ -4997,15 +5039,15 @@ _PyDictKeys_DecRef(PyDictKeysObject *keys)
static uint32_t next_dict_keys_version = 2;
-uint32_t _PyDictKeys_GetVersionForCurrentState(PyDictObject *dict)
+uint32_t _PyDictKeys_GetVersionForCurrentState(PyDictKeysObject *dictkeys)
{
- if (dict->ma_keys->dk_version != 0) {
- return dict->ma_keys->dk_version;
+ if (dictkeys->dk_version != 0) {
+ return dictkeys->dk_version;
}
if (next_dict_keys_version == 0) {
return 0;
}
uint32_t v = next_dict_keys_version++;
- dict->ma_keys->dk_version = v;
+ dictkeys->dk_version = v;
return v;
}
diff --git a/Python/specialize.c b/Python/specialize.c
index 398524cdb5b..1ab79bf3ea0 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -489,7 +489,7 @@ specialize_module_load_attr(
SPECIALIZATION_FAIL(opcode, SPEC_FAIL_OUT_OF_RANGE);
return -1;
}
- uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(dict);
+ uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(dict->ma_keys);
if (keys_version == 0) {
SPECIALIZATION_FAIL(opcode, SPEC_FAIL_OUT_OF_VERSIONS);
return -1;
@@ -601,23 +601,19 @@ specialize_dict_access(
}
// We found an instance with a __dict__.
PyDictObject *dict = (PyDictObject *)*dictptr;
+ PyDictKeysObject *keys = dict->ma_keys;
if ((type->tp_flags & Py_TPFLAGS_HEAPTYPE)
- && dict->ma_keys == ((PyHeapTypeObject*)type)->ht_cached_keys
+ && keys == ((PyHeapTypeObject*)type)->ht_cached_keys
) {
// Keys are shared
assert(PyUnicode_CheckExact(name));
- Py_hash_t hash = PyObject_Hash(name);
- if (hash == -1) {
- return -1;
- }
- PyObject *value;
- Py_ssize_t index = _Py_dict_lookup(dict, name, hash, &value);
+ Py_ssize_t index = _PyDictKeys_StringLookup(keys, name);
assert (index != DKIX_ERROR);
if (index != (uint16_t)index) {
SPECIALIZATION_FAIL(base_op, SPEC_FAIL_OUT_OF_RANGE);
return 0;
}
- uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(dict);
+ uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(keys);
if (keys_version == 0) {
SPECIALIZATION_FAIL(base_op, SPEC_FAIL_OUT_OF_VERSIONS);
return 0;
@@ -966,7 +962,7 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name,
goto fail;
}
}
- keys_version = _PyDictKeys_GetVersionForCurrentState(owner_dict);
+ keys_version = _PyDictKeys_GetVersionForCurrentState(owner_dict->ma_keys);
if (keys_version == 0) {
SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OUT_OF_VERSIONS);
goto fail;
@@ -1020,17 +1016,17 @@ _Py_Specialize_LoadGlobal(
if (!PyDict_CheckExact(globals)) {
goto fail;
}
- if (((PyDictObject *)globals)->ma_keys->dk_kind != DICT_KEYS_UNICODE) {
+ PyDictKeysObject * globals_keys = ((PyDictObject *)globals)->ma_keys;
+ Py_ssize_t index = _PyDictKeys_StringLookup(globals_keys, name);
+ if (index == DKIX_ERROR) {
+ SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_NON_STRING_OR_SPLIT);
goto fail;
}
- PyObject *value = NULL;
- Py_ssize_t index = _PyDict_GetItemHint((PyDictObject *)globals, name, -1, &value);
- assert (index != DKIX_ERROR);
if (index != DKIX_EMPTY) {
if (index != (uint16_t)index) {
goto fail;
}
- uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)globals);
+ uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(globals_keys);
if (keys_version == 0) {
goto fail;
}
@@ -1042,20 +1038,23 @@ _Py_Specialize_LoadGlobal(
if (!PyDict_CheckExact(builtins)) {
goto fail;
}
- if (((PyDictObject *)builtins)->ma_keys->dk_kind != DICT_KEYS_UNICODE) {
+ PyDictKeysObject * builtin_keys = ((PyDictObject *)builtins)->ma_keys;
+ index = _PyDictKeys_StringLookup(builtin_keys, name);
+ if (index == DKIX_ERROR) {
+ SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_NON_STRING_OR_SPLIT);
goto fail;
}
- index = _PyDict_GetItemHint((PyDictObject *)builtins, name, -1, &value);
- assert (index != DKIX_ERROR);
if (index != (uint16_t)index) {
goto fail;
}
- uint32_t globals_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)globals);
+ uint32_t globals_version = _PyDictKeys_GetVersionForCurrentState(globals_keys);
if (globals_version == 0) {
+ SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_VERSIONS);
goto fail;
}
- uint32_t builtins_version = _PyDictKeys_GetVersionForCurrentState((PyDictObject *)builtins);
+ uint32_t builtins_version = _PyDictKeys_GetVersionForCurrentState(builtin_keys);
if (builtins_version == 0) {
+ SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_VERSIONS);
goto fail;
}
cache1->module_keys_version = globals_version;