mirror of https://github.com/python/cpython
Changed the dict implementation to take "string shortcuts" only when
keys are true strings -- no subclasses need apply. This may be debatable. The problem is that a str subclass may very well want to override __eq__ and/or __hash__ (see the new example of case-insensitive strings in test_descr), but go-fast shortcuts for strings are ubiquitous in our dicts (and subclass overrides aren't even looked for then). Another go-fast reason for the change is that PyString_CheckExact() is a quicker test than PyString_Check(), and we make such a test on virtually every access to every dict. OTOH, a str subclass may also be perfectly happy using the base str eq and hash, and this change slows them a lot. But those cases are still hypothetical, while Python's own reliance on true-string dicts is not.
This commit is contained in:
parent
742dfd6f17
commit
0ab085c4cb
|
@ -1533,8 +1533,8 @@ def inherits():
|
|||
verify(str(s) == base)
|
||||
verify(str(s).__class__ is str)
|
||||
verify(hash(s) == hash(base))
|
||||
verify({s: 1}[base] == 1)
|
||||
verify({base: 1}[s] == 1)
|
||||
#XXX verify({s: 1}[base] == 1)
|
||||
#XXX verify({base: 1}[s] == 1)
|
||||
verify((s + "").__class__ is str)
|
||||
verify(s + "" == base)
|
||||
verify(("" + s).__class__ is str)
|
||||
|
@ -1758,6 +1758,39 @@ f = t(%r, 'w') # rexec can't catch this by itself
|
|||
except:
|
||||
pass
|
||||
|
||||
def str_subclass_as_dict_key():
|
||||
if verbose:
|
||||
print "Testing a str subclass used as dict key .."
|
||||
|
||||
class cistr(str):
|
||||
"""Subclass of str that computes __eq__ case-insensitively.
|
||||
|
||||
Also computes a hash code of the string in canonical form.
|
||||
"""
|
||||
|
||||
def __init__(self, value):
|
||||
self.canonical = value.lower()
|
||||
self.hashcode = hash(self.canonical)
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, cistr):
|
||||
other = cistr(other)
|
||||
return self.canonical == other.canonical
|
||||
|
||||
def __hash__(self):
|
||||
return self.hashcode
|
||||
|
||||
verify('aBc' == cistr('ABC') == 'abc')
|
||||
verify(str(cistr('ABC')) == 'ABC')
|
||||
|
||||
d = {cistr('one'): 1, cistr('two'): 2, cistr('tHree'): 3}
|
||||
verify(d[cistr('one')] == 1)
|
||||
verify(d[cistr('tWo')] == 2)
|
||||
verify(d[cistr('THrEE')] == 3)
|
||||
verify(cistr('ONe') in d)
|
||||
verify(d.get(cistr('thrEE')) == 3)
|
||||
|
||||
|
||||
def all():
|
||||
lists()
|
||||
dicts()
|
||||
|
@ -1794,6 +1827,7 @@ def all():
|
|||
inherits()
|
||||
keywords()
|
||||
restricted()
|
||||
str_subclass_as_dict_key()
|
||||
|
||||
all()
|
||||
|
||||
|
|
|
@ -298,8 +298,8 @@ Done:
|
|||
* means we don't need to go through PyObject_Compare(); we can always use
|
||||
* _PyString_Eq directly.
|
||||
*
|
||||
* This really only becomes meaningful if proper error handling in lookdict()
|
||||
* is too expensive.
|
||||
* This is valuable because the general-case error handling in lookdict() is
|
||||
* expensive, and dicts with pure-string keys are very common.
|
||||
*/
|
||||
static dictentry *
|
||||
lookdict_string(dictobject *mp, PyObject *key, register long hash)
|
||||
|
@ -311,8 +311,11 @@ lookdict_string(dictobject *mp, PyObject *key, register long hash)
|
|||
dictentry *ep0 = mp->ma_table;
|
||||
register dictentry *ep;
|
||||
|
||||
/* make sure this function doesn't have to handle non-string keys */
|
||||
if (!PyString_Check(key)) {
|
||||
/* Make sure this function doesn't have to handle non-string keys,
|
||||
including subclasses of str; e.g., one reason to subclass
|
||||
strings is to override __eq__, and for speed we don't cater to
|
||||
that here. */
|
||||
if (!PyString_CheckExact(key)) {
|
||||
#ifdef SHOW_CONVERSION_COUNTS
|
||||
++converted;
|
||||
#endif
|
||||
|
@ -478,7 +481,7 @@ PyDict_GetItem(PyObject *op, PyObject *key)
|
|||
return NULL;
|
||||
}
|
||||
#ifdef CACHE_HASH
|
||||
if (!PyString_Check(key) ||
|
||||
if (!PyString_CheckExact(key) ||
|
||||
(hash = ((PyStringObject *) key)->ob_shash) == -1)
|
||||
#endif
|
||||
{
|
||||
|
@ -510,7 +513,7 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
|
|||
}
|
||||
mp = (dictobject *)op;
|
||||
#ifdef CACHE_HASH
|
||||
if (PyString_Check(key)) {
|
||||
if (PyString_CheckExact(key)) {
|
||||
#ifdef INTERN_STRINGS
|
||||
if (((PyStringObject *)key)->ob_sinterned != NULL) {
|
||||
key = ((PyStringObject *)key)->ob_sinterned;
|
||||
|
@ -562,7 +565,7 @@ PyDict_DelItem(PyObject *op, PyObject *key)
|
|||
return -1;
|
||||
}
|
||||
#ifdef CACHE_HASH
|
||||
if (!PyString_Check(key) ||
|
||||
if (!PyString_CheckExact(key) ||
|
||||
(hash = ((PyStringObject *) key)->ob_shash) == -1)
|
||||
#endif
|
||||
{
|
||||
|
@ -842,7 +845,7 @@ dict_subscript(dictobject *mp, register PyObject *key)
|
|||
long hash;
|
||||
assert(mp->ma_table != NULL);
|
||||
#ifdef CACHE_HASH
|
||||
if (!PyString_Check(key) ||
|
||||
if (!PyString_CheckExact(key) ||
|
||||
(hash = ((PyStringObject *) key)->ob_shash) == -1)
|
||||
#endif
|
||||
{
|
||||
|
@ -1358,7 +1361,7 @@ dict_has_key(register dictobject *mp, PyObject *key)
|
|||
long hash;
|
||||
register long ok;
|
||||
#ifdef CACHE_HASH
|
||||
if (!PyString_Check(key) ||
|
||||
if (!PyString_CheckExact(key) ||
|
||||
(hash = ((PyStringObject *) key)->ob_shash) == -1)
|
||||
#endif
|
||||
{
|
||||
|
@ -1382,7 +1385,7 @@ dict_get(register dictobject *mp, PyObject *args)
|
|||
return NULL;
|
||||
|
||||
#ifdef CACHE_HASH
|
||||
if (!PyString_Check(key) ||
|
||||
if (!PyString_CheckExact(key) ||
|
||||
(hash = ((PyStringObject *) key)->ob_shash) == -1)
|
||||
#endif
|
||||
{
|
||||
|
@ -1411,7 +1414,7 @@ dict_setdefault(register dictobject *mp, PyObject *args)
|
|||
return NULL;
|
||||
|
||||
#ifdef CACHE_HASH
|
||||
if (!PyString_Check(key) ||
|
||||
if (!PyString_CheckExact(key) ||
|
||||
(hash = ((PyStringObject *) key)->ob_shash) == -1)
|
||||
#endif
|
||||
{
|
||||
|
@ -1647,7 +1650,7 @@ dict_contains(dictobject *mp, PyObject *key)
|
|||
long hash;
|
||||
|
||||
#ifdef CACHE_HASH
|
||||
if (!PyString_Check(key) ||
|
||||
if (!PyString_CheckExact(key) ||
|
||||
(hash = ((PyStringObject *) key)->ob_shash) == -1)
|
||||
#endif
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue