Changed the dict implementation to take "string shortcuts" only when

keys are true strings -- no subclasses need apply.  This may be debatable.

The problem is that a str subclass may very well want to override __eq__
and/or __hash__ (see the new example of case-insensitive strings in
test_descr), but go-fast shortcuts for strings are ubiquitous in our dicts
(and subclass overrides aren't even looked for then).  Another go-fast
reason for the change is that PyString_CheckExact() is a quicker test
than PyString_Check(), and we make such a test on virtually every access
to every dict.

OTOH, a str subclass may also be perfectly happy using the base str eq
and hash, and this change slows them a lot.  But those cases are still
hypothetical, while Python's own reliance on true-string dicts is not.
This commit is contained in:
Tim Peters 2001-09-14 00:25:33 +00:00
parent 742dfd6f17
commit 0ab085c4cb
2 changed files with 52 additions and 15 deletions

View File

@ -1533,8 +1533,8 @@ def inherits():
verify(str(s) == base)
verify(str(s).__class__ is str)
verify(hash(s) == hash(base))
verify({s: 1}[base] == 1)
verify({base: 1}[s] == 1)
#XXX verify({s: 1}[base] == 1)
#XXX verify({base: 1}[s] == 1)
verify((s + "").__class__ is str)
verify(s + "" == base)
verify(("" + s).__class__ is str)
@ -1758,6 +1758,39 @@ f = t(%r, 'w') # rexec can't catch this by itself
except:
pass
def str_subclass_as_dict_key():
# Checks that a str subclass which overrides __eq__ and __hash__ has those
# overrides honored when its instances are used as dict keys.
if verbose:
print "Testing a str subclass used as dict key .."
class cistr(str):
"""Subclass of str that computes __eq__ case-insensitively.
Also computes a hash code of the string in canonical form.
"""
def __init__(self, value):
# The canonical form is the lowercased value; its hash is cached once.
self.canonical = value.lower()
self.hashcode = hash(self.canonical)
def __eq__(self, other):
# Wrap anything that is not a cistr so both sides have a .canonical
# attribute to compare.
if not isinstance(other, cistr):
other = cistr(other)
return self.canonical == other.canonical
def __hash__(self):
return self.hashcode
# Equality ignores case, while str() still yields the original spelling.
verify('aBc' == cistr('ABC') == 'abc')
verify(str(cistr('ABC')) == 'ABC')
# Subscripting, membership and get() are each probed with keys whose case
# differs from the stored key; they only succeed if the dict uses the
# overridden __eq__/__hash__.
d = {cistr('one'): 1, cistr('two'): 2, cistr('tHree'): 3}
verify(d[cistr('one')] == 1)
verify(d[cistr('tWo')] == 2)
verify(d[cistr('THrEE')] == 3)
verify(cistr('ONe') in d)
verify(d.get(cistr('thrEE')) == 3)
def all():
lists()
dicts()
@ -1794,6 +1827,7 @@ def all():
inherits()
keywords()
restricted()
str_subclass_as_dict_key()
all()

View File

@ -298,8 +298,8 @@ Done:
* means we don't need to go through PyObject_Compare(); we can always use
* _PyString_Eq directly.
*
* This really only becomes meaningful if proper error handling in lookdict()
* is too expensive.
* This is valuable because the general-case error handling in lookdict() is
* expensive, and dicts with pure-string keys are very common.
*/
static dictentry *
lookdict_string(dictobject *mp, PyObject *key, register long hash)
@ -311,8 +311,11 @@ lookdict_string(dictobject *mp, PyObject *key, register long hash)
dictentry *ep0 = mp->ma_table;
register dictentry *ep;
/* make sure this function doesn't have to handle non-string keys */
if (!PyString_Check(key)) {
/* Make sure this function doesn't have to handle non-string keys,
including subclasses of str; e.g., one reason to subclass
strings is to override __eq__, and for speed we don't cater to
that here. */
if (!PyString_CheckExact(key)) {
#ifdef SHOW_CONVERSION_COUNTS
++converted;
#endif
@ -478,7 +481,7 @@ PyDict_GetItem(PyObject *op, PyObject *key)
return NULL;
}
#ifdef CACHE_HASH
if (!PyString_Check(key) ||
if (!PyString_CheckExact(key) ||
(hash = ((PyStringObject *) key)->ob_shash) == -1)
#endif
{
@ -510,7 +513,7 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
}
mp = (dictobject *)op;
#ifdef CACHE_HASH
if (PyString_Check(key)) {
if (PyString_CheckExact(key)) {
#ifdef INTERN_STRINGS
if (((PyStringObject *)key)->ob_sinterned != NULL) {
key = ((PyStringObject *)key)->ob_sinterned;
@ -562,7 +565,7 @@ PyDict_DelItem(PyObject *op, PyObject *key)
return -1;
}
#ifdef CACHE_HASH
if (!PyString_Check(key) ||
if (!PyString_CheckExact(key) ||
(hash = ((PyStringObject *) key)->ob_shash) == -1)
#endif
{
@ -842,7 +845,7 @@ dict_subscript(dictobject *mp, register PyObject *key)
long hash;
assert(mp->ma_table != NULL);
#ifdef CACHE_HASH
if (!PyString_Check(key) ||
if (!PyString_CheckExact(key) ||
(hash = ((PyStringObject *) key)->ob_shash) == -1)
#endif
{
@ -1358,7 +1361,7 @@ dict_has_key(register dictobject *mp, PyObject *key)
long hash;
register long ok;
#ifdef CACHE_HASH
if (!PyString_Check(key) ||
if (!PyString_CheckExact(key) ||
(hash = ((PyStringObject *) key)->ob_shash) == -1)
#endif
{
@ -1382,7 +1385,7 @@ dict_get(register dictobject *mp, PyObject *args)
return NULL;
#ifdef CACHE_HASH
if (!PyString_Check(key) ||
if (!PyString_CheckExact(key) ||
(hash = ((PyStringObject *) key)->ob_shash) == -1)
#endif
{
@ -1411,7 +1414,7 @@ dict_setdefault(register dictobject *mp, PyObject *args)
return NULL;
#ifdef CACHE_HASH
if (!PyString_Check(key) ||
if (!PyString_CheckExact(key) ||
(hash = ((PyStringObject *) key)->ob_shash) == -1)
#endif
{
@ -1647,7 +1650,7 @@ dict_contains(dictobject *mp, PyObject *key)
long hash;
#ifdef CACHE_HASH
if (!PyString_Check(key) ||
if (!PyString_CheckExact(key) ||
(hash = ((PyStringObject *) key)->ob_shash) == -1)
#endif
{