Fixed issue #1564: The set implementation should special-case PyUnicode instead of PyString
I moved the unicode_eq to stringlib/eq.h to keep the function static and possible inline for setobject.c and dictobject.h. I also removed the unused _PyString_Eq method. If it's required in the future it can be added to eq.h
This commit is contained in:
parent
110194048e
commit
0ded5b54bb
|
@ -58,7 +58,6 @@ PyAPI_FUNC(PyObject *) PyString_Repr(PyObject *, int);
|
||||||
PyAPI_FUNC(void) PyString_Concat(PyObject **, PyObject *);
|
PyAPI_FUNC(void) PyString_Concat(PyObject **, PyObject *);
|
||||||
PyAPI_FUNC(void) PyString_ConcatAndDel(PyObject **, PyObject *);
|
PyAPI_FUNC(void) PyString_ConcatAndDel(PyObject **, PyObject *);
|
||||||
PyAPI_FUNC(int) _PyString_Resize(PyObject **, Py_ssize_t);
|
PyAPI_FUNC(int) _PyString_Resize(PyObject **, Py_ssize_t);
|
||||||
PyAPI_FUNC(int) _PyString_Eq(PyObject *, PyObject*);
|
|
||||||
PyAPI_FUNC(PyObject *) PyString_Format(PyObject *, PyObject *);
|
PyAPI_FUNC(PyObject *) PyString_Format(PyObject *, PyObject *);
|
||||||
PyAPI_FUNC(PyObject *) _PyString_FormatLong(PyObject*, int, int,
|
PyAPI_FUNC(PyObject *) _PyString_FormatLong(PyObject*, int, int,
|
||||||
int, char**, int*);
|
int, char**, int*);
|
||||||
|
|
|
@ -7,6 +7,7 @@ import pickle
|
||||||
import os
|
import os
|
||||||
from random import randrange, shuffle
|
from random import randrange, shuffle
|
||||||
import sys
|
import sys
|
||||||
|
import warnings
|
||||||
|
|
||||||
class PassThru(Exception):
|
class PassThru(Exception):
|
||||||
pass
|
pass
|
||||||
|
@ -817,6 +818,44 @@ class TestBasicOpsTriple(TestBasicOps):
|
||||||
self.length = 3
|
self.length = 3
|
||||||
self.repr = None
|
self.repr = None
|
||||||
|
|
||||||
|
#------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestBasicOpsString(TestBasicOps):
|
||||||
|
def setUp(self):
|
||||||
|
self.case = "string set"
|
||||||
|
self.values = ["a", "b", "c"]
|
||||||
|
self.set = set(self.values)
|
||||||
|
self.dup = set(self.values)
|
||||||
|
self.length = 3
|
||||||
|
self.repr = "{'a', 'c', 'b'}"
|
||||||
|
|
||||||
|
#------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestBasicOpsBytes(TestBasicOps):
|
||||||
|
def setUp(self):
|
||||||
|
self.case = "string set"
|
||||||
|
self.values = [b"a", b"b", b"c"]
|
||||||
|
self.set = set(self.values)
|
||||||
|
self.dup = set(self.values)
|
||||||
|
self.length = 3
|
||||||
|
self.repr = "{b'a', b'c', b'b'}"
|
||||||
|
|
||||||
|
#------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestBasicOpsMixedStringBytes(TestBasicOps):
|
||||||
|
def setUp(self):
|
||||||
|
self.warning_filters = warnings.filters[:]
|
||||||
|
warnings.simplefilter('ignore', BytesWarning)
|
||||||
|
self.case = "string and bytes set"
|
||||||
|
self.values = ["a", "b", b"a", b"b"]
|
||||||
|
self.set = set(self.values)
|
||||||
|
self.dup = set(self.values)
|
||||||
|
self.length = 4
|
||||||
|
self.repr = "{'a', b'a', 'b', b'b'}"
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
warnings.filters = self.warning_filters
|
||||||
|
|
||||||
#==============================================================================
|
#==============================================================================
|
||||||
|
|
||||||
def baditer():
|
def baditer():
|
||||||
|
@ -1581,6 +1620,9 @@ def test_main(verbose=None):
|
||||||
TestBasicOpsSingleton,
|
TestBasicOpsSingleton,
|
||||||
TestBasicOpsTuple,
|
TestBasicOpsTuple,
|
||||||
TestBasicOpsTriple,
|
TestBasicOpsTriple,
|
||||||
|
TestBasicOpsString,
|
||||||
|
TestBasicOpsBytes,
|
||||||
|
TestBasicOpsMixedStringBytes,
|
||||||
TestBinaryOps,
|
TestBinaryOps,
|
||||||
TestUpdateOps,
|
TestUpdateOps,
|
||||||
TestMutate,
|
TestMutate,
|
||||||
|
|
|
@ -14,6 +14,9 @@ Core and Builtins
|
||||||
|
|
||||||
- Issue #1573: Improper use of the keyword-only syntax makes the parser crash
|
- Issue #1573: Improper use of the keyword-only syntax makes the parser crash
|
||||||
|
|
||||||
|
- Issue #1564: The set implementation should special-case PyUnicode instead
|
||||||
|
of PyString
|
||||||
|
|
||||||
|
|
||||||
Extension Modules
|
Extension Modules
|
||||||
-----------------
|
-----------------
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "Python.h"
|
#include "Python.h"
|
||||||
|
#include "stringlib/eq.h"
|
||||||
|
|
||||||
|
|
||||||
/* Set a key error with the specified argument, wrapping it in a
|
/* Set a key error with the specified argument, wrapping it in a
|
||||||
|
@ -327,25 +328,6 @@ lookdict(PyDictObject *mp, PyObject *key, register long hash)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Return 1 if two unicode objects are equal, 0 if not. */
|
|
||||||
static int
|
|
||||||
unicode_eq(PyObject *aa, PyObject *bb)
|
|
||||||
{
|
|
||||||
PyUnicodeObject *a = (PyUnicodeObject *)aa;
|
|
||||||
PyUnicodeObject *b = (PyUnicodeObject *)bb;
|
|
||||||
|
|
||||||
if (a->length != b->length)
|
|
||||||
return 0;
|
|
||||||
if (a->length == 0)
|
|
||||||
return 1;
|
|
||||||
if (a->str[0] != b->str[0])
|
|
||||||
return 0;
|
|
||||||
if (a->length == 1)
|
|
||||||
return 1;
|
|
||||||
return memcmp(a->str, b->str, a->length * sizeof(Py_UNICODE)) == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Hacked up version of lookdict which can assume keys are always
|
* Hacked up version of lookdict which can assume keys are always
|
||||||
* unicodes; this assumption allows testing for errors during
|
* unicodes; this assumption allows testing for errors during
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
|
|
||||||
#include "Python.h"
|
#include "Python.h"
|
||||||
#include "structmember.h"
|
#include "structmember.h"
|
||||||
|
#include "stringlib/eq.h"
|
||||||
|
|
||||||
/* Set a key error with the specified argument, wrapping it in a
|
/* Set a key error with the specified argument, wrapping it in a
|
||||||
* tuple automatically so that tuple keys are not unpacked as the
|
* tuple automatically so that tuple keys are not unpacked as the
|
||||||
|
@ -55,6 +56,7 @@ _PySet_Dummy(void)
|
||||||
static PySetObject *free_sets[MAXFREESETS];
|
static PySetObject *free_sets[MAXFREESETS];
|
||||||
static int num_free_sets = 0;
|
static int num_free_sets = 0;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
The basic lookup function used by all operations.
|
The basic lookup function used by all operations.
|
||||||
This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
|
This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
|
||||||
|
@ -144,12 +146,12 @@ set_lookkey(PySetObject *so, PyObject *key, register long hash)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Hacked up version of set_lookkey which can assume keys are always strings;
|
* Hacked up version of set_lookkey which can assume keys are always unicode;
|
||||||
* This means we can always use _PyString_Eq directly and not have to check to
|
* This means we can always use unicode_eq directly and not have to check to
|
||||||
* see if the comparison altered the table.
|
* see if the comparison altered the table.
|
||||||
*/
|
*/
|
||||||
static setentry *
|
static setentry *
|
||||||
set_lookkey_string(PySetObject *so, PyObject *key, register long hash)
|
set_lookkey_unicode(PySetObject *so, PyObject *key, register long hash)
|
||||||
{
|
{
|
||||||
register Py_ssize_t i;
|
register Py_ssize_t i;
|
||||||
register size_t perturb;
|
register size_t perturb;
|
||||||
|
@ -158,11 +160,11 @@ set_lookkey_string(PySetObject *so, PyObject *key, register long hash)
|
||||||
setentry *table = so->table;
|
setentry *table = so->table;
|
||||||
register setentry *entry;
|
register setentry *entry;
|
||||||
|
|
||||||
/* Make sure this function doesn't have to handle non-string keys,
|
/* Make sure this function doesn't have to handle non-unicode keys,
|
||||||
including subclasses of str; e.g., one reason to subclass
|
including subclasses of str; e.g., one reason to subclass
|
||||||
strings is to override __eq__, and for speed we don't cater to
|
strings is to override __eq__, and for speed we don't cater to
|
||||||
that here. */
|
that here. */
|
||||||
if (!PyString_CheckExact(key)) {
|
if (!PyUnicode_CheckExact(key)) {
|
||||||
so->lookup = set_lookkey;
|
so->lookup = set_lookkey;
|
||||||
return set_lookkey(so, key, hash);
|
return set_lookkey(so, key, hash);
|
||||||
}
|
}
|
||||||
|
@ -173,7 +175,7 @@ set_lookkey_string(PySetObject *so, PyObject *key, register long hash)
|
||||||
if (entry->key == dummy)
|
if (entry->key == dummy)
|
||||||
freeslot = entry;
|
freeslot = entry;
|
||||||
else {
|
else {
|
||||||
if (entry->hash == hash && _PyString_Eq(entry->key, key))
|
if (entry->hash == hash && unicode_eq(entry->key, key))
|
||||||
return entry;
|
return entry;
|
||||||
freeslot = NULL;
|
freeslot = NULL;
|
||||||
}
|
}
|
||||||
|
@ -188,7 +190,7 @@ set_lookkey_string(PySetObject *so, PyObject *key, register long hash)
|
||||||
if (entry->key == key
|
if (entry->key == key
|
||||||
|| (entry->hash == hash
|
|| (entry->hash == hash
|
||||||
&& entry->key != dummy
|
&& entry->key != dummy
|
||||||
&& _PyString_Eq(entry->key, key)))
|
&& unicode_eq(entry->key, key)))
|
||||||
return entry;
|
return entry;
|
||||||
if (entry->key == dummy && freeslot == NULL)
|
if (entry->key == dummy && freeslot == NULL)
|
||||||
freeslot = entry;
|
freeslot = entry;
|
||||||
|
@ -375,8 +377,8 @@ set_add_key(register PySetObject *so, PyObject *key)
|
||||||
register long hash;
|
register long hash;
|
||||||
register Py_ssize_t n_used;
|
register Py_ssize_t n_used;
|
||||||
|
|
||||||
if (!PyString_CheckExact(key) ||
|
if (!PyUnicode_CheckExact(key) ||
|
||||||
(hash = ((PyStringObject *) key)->ob_shash) == -1) {
|
(hash = ((PyUnicodeObject *) key)->hash) == -1) {
|
||||||
hash = PyObject_Hash(key);
|
hash = PyObject_Hash(key);
|
||||||
if (hash == -1)
|
if (hash == -1)
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -422,8 +424,9 @@ set_discard_key(PySetObject *so, PyObject *key)
|
||||||
PyObject *old_key;
|
PyObject *old_key;
|
||||||
|
|
||||||
assert (PyAnySet_Check(so));
|
assert (PyAnySet_Check(so));
|
||||||
if (!PyString_CheckExact(key) ||
|
|
||||||
(hash = ((PyStringObject *) key)->ob_shash) == -1) {
|
if (!PyUnicode_CheckExact(key) ||
|
||||||
|
(hash = ((PyUnicodeObject *) key)->hash) == -1) {
|
||||||
hash = PyObject_Hash(key);
|
hash = PyObject_Hash(key);
|
||||||
if (hash == -1)
|
if (hash == -1)
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -668,8 +671,8 @@ set_contains_key(PySetObject *so, PyObject *key)
|
||||||
long hash;
|
long hash;
|
||||||
setentry *entry;
|
setentry *entry;
|
||||||
|
|
||||||
if (!PyString_CheckExact(key) ||
|
if (!PyUnicode_CheckExact(key) ||
|
||||||
(hash = ((PyStringObject *) key)->ob_shash) == -1) {
|
(hash = ((PyUnicodeObject *) key)->hash) == -1) {
|
||||||
hash = PyObject_Hash(key);
|
hash = PyObject_Hash(key);
|
||||||
if (hash == -1)
|
if (hash == -1)
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -989,7 +992,7 @@ make_new_set(PyTypeObject *type, PyObject *iterable)
|
||||||
INIT_NONZERO_SET_SLOTS(so);
|
INIT_NONZERO_SET_SLOTS(so);
|
||||||
}
|
}
|
||||||
|
|
||||||
so->lookup = set_lookkey_string;
|
so->lookup = set_lookkey_unicode;
|
||||||
so->weakreflist = NULL;
|
so->weakreflist = NULL;
|
||||||
|
|
||||||
if (iterable != NULL) {
|
if (iterable != NULL) {
|
||||||
|
@ -1352,7 +1355,7 @@ set_isdisjoint(PySetObject *so, PyObject *other)
|
||||||
while ((key = PyIter_Next(it)) != NULL) {
|
while ((key = PyIter_Next(it)) != NULL) {
|
||||||
int rv;
|
int rv;
|
||||||
setentry entry;
|
setentry entry;
|
||||||
long hash = PyObject_Hash(key);
|
long hash = PyObject_Hash(key);;
|
||||||
|
|
||||||
if (hash == -1) {
|
if (hash == -1) {
|
||||||
Py_DECREF(key);
|
Py_DECREF(key);
|
||||||
|
|
|
@ -0,0 +1,21 @@
|
||||||
|
/* Fast unicode equal function optimized for dictobject.c and setobject.c */
|
||||||
|
|
||||||
|
/* Return 1 if two unicode objects are equal, 0 if not.
|
||||||
|
* unicode_eq() is called when the hash of two unicode objects is equal.
|
||||||
|
*/
|
||||||
|
Py_LOCAL_INLINE(int)
|
||||||
|
unicode_eq(PyObject *aa, PyObject *bb)
|
||||||
|
{
|
||||||
|
register PyUnicodeObject *a = (PyUnicodeObject *)aa;
|
||||||
|
register PyUnicodeObject *b = (PyUnicodeObject *)bb;
|
||||||
|
|
||||||
|
if (a->length != b->length)
|
||||||
|
return 0;
|
||||||
|
if (a->length == 0)
|
||||||
|
return 1;
|
||||||
|
if (a->str[0] != b->str[0])
|
||||||
|
return 0;
|
||||||
|
if (a->length == 1)
|
||||||
|
return 1;
|
||||||
|
return memcmp(a->str, b->str, a->length * sizeof(Py_UNICODE)) == 0;
|
||||||
|
}
|
|
@ -877,16 +877,6 @@ string_richcompare(PyStringObject *a, PyStringObject *b, int op)
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
|
||||||
_PyString_Eq(PyObject *o1, PyObject *o2)
|
|
||||||
{
|
|
||||||
PyStringObject *a = (PyStringObject*) o1;
|
|
||||||
PyStringObject *b = (PyStringObject*) o2;
|
|
||||||
return Py_Size(a) == Py_Size(b)
|
|
||||||
&& *a->ob_sval == *b->ob_sval
|
|
||||||
&& memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static long
|
static long
|
||||||
string_hash(PyStringObject *a)
|
string_hash(PyStringObject *a)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue