From 3f5fc70c6213008243e7d605f7d8a2d8f94cf919 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Sun, 30 Dec 2018 09:24:03 +0000 Subject: [PATCH] bpo-32492: 1.6x speed up in namedtuple attribute access using C fast-path (#10495) * bpo-32492: 2.5x speed up in namedtuple attribute access using C fast path * Add News entry * fixup! bpo-32492: 2.5x speed up in namedtuple attribute access using C fast path * Check for tuple in the __get__ of the new descriptor and don't cache the descriptor itself * Don't inherit from property. Implement GC methods to handle __doc__ * Add a test for the docstring substitution in descriptors * Update NEWS entry to reflect time against 3.7 branch * Simplify implementation with argument clinic, better error messages, only __new__ * Use positional-only parameters for the __new__ * Use PyTuple_GET_SIZE and PyTuple_GET_ITEM to tighter the implementation of tuplegetterdescr_get * Implement __set__ to make tuplegetter a data descriptor * Use Py_INCREF now that we inline PyTuple_GetItem * Apply the valid_index() function, saving one test * Move Py_None test out of the critical path. --- Lib/collections/__init__.py | 14 +- Lib/test/test_collections.py | 8 + .../2018-11-13-01-03-10.bpo-32492.voIdcp.rst | 2 + Modules/_collectionsmodule.c | 163 ++++++++++++++++++ Modules/clinic/_collectionsmodule.c.h | 28 +++ 5 files changed, 211 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2018-11-13-01-03-10.bpo-32492.voIdcp.rst create mode 100644 Modules/clinic/_collectionsmodule.c.h diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py index 4724b0edf33..0b74c3f8915 100644 --- a/Lib/collections/__init__.py +++ b/Lib/collections/__init__.py @@ -311,6 +311,11 @@ except ImportError: ### namedtuple ################################################################################ +try: + from _collections import _tuplegetter +except ImportError: + _tuplegetter = lambda index, doc: property(_itemgetter(index), doc=doc) + _nt_itemgetters = {} def namedtuple(typename, field_names, *, rename=False, defaults=None, module=None): @@ -454,12 +459,13 @@ def namedtuple(typename, field_names, *, rename=False, defaults=None, module=Non cache = _nt_itemgetters for index, name in enumerate(field_names): try: - itemgetter_object, doc = cache[index] + doc = cache[index] except KeyError: - itemgetter_object = _itemgetter(index) doc = f'Alias for field number {index}' - cache[index] = itemgetter_object, doc - class_namespace[name] = property(itemgetter_object, doc=doc) + cache[index] = doc + + tuplegetter_object = _tuplegetter(index, doc) + class_namespace[name] = tuplegetter_object result = type(typename, (tuple,), class_namespace) diff --git a/Lib/test/test_collections.py b/Lib/test/test_collections.py index e2ffaaafd45..c0815947f90 100644 --- a/Lib/test/test_collections.py +++ b/Lib/test/test_collections.py @@ -514,6 +514,14 @@ class TestNamedTuple(unittest.TestCase): a.w = 5 self.assertEqual(a.__dict__, {'w': 5}) + def test_namedtuple_can_mutate_doc_of_descriptors_independently(self): + A = namedtuple('A', 'x y') + B = namedtuple('B', 'x y') + A.x.__doc__ = 'foo' + B.x.__doc__ = 'bar' + self.assertEqual(A.x.__doc__, 'foo') + self.assertEqual(B.x.__doc__, 'bar') + ################################################################################ ### Abstract Base Classes diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-11-13-01-03-10.bpo-32492.voIdcp.rst b/Misc/NEWS.d/next/Core and Builtins/2018-11-13-01-03-10.bpo-32492.voIdcp.rst new file mode 100644 index 00000000000..24682b14f36 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2018-11-13-01-03-10.bpo-32492.voIdcp.rst @@ -0,0 +1,2 @@ +Speed up :class:`namedtuple` attribute access by 1.6x using a C fast-path +for the name descriptors. Patch by Pablo Galindo. diff --git a/Modules/_collectionsmodule.c b/Modules/_collectionsmodule.c index 4da0662f0c7..cc325e1aaac 100644 --- a/Modules/_collectionsmodule.c +++ b/Modules/_collectionsmodule.c @@ -7,6 +7,14 @@ #include /* For size_t */ #endif +/*[clinic input] +class _tuplegetter "_tuplegetterobject *" "&tuplegetter_type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=ee5ed5baabe35068]*/ + +static PyTypeObject tuplegetter_type; +#include "clinic/_collectionsmodule.c.h" + /* collections module implementation of a deque() datatype Written and maintained by Raymond D. Hettinger */ @@ -2328,6 +2336,156 @@ done: Py_RETURN_NONE; } +/* Helper functions for namedtuples */ + +typedef struct { + PyObject_HEAD + Py_ssize_t index; + PyObject* doc; +} _tuplegetterobject; + +/*[clinic input] +@classmethod +_tuplegetter.__new__ as tuplegetter_new + + index: Py_ssize_t + doc: object + / +[clinic start generated code]*/ + +static PyObject * +tuplegetter_new_impl(PyTypeObject *type, Py_ssize_t index, PyObject *doc) +/*[clinic end generated code: output=014be444ad80263f input=87c576a5bdbc0bbb]*/ +{ + _tuplegetterobject* self; + self = (_tuplegetterobject *)type->tp_alloc(type, 0); + if (self == NULL) { + return NULL; + } + self->index = index; + Py_INCREF(doc); + self->doc = doc; + return (PyObject *)self; +} + +static PyObject * +tuplegetterdescr_get(PyObject *self, PyObject *obj, PyObject *type) +{ + PyObject *result; + if (obj == NULL) { + Py_INCREF(self); + return self; + } + if (!PyTuple_Check(obj)) { + if (obj == Py_None) { + Py_INCREF(self); + return self; + } + PyErr_Format(PyExc_TypeError, + "descriptor for index '%d' for tuple subclasses " + "doesn't apply to '%s' object", + ((_tuplegetterobject*)self)->index, + obj->ob_type->tp_name); + return NULL; + } + + Py_ssize_t index = ((_tuplegetterobject*)self)->index; + + if (!valid_index(index, PyTuple_GET_SIZE(obj))) { + PyErr_SetString(PyExc_IndexError, "tuple index out of range"); + return NULL; + } + + result = PyTuple_GET_ITEM(obj, index); + Py_INCREF(result); + return result; +} + +static int +tuplegetter_set(PyObject *self, PyObject *obj, PyObject *value) +{ + if (value == NULL) { + PyErr_SetString(PyExc_AttributeError, "can't delete attribute"); + } else { + PyErr_SetString(PyExc_AttributeError, "can't set attribute"); + } + return -1; +} + +static int +tuplegetter_traverse(PyObject *self, visitproc visit, void *arg) +{ + _tuplegetterobject *tuplegetter = (_tuplegetterobject *)self; + Py_VISIT(tuplegetter->doc); + return 0; +} + +static int +tuplegetter_clear(PyObject *self) +{ + _tuplegetterobject *tuplegetter = (_tuplegetterobject *)self; + Py_CLEAR(tuplegetter->doc); + return 0; +} + +static void +tuplegetter_dealloc(_tuplegetterobject *self) +{ + PyObject_GC_UnTrack(self); + tuplegetter_clear((PyObject*)self); + Py_TYPE(self)->tp_free((PyObject*)self); +} + + +static PyMemberDef tuplegetter_members[] = { + {"__doc__", T_OBJECT, offsetof(_tuplegetterobject, doc), 0}, + {0} +}; + +static PyTypeObject tuplegetter_type = { + PyVarObject_HEAD_INIT(NULL, 0) + "_collections._tuplegetter", /* tp_name */ + sizeof(_tuplegetterobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)tuplegetter_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + 0, /* tp_doc */ + (traverseproc)tuplegetter_traverse, /* tp_traverse */ + (inquiry)tuplegetter_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + tuplegetter_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + tuplegetterdescr_get, /* tp_descr_get */ + tuplegetter_set, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + tuplegetter_new, /* tp_new */ + 0, +}; + + /* module level code ********************************************************/ PyDoc_STRVAR(module_doc, @@ -2386,5 +2544,10 @@ PyInit__collections(void) Py_INCREF(&dequereviter_type); PyModule_AddObject(m, "_deque_reverse_iterator", (PyObject *)&dequereviter_type); + if (PyType_Ready(&tuplegetter_type) < 0) + return NULL; + Py_INCREF(&tuplegetter_type); + PyModule_AddObject(m, "_tuplegetter", (PyObject *)&tuplegetter_type); + return m; } diff --git a/Modules/clinic/_collectionsmodule.c.h b/Modules/clinic/_collectionsmodule.c.h new file mode 100644 index 00000000000..12626c1f661 --- /dev/null +++ b/Modules/clinic/_collectionsmodule.c.h @@ -0,0 +1,28 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +static PyObject * +tuplegetter_new_impl(PyTypeObject *type, Py_ssize_t index, PyObject *doc); + +static PyObject * +tuplegetter_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + Py_ssize_t index; + PyObject *doc; + + if ((type == &tuplegetter_type) && + !_PyArg_NoKeywords("_tuplegetter", kwargs)) { + goto exit; + } + if (!PyArg_ParseTuple(args, "nO:_tuplegetter", + &index, &doc)) { + goto exit; + } + return_value = tuplegetter_new_impl(type, index, doc); + +exit: + return return_value; +} +/*[clinic end generated code: output=83746071eacc28d3 input=a9049054013a1b77]*/