From 12f433411bba8a0cdc4f09ba34472745ae9da0d1 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 20 Jul 2020 15:53:55 +0300 Subject: [PATCH] bpo-41334: Convert constructors of str, bytes and bytearray to Argument Clinic (GH-21535) --- Doc/whatsnew/3.10.rst | 4 + Lib/test/test_grammar.py | 14 +-- .../2020-07-18-18-01-10.bpo-41334.t5xMGp.rst | 2 + Objects/bytearrayobject.c | 24 +++--- Objects/bytesobject.c | 86 +++++++++---------- Objects/clinic/bytearrayobject.c.h | 71 ++++++++++++++- Objects/clinic/bytesobject.c.h | 71 ++++++++++++++- Objects/clinic/unicodeobject.c.h | 71 ++++++++++++++- Objects/unicodeobject.c | 57 ++++++------ 9 files changed, 307 insertions(+), 93 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2020-07-18-18-01-10.bpo-41334.t5xMGp.rst diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index 0296c8ad60b..e4beb600b8d 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -121,6 +121,10 @@ arguments passed to the Python executable. Optimizations ============= +* Constructors :func:`str`, :func:`bytes` and :func:`bytearray` are now faster + (around 30--40% for small objects). + (Contributed by Serhiy Storchaka in :issue:`41334`.) + * The :mod:`runpy` module now imports fewer modules. The ``python3 -m module-name`` command startup time is 1.3x faster in average. diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py index a51452e739f..5235fa2c783 100644 --- a/Lib/test/test_grammar.py +++ b/Lib/test/test_grammar.py @@ -584,12 +584,14 @@ class GrammarTests(unittest.TestCase): d22v(1, *(2, 3), **{'d': 4}) # keyword argument type tests - try: - str('x', **{b'foo':1 }) - except TypeError: - pass - else: - self.fail('Bytes should not work as keyword argument names') + with warnings.catch_warnings(): + warnings.simplefilter('ignore', BytesWarning) + try: + str('x', **{b'foo':1 }) + except TypeError: + pass + else: + self.fail('Bytes should not work as keyword argument names') # keyword only argument tests def pos0key1(*, key): return key pos0key1(key=100) diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-07-18-18-01-10.bpo-41334.t5xMGp.rst b/Misc/NEWS.d/next/Core and Builtins/2020-07-18-18-01-10.bpo-41334.t5xMGp.rst new file mode 100644 index 00000000000..5d44527a561 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2020-07-18-18-01-10.bpo-41334.t5xMGp.rst @@ -0,0 +1,2 @@ +Constructors :func:`str`, :func:`bytes` and :func:`bytearray` are now faster +(around 30--40% for small objects). diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 70350619330..8b57fb679d3 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -738,13 +738,20 @@ bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *valu } } +/*[clinic input] +bytearray.__init__ + + source as arg: object = NULL + encoding: str = NULL + errors: str = NULL + +[clinic start generated code]*/ + static int -bytearray_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds) +bytearray___init___impl(PyByteArrayObject *self, PyObject *arg, + const char *encoding, const char *errors) +/*[clinic end generated code: output=4ce1304649c2f8b3 input=1141a7122eefd7b9]*/ { - static char *kwlist[] = {"source", "encoding", "errors", 0}; - PyObject *arg = NULL; - const char *encoding = NULL; - const char *errors = NULL; Py_ssize_t count; PyObject *it; PyObject *(*iternext)(PyObject *); @@ -755,11 +762,6 @@ bytearray_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds) return -1; } - /* Parse arguments */ - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytearray", kwlist, - &arg, &encoding, &errors)) - return -1; - /* Make a quick exit if no first argument */ if (arg == NULL) { if (encoding != NULL || errors != NULL) { @@ -2354,7 +2356,7 @@ PyTypeObject PyByteArray_Type = { 0, /* tp_descr_get */ 0, /* tp_descr_set */ 0, /* tp_dictoffset */ - (initproc)bytearray_init, /* tp_init */ + (initproc)bytearray___init__, /* tp_init */ PyType_GenericAlloc, /* tp_alloc */ PyType_GenericNew, /* tp_new */ PyObject_Del, /* tp_free */ diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 7632cb5e4dd..3a922d32b16 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2580,24 +2580,27 @@ static PyNumberMethods bytes_as_number = { }; static PyObject * -bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds); +bytes_subtype_new(PyTypeObject *, PyObject *); + +/*[clinic input] +@classmethod +bytes.__new__ as bytes_new + + source as x: object = NULL + encoding: str = NULL + errors: str = NULL + +[clinic start generated code]*/ static PyObject * -bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding, + const char *errors) +/*[clinic end generated code: output=1e0c471be311a425 input=f0a966d19b7262b4]*/ { - PyObject *x = NULL; - const char *encoding = NULL; - const char *errors = NULL; - PyObject *new = NULL; + PyObject *bytes; PyObject *func; Py_ssize_t size; - static char *kwlist[] = {"source", "encoding", "errors", 0}; - if (type != &PyBytes_Type) - return bytes_subtype_new(type, args, kwds); - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x, - &encoding, &errors)) - return NULL; if (x == NULL) { if (encoding != NULL || errors != NULL) { PyErr_SetString(PyExc_TypeError, @@ -2606,78 +2609,73 @@ bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds) "errors without a string argument"); return NULL; } - return PyBytes_FromStringAndSize(NULL, 0); + bytes = PyBytes_FromStringAndSize(NULL, 0); } - - if (encoding != NULL) { + else if (encoding != NULL) { /* Encode via the codec registry */ if (!PyUnicode_Check(x)) { PyErr_SetString(PyExc_TypeError, "encoding without a string argument"); return NULL; } - new = PyUnicode_AsEncodedString(x, encoding, errors); - if (new == NULL) - return NULL; - assert(PyBytes_Check(new)); - return new; + bytes = PyUnicode_AsEncodedString(x, encoding, errors); } - - if (errors != NULL) { + else if (errors != NULL) { PyErr_SetString(PyExc_TypeError, PyUnicode_Check(x) ? "string argument without an encoding" : "errors without a string argument"); return NULL; } - /* We'd like to call PyObject_Bytes here, but we need to check for an integer argument before deferring to PyBytes_FromObject, something PyObject_Bytes doesn't do. */ - func = _PyObject_LookupSpecial(x, &PyId___bytes__); - if (func != NULL) { - new = _PyObject_CallNoArg(func); + else if ((func = _PyObject_LookupSpecial(x, &PyId___bytes__)) != NULL) { + bytes = _PyObject_CallNoArg(func); Py_DECREF(func); - if (new == NULL) + if (bytes == NULL) return NULL; - if (!PyBytes_Check(new)) { + if (!PyBytes_Check(bytes)) { PyErr_Format(PyExc_TypeError, - "__bytes__ returned non-bytes (type %.200s)", - Py_TYPE(new)->tp_name); - Py_DECREF(new); + "__bytes__ returned non-bytes (type %.200s)", + Py_TYPE(bytes)->tp_name); + Py_DECREF(bytes); return NULL; } - return new; } else if (PyErr_Occurred()) return NULL; - - if (PyUnicode_Check(x)) { + else if (PyUnicode_Check(x)) { PyErr_SetString(PyExc_TypeError, "string argument without an encoding"); return NULL; } /* Is it an integer? */ - if (_PyIndex_Check(x)) { + else if (_PyIndex_Check(x)) { size = PyNumber_AsSsize_t(x, PyExc_OverflowError); if (size == -1 && PyErr_Occurred()) { if (!PyErr_ExceptionMatches(PyExc_TypeError)) return NULL; PyErr_Clear(); /* fall through */ + bytes = PyBytes_FromObject(x); } else { if (size < 0) { PyErr_SetString(PyExc_ValueError, "negative count"); return NULL; } - new = _PyBytes_FromSize(size, 1); - if (new == NULL) - return NULL; - return new; + bytes = _PyBytes_FromSize(size, 1); } } + else { + bytes = PyBytes_FromObject(x); + } - return PyBytes_FromObject(x); + if (bytes != NULL && type != &PyBytes_Type) { + Py_SETREF(bytes, bytes_subtype_new(type, bytes)); + } + + return bytes; } static PyObject* @@ -2889,15 +2887,12 @@ PyBytes_FromObject(PyObject *x) } static PyObject * -bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +bytes_subtype_new(PyTypeObject *type, PyObject *tmp) { - PyObject *tmp, *pnew; + PyObject *pnew; Py_ssize_t n; assert(PyType_IsSubtype(type, &PyBytes_Type)); - tmp = bytes_new(&PyBytes_Type, args, kwds); - if (tmp == NULL) - return NULL; assert(PyBytes_Check(tmp)); n = PyBytes_GET_SIZE(tmp); pnew = type->tp_alloc(type, n); @@ -2907,7 +2902,6 @@ bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) ((PyBytesObject *)pnew)->ob_shash = ((PyBytesObject *)tmp)->ob_shash; } - Py_DECREF(tmp); return pnew; } diff --git a/Objects/clinic/bytearrayobject.c.h b/Objects/clinic/bytearrayobject.c.h index cbe6f20344e..3452b241740 100644 --- a/Objects/clinic/bytearrayobject.c.h +++ b/Objects/clinic/bytearrayobject.c.h @@ -2,6 +2,75 @@ preserve [clinic start generated code]*/ +static int +bytearray___init___impl(PyByteArrayObject *self, PyObject *arg, + const char *encoding, const char *errors); + +static int +bytearray___init__(PyObject *self, PyObject *args, PyObject *kwargs) +{ + int return_value = -1; + static const char * const _keywords[] = {"source", "encoding", "errors", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "bytearray", 0}; + PyObject *argsbuf[3]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0; + PyObject *arg = NULL; + const char *encoding = NULL; + const char *errors = NULL; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 0, 3, 0, argsbuf); + if (!fastargs) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (fastargs[0]) { + arg = fastargs[0]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (fastargs[1]) { + if (!PyUnicode_Check(fastargs[1])) { + _PyArg_BadArgument("bytearray", "argument 'encoding'", "str", fastargs[1]); + goto exit; + } + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(fastargs[1], &encoding_length); + if (encoding == NULL) { + goto exit; + } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (!PyUnicode_Check(fastargs[2])) { + _PyArg_BadArgument("bytearray", "argument 'errors'", "str", fastargs[2]); + goto exit; + } + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(fastargs[2], &errors_length); + if (errors == NULL) { + goto exit; + } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } +skip_optional_pos: + return_value = bytearray___init___impl((PyByteArrayObject *)self, arg, encoding, errors); + +exit: + return return_value; +} + PyDoc_STRVAR(bytearray_clear__doc__, "clear($self, /)\n" "--\n" @@ -1051,4 +1120,4 @@ bytearray_sizeof(PyByteArrayObject *self, PyObject *Py_UNUSED(ignored)) { return bytearray_sizeof_impl(self); } -/*[clinic end generated code: output=0cd59180c7d5dce5 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=47cd9ad3fdc3ac0c input=a9049054013a1b77]*/ diff --git a/Objects/clinic/bytesobject.c.h b/Objects/clinic/bytesobject.c.h index 201627eee23..27ac6b10674 100644 --- a/Objects/clinic/bytesobject.c.h +++ b/Objects/clinic/bytesobject.c.h @@ -809,4 +809,73 @@ skip_optional_pos: exit: return return_value; } -/*[clinic end generated code: output=dc1bc13e6990e452 input=a9049054013a1b77]*/ + +static PyObject * +bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding, + const char *errors); + +static PyObject * +bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"source", "encoding", "errors", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "bytes", 0}; + PyObject *argsbuf[3]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0; + PyObject *x = NULL; + const char *encoding = NULL; + const char *errors = NULL; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 0, 3, 0, argsbuf); + if (!fastargs) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (fastargs[0]) { + x = fastargs[0]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (fastargs[1]) { + if (!PyUnicode_Check(fastargs[1])) { + _PyArg_BadArgument("bytes", "argument 'encoding'", "str", fastargs[1]); + goto exit; + } + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(fastargs[1], &encoding_length); + if (encoding == NULL) { + goto exit; + } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (!PyUnicode_Check(fastargs[2])) { + _PyArg_BadArgument("bytes", "argument 'errors'", "str", fastargs[2]); + goto exit; + } + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(fastargs[2], &errors_length); + if (errors == NULL) { + goto exit; + } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } +skip_optional_pos: + return_value = bytes_new_impl(type, x, encoding, errors); + +exit: + return return_value; +} +/*[clinic end generated code: output=6101b417d6a6a717 input=a9049054013a1b77]*/ diff --git a/Objects/clinic/unicodeobject.c.h b/Objects/clinic/unicodeobject.c.h index ecd409e84cb..9ef8ce2e353 100644 --- a/Objects/clinic/unicodeobject.c.h +++ b/Objects/clinic/unicodeobject.c.h @@ -1258,4 +1258,73 @@ unicode_sizeof(PyObject *self, PyObject *Py_UNUSED(ignored)) { return unicode_sizeof_impl(self); } -/*[clinic end generated code: output=c5eb21e314da78b8 input=a9049054013a1b77]*/ + +static PyObject * +unicode_new_impl(PyTypeObject *type, PyObject *x, const char *encoding, + const char *errors); + +static PyObject * +unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"object", "encoding", "errors", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "str", 0}; + PyObject *argsbuf[3]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0; + PyObject *x = NULL; + const char *encoding = NULL; + const char *errors = NULL; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 0, 3, 0, argsbuf); + if (!fastargs) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (fastargs[0]) { + x = fastargs[0]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (fastargs[1]) { + if (!PyUnicode_Check(fastargs[1])) { + _PyArg_BadArgument("str", "argument 'encoding'", "str", fastargs[1]); + goto exit; + } + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(fastargs[1], &encoding_length); + if (encoding == NULL) { + goto exit; + } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (!PyUnicode_Check(fastargs[2])) { + _PyArg_BadArgument("str", "argument 'errors'", "str", fastargs[2]); + goto exit; + } + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(fastargs[2], &errors_length); + if (errors == NULL) { + goto exit; + } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } +skip_optional_pos: + return_value = unicode_new_impl(type, x, encoding, errors); + +exit: + return return_value; +} +/*[clinic end generated code: output=f10cf85d3935b3b7 input=a9049054013a1b77]*/ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 2e1045ad3a7..82e09ad05fc 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -15466,52 +15466,57 @@ PyUnicode_Format(PyObject *format, PyObject *args) } static PyObject * -unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds); +unicode_subtype_new(PyTypeObject *type, PyObject *unicode); + +/*[clinic input] +@classmethod +str.__new__ as unicode_new + + object as x: object = NULL + encoding: str = NULL + errors: str = NULL + +[clinic start generated code]*/ static PyObject * -unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +unicode_new_impl(PyTypeObject *type, PyObject *x, const char *encoding, + const char *errors) +/*[clinic end generated code: output=fc72d4878b0b57e9 input=e81255e5676d174e]*/ { - PyObject *x = NULL; - static char *kwlist[] = {"object", "encoding", "errors", 0}; - char *encoding = NULL; - char *errors = NULL; + PyObject *unicode; + if (x == NULL) { + unicode = unicode_new_empty(); + } + else if (encoding == NULL && errors == NULL) { + unicode = PyObject_Str(x); + } + else { + unicode = PyUnicode_FromEncodedObject(x, encoding, errors); + } - if (type != &PyUnicode_Type) - return unicode_subtype_new(type, args, kwds); - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:str", - kwlist, &x, &encoding, &errors)) - return NULL; - if (x == NULL) - _Py_RETURN_UNICODE_EMPTY(); - if (encoding == NULL && errors == NULL) - return PyObject_Str(x); - else - return PyUnicode_FromEncodedObject(x, encoding, errors); + if (unicode != NULL && type != &PyUnicode_Type) { + Py_SETREF(unicode, unicode_subtype_new(type, unicode)); + } + return unicode; } static PyObject * -unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +unicode_subtype_new(PyTypeObject *type, PyObject *unicode) { - PyObject *unicode, *self; + PyObject *self; Py_ssize_t length, char_size; int share_wstr, share_utf8; unsigned int kind; void *data; assert(PyType_IsSubtype(type, &PyUnicode_Type)); - - unicode = unicode_new(&PyUnicode_Type, args, kwds); - if (unicode == NULL) - return NULL; assert(_PyUnicode_CHECK(unicode)); if (PyUnicode_READY(unicode) == -1) { - Py_DECREF(unicode); return NULL; } self = type->tp_alloc(type, 0); if (self == NULL) { - Py_DECREF(unicode); return NULL; } kind = PyUnicode_KIND(unicode); @@ -15580,11 +15585,9 @@ unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) #ifdef Py_DEBUG _PyUnicode_HASH(self) = _PyUnicode_HASH(unicode); #endif - Py_DECREF(unicode); return self; onError: - Py_DECREF(unicode); Py_DECREF(self); return NULL; }