bpo-41334: Convert constructors of str, bytes and bytearray to Argument Clinic (GH-21535)

This commit is contained in:
Serhiy Storchaka 2020-07-20 15:53:55 +03:00 committed by GitHub
parent e123012d79
commit 12f433411b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 307 additions and 93 deletions

View File

@ -121,6 +121,10 @@ arguments passed to the Python executable.
Optimizations
=============
* Constructors :func:`str`, :func:`bytes` and :func:`bytearray` are now faster
(around 30--40% for small objects).
(Contributed by Serhiy Storchaka in :issue:`41334`.)
* The :mod:`runpy` module now imports fewer modules.
The ``python3 -m module-name`` command startup time is 1.3x faster on
average.

View File

@ -584,6 +584,8 @@ class GrammarTests(unittest.TestCase):
d22v(1, *(2, 3), **{'d': 4})
# keyword argument type tests
with warnings.catch_warnings():
warnings.simplefilter('ignore', BytesWarning)
try:
str('x', **{b'foo':1 })
except TypeError:

View File

@ -0,0 +1,2 @@
Constructors :func:`str`, :func:`bytes` and :func:`bytearray` are now faster
(around 30--40% for small objects).

View File

@ -738,13 +738,20 @@ bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *valu
}
}
/*[clinic input]
bytearray.__init__
source as arg: object = NULL
encoding: str = NULL
errors: str = NULL
[clinic start generated code]*/
static int
bytearray_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
bytearray___init___impl(PyByteArrayObject *self, PyObject *arg,
const char *encoding, const char *errors)
/*[clinic end generated code: output=4ce1304649c2f8b3 input=1141a7122eefd7b9]*/
{
static char *kwlist[] = {"source", "encoding", "errors", 0};
PyObject *arg = NULL;
const char *encoding = NULL;
const char *errors = NULL;
Py_ssize_t count;
PyObject *it;
PyObject *(*iternext)(PyObject *);
@ -755,11 +762,6 @@ bytearray_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
return -1;
}
/* Parse arguments */
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytearray", kwlist,
&arg, &encoding, &errors))
return -1;
/* Make a quick exit if no first argument */
if (arg == NULL) {
if (encoding != NULL || errors != NULL) {
@ -2354,7 +2356,7 @@ PyTypeObject PyByteArray_Type = {
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)bytearray_init, /* tp_init */
(initproc)bytearray___init__, /* tp_init */
PyType_GenericAlloc, /* tp_alloc */
PyType_GenericNew, /* tp_new */
PyObject_Del, /* tp_free */

View File

@ -2580,24 +2580,27 @@ static PyNumberMethods bytes_as_number = {
};
static PyObject *
bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
bytes_subtype_new(PyTypeObject *, PyObject *);
/*[clinic input]
@classmethod
bytes.__new__ as bytes_new
source as x: object = NULL
encoding: str = NULL
errors: str = NULL
[clinic start generated code]*/
static PyObject *
bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
const char *errors)
/*[clinic end generated code: output=1e0c471be311a425 input=f0a966d19b7262b4]*/
{
PyObject *x = NULL;
const char *encoding = NULL;
const char *errors = NULL;
PyObject *new = NULL;
PyObject *bytes;
PyObject *func;
Py_ssize_t size;
static char *kwlist[] = {"source", "encoding", "errors", 0};
if (type != &PyBytes_Type)
return bytes_subtype_new(type, args, kwds);
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
&encoding, &errors))
return NULL;
if (x == NULL) {
if (encoding != NULL || errors != NULL) {
PyErr_SetString(PyExc_TypeError,
@ -2606,78 +2609,73 @@ bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
"errors without a string argument");
return NULL;
}
return PyBytes_FromStringAndSize(NULL, 0);
bytes = PyBytes_FromStringAndSize(NULL, 0);
}
if (encoding != NULL) {
else if (encoding != NULL) {
/* Encode via the codec registry */
if (!PyUnicode_Check(x)) {
PyErr_SetString(PyExc_TypeError,
"encoding without a string argument");
return NULL;
}
new = PyUnicode_AsEncodedString(x, encoding, errors);
if (new == NULL)
return NULL;
assert(PyBytes_Check(new));
return new;
bytes = PyUnicode_AsEncodedString(x, encoding, errors);
}
if (errors != NULL) {
else if (errors != NULL) {
PyErr_SetString(PyExc_TypeError,
PyUnicode_Check(x) ?
"string argument without an encoding" :
"errors without a string argument");
return NULL;
}
/* We'd like to call PyObject_Bytes here, but we need to check for an
integer argument before deferring to PyBytes_FromObject, something
PyObject_Bytes doesn't do. */
func = _PyObject_LookupSpecial(x, &PyId___bytes__);
if (func != NULL) {
new = _PyObject_CallNoArg(func);
else if ((func = _PyObject_LookupSpecial(x, &PyId___bytes__)) != NULL) {
bytes = _PyObject_CallNoArg(func);
Py_DECREF(func);
if (new == NULL)
if (bytes == NULL)
return NULL;
if (!PyBytes_Check(new)) {
if (!PyBytes_Check(bytes)) {
PyErr_Format(PyExc_TypeError,
"__bytes__ returned non-bytes (type %.200s)",
Py_TYPE(new)->tp_name);
Py_DECREF(new);
Py_TYPE(bytes)->tp_name);
Py_DECREF(bytes);
return NULL;
}
return new;
}
else if (PyErr_Occurred())
return NULL;
if (PyUnicode_Check(x)) {
else if (PyUnicode_Check(x)) {
PyErr_SetString(PyExc_TypeError,
"string argument without an encoding");
return NULL;
}
/* Is it an integer? */
if (_PyIndex_Check(x)) {
else if (_PyIndex_Check(x)) {
size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
if (size == -1 && PyErr_Occurred()) {
if (!PyErr_ExceptionMatches(PyExc_TypeError))
return NULL;
PyErr_Clear(); /* fall through */
bytes = PyBytes_FromObject(x);
}
else {
if (size < 0) {
PyErr_SetString(PyExc_ValueError, "negative count");
return NULL;
}
new = _PyBytes_FromSize(size, 1);
if (new == NULL)
return NULL;
return new;
bytes = _PyBytes_FromSize(size, 1);
}
}
else {
bytes = PyBytes_FromObject(x);
}
return PyBytes_FromObject(x);
if (bytes != NULL && type != &PyBytes_Type) {
Py_SETREF(bytes, bytes_subtype_new(type, bytes));
}
return bytes;
}
static PyObject*
@ -2889,15 +2887,12 @@ PyBytes_FromObject(PyObject *x)
}
static PyObject *
bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
bytes_subtype_new(PyTypeObject *type, PyObject *tmp)
{
PyObject *tmp, *pnew;
PyObject *pnew;
Py_ssize_t n;
assert(PyType_IsSubtype(type, &PyBytes_Type));
tmp = bytes_new(&PyBytes_Type, args, kwds);
if (tmp == NULL)
return NULL;
assert(PyBytes_Check(tmp));
n = PyBytes_GET_SIZE(tmp);
pnew = type->tp_alloc(type, n);
@ -2907,7 +2902,6 @@ bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
((PyBytesObject *)pnew)->ob_shash =
((PyBytesObject *)tmp)->ob_shash;
}
Py_DECREF(tmp);
return pnew;
}

View File

@ -2,6 +2,75 @@
preserve
[clinic start generated code]*/
static int
bytearray___init___impl(PyByteArrayObject *self, PyObject *arg,
const char *encoding, const char *errors);
/*
 * Argument-parsing wrapper for bytearray.__init__.
 *
 * Unpacks the positional tuple `args` and the optional keyword dict `kwargs`
 * into the three optional parameters "source", "encoding" and "errors", then
 * forwards them to bytearray___init___impl().
 *
 * Returns -1 if unpacking or validation fails; otherwise returns whatever
 * bytearray___init___impl() returns.
 */
static int
bytearray___init__(PyObject *self, PyObject *args, PyObject *kwargs)
{
/* Pessimistic default: every early `goto exit` reports failure. */
int return_value = -1;
static const char * const _keywords[] = {"source", "encoding", "errors", NULL};
static _PyArg_Parser _parser = {NULL, _keywords, "bytearray", 0};
/* One slot per keyword listed in _keywords. */
PyObject *argsbuf[3];
PyObject * const *fastargs;
Py_ssize_t nargs = PyTuple_GET_SIZE(args);
/* Total number of arguments supplied (positional + keyword); the trailing
   "- 0" subtracts the count of required parameters, which is zero here. */
Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0;
/* Parameters left at NULL mean "not supplied" to the impl function. */
PyObject *arg = NULL;
const char *encoding = NULL;
const char *errors = NULL;
/* NOTE(review): the literals 0, 3, 0 are presumably min-positional,
   max-positional and min-keyword-only counts -- confirm against the
   _PyArg_UnpackKeywords declaration. */
fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 0, 3, 0, argsbuf);
if (!fastargs) {
goto exit;
}
/* Nothing was passed at all: call the impl with all defaults. */
if (!noptargs) {
goto skip_optional_pos;
}
/* Each supplied optional decrements noptargs; once it reaches zero there
   is nothing left to convert and the remaining slots are skipped. */
if (fastargs[0]) {
arg = fastargs[0];
if (!--noptargs) {
goto skip_optional_pos;
}
}
if (fastargs[1]) {
/* "encoding" must be a str object. */
if (!PyUnicode_Check(fastargs[1])) {
_PyArg_BadArgument("bytearray", "argument 'encoding'", "str", fastargs[1]);
goto exit;
}
Py_ssize_t encoding_length;
encoding = PyUnicode_AsUTF8AndSize(fastargs[1], &encoding_length);
if (encoding == NULL) {
goto exit;
}
/* A C-string length shorter than the reported size means the text
   contains a NUL character, which is rejected. */
if (strlen(encoding) != (size_t)encoding_length) {
PyErr_SetString(PyExc_ValueError, "embedded null character");
goto exit;
}
if (!--noptargs) {
goto skip_optional_pos;
}
}
/* fastargs[2] is used without a NULL test: execution only gets here while
   noptargs is still non-zero after slots 0 and 1 were accounted for, so
   presumably "errors" must have been supplied -- this relies on
   _PyArg_UnpackKeywords rejecting unknown arguments (verify). */
if (!PyUnicode_Check(fastargs[2])) {
_PyArg_BadArgument("bytearray", "argument 'errors'", "str", fastargs[2]);
goto exit;
}
Py_ssize_t errors_length;
errors = PyUnicode_AsUTF8AndSize(fastargs[2], &errors_length);
if (errors == NULL) {
goto exit;
}
/* Same embedded-NUL rejection as for "encoding". */
if (strlen(errors) != (size_t)errors_length) {
PyErr_SetString(PyExc_ValueError, "embedded null character");
goto exit;
}
skip_optional_pos:
/* All supplied arguments are converted; hand off to the implementation. */
return_value = bytearray___init___impl((PyByteArrayObject *)self, arg, encoding, errors);
exit:
return return_value;
}
PyDoc_STRVAR(bytearray_clear__doc__,
"clear($self, /)\n"
"--\n"
@ -1051,4 +1120,4 @@ bytearray_sizeof(PyByteArrayObject *self, PyObject *Py_UNUSED(ignored))
{
return bytearray_sizeof_impl(self);
}
/*[clinic end generated code: output=0cd59180c7d5dce5 input=a9049054013a1b77]*/
/*[clinic end generated code: output=47cd9ad3fdc3ac0c input=a9049054013a1b77]*/

View File

@ -809,4 +809,73 @@ skip_optional_pos:
exit:
return return_value;
}
/*[clinic end generated code: output=dc1bc13e6990e452 input=a9049054013a1b77]*/
static PyObject *
bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
const char *errors);
/*
 * Argument-parsing wrapper for bytes.__new__.
 *
 * Unpacks the positional tuple `args` and the optional keyword dict `kwargs`
 * into the three optional parameters "source", "encoding" and "errors", then
 * forwards them, together with `type`, to bytes_new_impl().
 *
 * Returns NULL if unpacking or validation fails; otherwise returns whatever
 * bytes_new_impl() returns.
 */
static PyObject *
bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
/* Pessimistic default: every early `goto exit` reports failure. */
PyObject *return_value = NULL;
static const char * const _keywords[] = {"source", "encoding", "errors", NULL};
static _PyArg_Parser _parser = {NULL, _keywords, "bytes", 0};
/* One slot per keyword listed in _keywords. */
PyObject *argsbuf[3];
PyObject * const *fastargs;
Py_ssize_t nargs = PyTuple_GET_SIZE(args);
/* Total number of arguments supplied (positional + keyword); the trailing
   "- 0" subtracts the count of required parameters, which is zero here. */
Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0;
/* Parameters left at NULL mean "not supplied" to the impl function. */
PyObject *x = NULL;
const char *encoding = NULL;
const char *errors = NULL;
/* NOTE(review): the literals 0, 3, 0 are presumably min-positional,
   max-positional and min-keyword-only counts -- confirm against the
   _PyArg_UnpackKeywords declaration. */
fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 0, 3, 0, argsbuf);
if (!fastargs) {
goto exit;
}
/* Nothing was passed at all: call the impl with all defaults. */
if (!noptargs) {
goto skip_optional_pos;
}
/* Each supplied optional decrements noptargs; once it reaches zero there
   is nothing left to convert and the remaining slots are skipped. */
if (fastargs[0]) {
x = fastargs[0];
if (!--noptargs) {
goto skip_optional_pos;
}
}
if (fastargs[1]) {
/* "encoding" must be a str object. */
if (!PyUnicode_Check(fastargs[1])) {
_PyArg_BadArgument("bytes", "argument 'encoding'", "str", fastargs[1]);
goto exit;
}
Py_ssize_t encoding_length;
encoding = PyUnicode_AsUTF8AndSize(fastargs[1], &encoding_length);
if (encoding == NULL) {
goto exit;
}
/* A C-string length shorter than the reported size means the text
   contains a NUL character, which is rejected. */
if (strlen(encoding) != (size_t)encoding_length) {
PyErr_SetString(PyExc_ValueError, "embedded null character");
goto exit;
}
if (!--noptargs) {
goto skip_optional_pos;
}
}
/* fastargs[2] is used without a NULL test: execution only gets here while
   noptargs is still non-zero after slots 0 and 1 were accounted for, so
   presumably "errors" must have been supplied -- this relies on
   _PyArg_UnpackKeywords rejecting unknown arguments (verify). */
if (!PyUnicode_Check(fastargs[2])) {
_PyArg_BadArgument("bytes", "argument 'errors'", "str", fastargs[2]);
goto exit;
}
Py_ssize_t errors_length;
errors = PyUnicode_AsUTF8AndSize(fastargs[2], &errors_length);
if (errors == NULL) {
goto exit;
}
/* Same embedded-NUL rejection as for "encoding". */
if (strlen(errors) != (size_t)errors_length) {
PyErr_SetString(PyExc_ValueError, "embedded null character");
goto exit;
}
skip_optional_pos:
/* All supplied arguments are converted; hand off to the implementation. */
return_value = bytes_new_impl(type, x, encoding, errors);
exit:
return return_value;
}
/*[clinic end generated code: output=6101b417d6a6a717 input=a9049054013a1b77]*/

View File

@ -1258,4 +1258,73 @@ unicode_sizeof(PyObject *self, PyObject *Py_UNUSED(ignored))
{
return unicode_sizeof_impl(self);
}
/*[clinic end generated code: output=c5eb21e314da78b8 input=a9049054013a1b77]*/
static PyObject *
unicode_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
const char *errors);
/*
 * Argument-parsing wrapper for str.__new__.
 *
 * Unpacks the positional tuple `args` and the optional keyword dict `kwargs`
 * into the three optional parameters "object", "encoding" and "errors", then
 * forwards them, together with `type`, to unicode_new_impl().
 *
 * Returns NULL if unpacking or validation fails; otherwise returns whatever
 * unicode_new_impl() returns.
 */
static PyObject *
unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
/* Pessimistic default: every early `goto exit` reports failure. */
PyObject *return_value = NULL;
static const char * const _keywords[] = {"object", "encoding", "errors", NULL};
static _PyArg_Parser _parser = {NULL, _keywords, "str", 0};
/* One slot per keyword listed in _keywords. */
PyObject *argsbuf[3];
PyObject * const *fastargs;
Py_ssize_t nargs = PyTuple_GET_SIZE(args);
/* Total number of arguments supplied (positional + keyword); the trailing
   "- 0" subtracts the count of required parameters, which is zero here. */
Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0;
/* Parameters left at NULL mean "not supplied" to the impl function. */
PyObject *x = NULL;
const char *encoding = NULL;
const char *errors = NULL;
/* NOTE(review): the literals 0, 3, 0 are presumably min-positional,
   max-positional and min-keyword-only counts -- confirm against the
   _PyArg_UnpackKeywords declaration. */
fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 0, 3, 0, argsbuf);
if (!fastargs) {
goto exit;
}
/* Nothing was passed at all: call the impl with all defaults. */
if (!noptargs) {
goto skip_optional_pos;
}
/* Each supplied optional decrements noptargs; once it reaches zero there
   is nothing left to convert and the remaining slots are skipped. */
if (fastargs[0]) {
x = fastargs[0];
if (!--noptargs) {
goto skip_optional_pos;
}
}
if (fastargs[1]) {
/* "encoding" must be a str object. */
if (!PyUnicode_Check(fastargs[1])) {
_PyArg_BadArgument("str", "argument 'encoding'", "str", fastargs[1]);
goto exit;
}
Py_ssize_t encoding_length;
encoding = PyUnicode_AsUTF8AndSize(fastargs[1], &encoding_length);
if (encoding == NULL) {
goto exit;
}
/* A C-string length shorter than the reported size means the text
   contains a NUL character, which is rejected. */
if (strlen(encoding) != (size_t)encoding_length) {
PyErr_SetString(PyExc_ValueError, "embedded null character");
goto exit;
}
if (!--noptargs) {
goto skip_optional_pos;
}
}
/* fastargs[2] is used without a NULL test: execution only gets here while
   noptargs is still non-zero after slots 0 and 1 were accounted for, so
   presumably "errors" must have been supplied -- this relies on
   _PyArg_UnpackKeywords rejecting unknown arguments (verify). */
if (!PyUnicode_Check(fastargs[2])) {
_PyArg_BadArgument("str", "argument 'errors'", "str", fastargs[2]);
goto exit;
}
Py_ssize_t errors_length;
errors = PyUnicode_AsUTF8AndSize(fastargs[2], &errors_length);
if (errors == NULL) {
goto exit;
}
/* Same embedded-NUL rejection as for "encoding". */
if (strlen(errors) != (size_t)errors_length) {
PyErr_SetString(PyExc_ValueError, "embedded null character");
goto exit;
}
skip_optional_pos:
/* All supplied arguments are converted; hand off to the implementation. */
return_value = unicode_new_impl(type, x, encoding, errors);
exit:
return return_value;
}
/*[clinic end generated code: output=f10cf85d3935b3b7 input=a9049054013a1b77]*/

View File

@ -15466,52 +15466,57 @@ PyUnicode_Format(PyObject *format, PyObject *args)
}
static PyObject *
unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
unicode_subtype_new(PyTypeObject *type, PyObject *unicode);
/*[clinic input]
@classmethod
str.__new__ as unicode_new
object as x: object = NULL
encoding: str = NULL
errors: str = NULL
[clinic start generated code]*/
static PyObject *
unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
unicode_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
const char *errors)
/*[clinic end generated code: output=fc72d4878b0b57e9 input=e81255e5676d174e]*/
{
PyObject *x = NULL;
static char *kwlist[] = {"object", "encoding", "errors", 0};
char *encoding = NULL;
char *errors = NULL;
PyObject *unicode;
if (x == NULL) {
unicode = unicode_new_empty();
}
else if (encoding == NULL && errors == NULL) {
unicode = PyObject_Str(x);
}
else {
unicode = PyUnicode_FromEncodedObject(x, encoding, errors);
}
if (type != &PyUnicode_Type)
return unicode_subtype_new(type, args, kwds);
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:str",
kwlist, &x, &encoding, &errors))
return NULL;
if (x == NULL)
_Py_RETURN_UNICODE_EMPTY();
if (encoding == NULL && errors == NULL)
return PyObject_Str(x);
else
return PyUnicode_FromEncodedObject(x, encoding, errors);
if (unicode != NULL && type != &PyUnicode_Type) {
Py_SETREF(unicode, unicode_subtype_new(type, unicode));
}
return unicode;
}
static PyObject *
unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
unicode_subtype_new(PyTypeObject *type, PyObject *unicode)
{
PyObject *unicode, *self;
PyObject *self;
Py_ssize_t length, char_size;
int share_wstr, share_utf8;
unsigned int kind;
void *data;
assert(PyType_IsSubtype(type, &PyUnicode_Type));
unicode = unicode_new(&PyUnicode_Type, args, kwds);
if (unicode == NULL)
return NULL;
assert(_PyUnicode_CHECK(unicode));
if (PyUnicode_READY(unicode) == -1) {
Py_DECREF(unicode);
return NULL;
}
self = type->tp_alloc(type, 0);
if (self == NULL) {
Py_DECREF(unicode);
return NULL;
}
kind = PyUnicode_KIND(unicode);
@ -15580,11 +15585,9 @@ unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
#ifdef Py_DEBUG
_PyUnicode_HASH(self) = _PyUnicode_HASH(unicode);
#endif
Py_DECREF(unicode);
return self;
onError:
Py_DECREF(unicode);
Py_DECREF(self);
return NULL;
}