Issue #24000: Improved Argument Clinic's mapping of converters to legacy
"format units".  Updated the documentation to match.
This commit is contained in:
Larry Hastings 2015-05-07 23:30:09 -07:00
parent 95283fb589
commit 38337d1e15
7 changed files with 122 additions and 101 deletions

View File

@ -758,6 +758,14 @@ All Argument Clinic converters accept the following arguments:
In addition, some converters accept additional arguments. Here is a list In addition, some converters accept additional arguments. Here is a list
of these arguments, along with their meanings: of these arguments, along with their meanings:
``accept``
A set of Python types (and possibly pseudo-types);
this restricts the allowable Python argument to values of these types.
(This is not a general-purpose facility; as a rule it only supports
specific lists of types as shown in the legacy converter table.)
To accept ``None``, add ``NoneType`` to this set.
``bitwise`` ``bitwise``
Only supported for unsigned integers. The native integer value of this Only supported for unsigned integers. The native integer value of this
Python argument will be written to the parameter without any range checking, Python argument will be written to the parameter without any range checking,
@ -772,39 +780,27 @@ of these arguments, along with their meanings:
Only supported for strings. Specifies the encoding to use when converting Only supported for strings. Specifies the encoding to use when converting
this string from a Python str (Unicode) value into a C ``char *`` value. this string from a Python str (Unicode) value into a C ``char *`` value.
``length``
Only supported for strings. If true, requests that the length of the
string be passed in to the impl function, just after the string parameter,
in a parameter named ``<parameter_name>_length``.
``nullable``
Only supported for strings. If true, this parameter may also be set to
``None``, in which case the C parameter will be set to ``NULL``.
``subclass_of`` ``subclass_of``
Only supported for the ``object`` converter. Requires that the Python Only supported for the ``object`` converter. Requires that the Python
value be a subclass of a Python type, as expressed in C. value be a subclass of a Python type, as expressed in C.
``types`` ``type``
Only supported for the ``object`` (and ``self``) converter. Specifies Only supported for the ``object`` and ``self`` converters. Specifies
the C type that will be used to declare the variable. Default value is the C type that will be used to declare the variable. Default value is
``"PyObject *"``. ``"PyObject *"``.
``types``
A string containing a list of Python types (and possibly pseudo-types);
this restricts the allowable Python argument to values of these types.
(This is not a general-purpose facility; as a rule it only supports
specific lists of types as shown in the legacy converter table.)
``zeroes`` ``zeroes``
Only supported for strings. If true, embedded NUL bytes (``'\\0'``) are Only supported for strings. If true, embedded NUL bytes (``'\\0'``) are
permitted inside the value. permitted inside the value. The length of the string will be passed in
to the impl function, just after the string parameter, as a parameter named
``<parameter_name>_length``.
Please note, not every possible combination of arguments will work. Please note, not every possible combination of arguments will work.
Often these arguments are implemented internally by specific ``PyArg_ParseTuple`` Usually these arguments are implemented by specific ``PyArg_ParseTuple``
*format units*, with specific behavior. For example, currently you cannot *format units*, with specific behavior. For example, currently you cannot
call ``str`` and pass in ``zeroes=True`` without also specifying an ``encoding``; call ``unsigned_short`` without also specifying ``bitwise=True``.
although it's perfectly reasonable to think this would work, these semantics don't Although it's perfectly reasonable to think this would work, these semantics don't
map to any existing format unit. So Argument Clinic doesn't support it. (Or, at map to any existing format unit. So Argument Clinic doesn't support it. (Or, at
least, not yet.) least, not yet.)
@ -816,13 +812,13 @@ on the right is the text you'd replace it with.
``'B'`` ``unsigned_char(bitwise=True)`` ``'B'`` ``unsigned_char(bitwise=True)``
``'b'`` ``unsigned_char`` ``'b'`` ``unsigned_char``
``'c'`` ``char`` ``'c'`` ``char``
``'C'`` ``int(types='str')`` ``'C'`` ``int(accept={str})``
``'d'`` ``double`` ``'d'`` ``double``
``'D'`` ``Py_complex`` ``'D'`` ``Py_complex``
``'es#'`` ``str(encoding='name_of_encoding', length=True, zeroes=True)``
``'es'`` ``str(encoding='name_of_encoding')`` ``'es'`` ``str(encoding='name_of_encoding')``
``'et#'`` ``str(encoding='name_of_encoding', types='bytes bytearray str', length=True)`` ``'es#'`` ``str(encoding='name_of_encoding', zeroes=True)``
``'et'`` ``str(encoding='name_of_encoding', types='bytes bytearray str')`` ``'et'`` ``str(encoding='name_of_encoding', accept={bytes, bytearray, str})``
``'et#'`` ``str(encoding='name_of_encoding', accept={bytes, bytearray, str}, zeroes=True)``
``'f'`` ``float`` ``'f'`` ``float``
``'h'`` ``short`` ``'h'`` ``short``
``'H'`` ``unsigned_short(bitwise=True)`` ``'H'`` ``unsigned_short(bitwise=True)``
@ -832,27 +828,27 @@ on the right is the text you'd replace it with.
``'K'`` ``unsigned_PY_LONG_LONG(bitwise=True)`` ``'K'`` ``unsigned_PY_LONG_LONG(bitwise=True)``
``'L'`` ``PY_LONG_LONG`` ``'L'`` ``PY_LONG_LONG``
``'n'`` ``Py_ssize_t`` ``'n'`` ``Py_ssize_t``
``'O'`` ``object``
``'O!'`` ``object(subclass_of='&PySomething_Type')`` ``'O!'`` ``object(subclass_of='&PySomething_Type')``
``'O&'`` ``object(converter='name_of_c_function')`` ``'O&'`` ``object(converter='name_of_c_function')``
``'O'`` ``object``
``'p'`` ``bool`` ``'p'`` ``bool``
``'s#'`` ``str(length=True)``
``'S'`` ``PyBytesObject`` ``'S'`` ``PyBytesObject``
``'s'`` ``str`` ``'s'`` ``str``
``'s*'`` ``Py_buffer(types='str bytes bytearray buffer')`` ``'s#'`` ``str(zeroes=True)``
``'u#'`` ``Py_UNICODE(length=True)`` ``'s*'`` ``Py_buffer(accept={buffer, str})``
``'u'`` ``Py_UNICODE``
``'U'`` ``unicode`` ``'U'`` ``unicode``
``'w*'`` ``Py_buffer(types='bytearray rwbuffer')`` ``'u'`` ``Py_UNICODE``
``'y#'`` ``str(types='bytes', length=True)`` ``'u#'`` ``Py_UNICODE(zeroes=True)``
``'w*'`` ``Py_buffer(accept={rwbuffer})``
``'Y'`` ``PyByteArrayObject`` ``'Y'`` ``PyByteArrayObject``
``'y'`` ``str(types='bytes')`` ``'y'`` ``str(accept={bytes})``
``'y#'`` ``str(accept={robuffer}, zeroes=True)``
``'y*'`` ``Py_buffer`` ``'y*'`` ``Py_buffer``
``'Z#'`` ``Py_UNICODE(nullable=True, length=True)`` ``'Z'`` ``Py_UNICODE(accept={str, NoneType})``
``'z#'`` ``str(nullable=True, length=True)`` ``'Z#'`` ``Py_UNICODE(accept={str, NoneType}, zeroes=True)``
``'Z'`` ``Py_UNICODE(nullable=True)`` ``'z'`` ``str(accept={str, NoneType})``
``'z'`` ``str(nullable=True)`` ``'z#'`` ``str(accept={str, NoneType}, zeroes=True)``
``'z*'`` ``Py_buffer(types='str bytes bytearray buffer', nullable=True)`` ``'z*'`` ``Py_buffer(accept={buffer, str, NoneType})``
========= ================================================================================= ========= =================================================================================
As an example, here's our sample ``pickle.Pickler.dump`` using the proper As an example, here's our sample ``pickle.Pickler.dump`` using the proper

View File

@ -82,6 +82,9 @@ Documentation
Tools/Demos Tools/Demos
----------- -----------
- Issue #24000: Improved Argument Clinic's mapping of converters to legacy
"format units". Updated the documentation to match.
- Issue #24001: Argument Clinic converters now use accept={type} - Issue #24001: Argument Clinic converters now use accept={type}
instead of types={'type'} to specify the types the converter accepts. instead of types={'type'} to specify the types the converter accepts.

View File

@ -274,7 +274,7 @@ static PySequenceMethods dbm_as_sequence = {
/*[clinic input] /*[clinic input]
_dbm.dbm.get _dbm.dbm.get
key: str(accept={str, robuffer}, length=True) key: str(accept={str, robuffer}, zeroes=True)
default: object(c_default="NULL") = b'' default: object(c_default="NULL") = b''
/ /
@ -284,7 +284,8 @@ Return the value for key if present, otherwise default.
static PyObject * static PyObject *
_dbm_dbm_get_impl(dbmobject *self, const char *key, _dbm_dbm_get_impl(dbmobject *self, const char *key,
Py_ssize_clean_t key_length, PyObject *default_value) Py_ssize_clean_t key_length, PyObject *default_value)
/*[clinic end generated code: output=b44f95eba8203d93 input=3c7c1afd9c508457]*/ /*[clinic end generated code: output=b44f95eba8203d93 input=a3a279957f85eb6d]*/
/*[clinic end generated code: output=4f5c0e523eaf1251 input=9402c0af8582dc69]*/
{ {
datum dbm_key, val; datum dbm_key, val;
@ -301,7 +302,7 @@ _dbm_dbm_get_impl(dbmobject *self, const char *key,
/*[clinic input] /*[clinic input]
_dbm.dbm.setdefault _dbm.dbm.setdefault
key: str(accept={str, robuffer}, length=True) key: str(accept={str, robuffer}, zeroes=True)
default: object(c_default="NULL") = b'' default: object(c_default="NULL") = b''
/ /
@ -314,7 +315,7 @@ static PyObject *
_dbm_dbm_setdefault_impl(dbmobject *self, const char *key, _dbm_dbm_setdefault_impl(dbmobject *self, const char *key,
Py_ssize_clean_t key_length, Py_ssize_clean_t key_length,
PyObject *default_value) PyObject *default_value)
/*[clinic end generated code: output=52545886cf272161 input=a66fcb7f18ee2f50]*/ /*[clinic end generated code: output=52545886cf272161 input=bf40c48edaca01d6]*/
{ {
datum dbm_key, val; datum dbm_key, val;
Py_ssize_t tmp_size; Py_ssize_t tmp_size;

View File

@ -383,7 +383,7 @@ _gdbm_gdbm_firstkey_impl(dbmobject *self)
/*[clinic input] /*[clinic input]
_gdbm.gdbm.nextkey _gdbm.gdbm.nextkey
key: str(accept={str, robuffer}, length=True) key: str(accept={str, robuffer}, zeroes=True)
/ /
Returns the key that follows key in the traversal. Returns the key that follows key in the traversal.
@ -400,7 +400,7 @@ to create a list in memory that contains them all:
static PyObject * static PyObject *
_gdbm_gdbm_nextkey_impl(dbmobject *self, const char *key, _gdbm_gdbm_nextkey_impl(dbmobject *self, const char *key,
Py_ssize_clean_t key_length) Py_ssize_clean_t key_length)
/*[clinic end generated code: output=192ab892de6eb2f6 input=1eb2ff9b4b0e6ffd]*/ /*[clinic end generated code: output=192ab892de6eb2f6 input=1f1606943614e36f]*/
{ {
PyObject *v; PyObject *v;
datum dbm_key, nextkey; datum dbm_key, nextkey;

View File

@ -1673,7 +1673,7 @@ array_array_tostring_impl(arrayobject *self)
/*[clinic input] /*[clinic input]
array.array.fromunicode array.array.fromunicode
ustr: Py_UNICODE(length=True) ustr: Py_UNICODE(zeroes=True)
/ /
Extends this array with data from the unicode string ustr. Extends this array with data from the unicode string ustr.
@ -1686,7 +1686,7 @@ some other type.
static PyObject * static PyObject *
array_array_fromunicode_impl(arrayobject *self, Py_UNICODE *ustr, array_array_fromunicode_impl(arrayobject *self, Py_UNICODE *ustr,
Py_ssize_clean_t ustr_length) Py_ssize_clean_t ustr_length)
/*[clinic end generated code: output=ebb72fc16975e06d input=56bcedb5ef70139f]*/ /*[clinic end generated code: output=ebb72fc16975e06d input=150f00566ffbca6e]*/
{ {
char typecode; char typecode;

View File

@ -1215,7 +1215,7 @@ unicodedata_UCD_name_impl(PyObject *self, int chr, PyObject *default_value)
unicodedata.UCD.lookup unicodedata.UCD.lookup
self: self self: self
name: str(accept={str, robuffer}, length=True) name: str(accept={str, robuffer}, zeroes=True)
/ /
Look up character by name. Look up character by name.
@ -1227,7 +1227,7 @@ corresponding character. If not found, KeyError is raised.
static PyObject * static PyObject *
unicodedata_UCD_lookup_impl(PyObject *self, const char *name, unicodedata_UCD_lookup_impl(PyObject *self, const char *name,
Py_ssize_clean_t name_length) Py_ssize_clean_t name_length)
/*[clinic end generated code: output=765cb8186788e6be input=2dfe682c2491447a]*/ /*[clinic end generated code: output=765cb8186788e6be input=a557be0f8607a0d6]*/
{ {
Py_UCS4 code; Py_UCS4 code;
unsigned int index; unsigned int index;

View File

@ -2644,64 +2644,85 @@ class buffer: pass
class rwbuffer: pass class rwbuffer: pass
class robuffer: pass class robuffer: pass
@add_legacy_c_converter('s#', accept={str, robuffer}, length=True) def str_converter_key(types, encoding, zeroes):
@add_legacy_c_converter('y', accept={robuffer}) return (frozenset(types), bool(encoding), bool(zeroes))
@add_legacy_c_converter('y#', accept={robuffer}, length=True)
@add_legacy_c_converter('z', accept={str, NoneType}) str_converter_argument_map = {}
@add_legacy_c_converter('z#', accept={str, NoneType}, length=True)
# add_legacy_c_converter not supported for es, es#, et, et#
# because of their extra encoding argument
class str_converter(CConverter): class str_converter(CConverter):
type = 'const char *' type = 'const char *'
default_type = (str, Null, NoneType) default_type = (str, Null, NoneType)
format_unit = 's' format_unit = 's'
def converter_init(self, *, encoding=None, accept={str}, length=False, zeroes=False): def converter_init(self, *, accept={str}, encoding=None, zeroes=False):
self.length = bool(length)
is_b_or_ba = accept == {bytes, bytearray}
is_b_or_ba_or_none = accept == {bytes, bytearray, NoneType}
is_str = accept == {str}
is_str_or_none = accept == {str, NoneType}
is_robuffer = accept == {robuffer}
is_str_or_robuffer = accept == {str, robuffer}
is_str_or_robuffer_or_none = accept == {str, robuffer, NoneType}
format_unit = None
if encoding:
self.encoding = encoding
if is_str and not length and not zeroes:
format_unit = 'es'
elif is_str_or_none and length and zeroes:
format_unit = 'es#'
elif is_b_or_ba and not length and not zeroes:
format_unit = 'et'
elif is_b_or_ba_or_none and length and zeroes:
format_unit = 'et#'
else:
if zeroes:
fail("str_converter: illegal combination of arguments (zeroes is only legal with an encoding)")
if is_str and not length:
format_unit = 's'
elif is_str_or_none and not length:
format_unit = 'z'
elif is_robuffer and not length:
format_unit = 'y'
elif is_robuffer and length:
format_unit = 'y#'
elif is_str_or_robuffer and length:
format_unit = 's#'
elif is_str_or_robuffer_or_none and length:
format_unit = 'z#'
key = str_converter_key(accept, encoding, zeroes)
format_unit = str_converter_argument_map.get(key)
if not format_unit: if not format_unit:
fail("str_converter: illegal combination of arguments") fail("str_converter: illegal combination of arguments", key)
self.format_unit = format_unit self.format_unit = format_unit
self.length = bool(zeroes)
if encoding:
if self.default not in (Null, None, unspecified):
fail("str_converter: Argument Clinic doesn't support default values for encoded strings")
self.encoding = encoding
self.type = 'char *'
# sorry, clinic can't support preallocated buffers
# for es# and et#
self.c_default = "NULL"
def cleanup(self):
if self.encoding:
name = ensure_legal_c_identifier(self.name)
return "".join(["if (", name, ")\n PyMem_FREE(", name, ");\n"])
#
# This is the fourth or fifth rewrite of registering all the
# crazy string converter format units. Previous approaches hid
# bugs--generally mismatches between the semantics of the format
# unit and the arguments necessary to represent those semantics
# properly. Hopefully with this approach we'll get it 100% right.
#
# The r() function (short for "register") both registers the
# mapping from arguments to format unit *and* registers the
# legacy C converter for that format unit.
#
def r(format_unit, *, accept, encoding=False, zeroes=False):
if not encoding and format_unit != 's':
# add the legacy c converters here too.
#
# note: add_legacy_c_converter can't work for
# es, es#, et, or et#
# because of their extra encoding argument
#
# also don't add the converter for 's' because
# the metaclass for CConverter adds it for us.
kwargs = {}
if accept != {str}:
kwargs['accept'] = accept
if zeroes:
kwargs['zeroes'] = True
added_f = functools.partial(str_converter, **kwargs)
legacy_converters[format_unit] = added_f
d = str_converter_argument_map
key = str_converter_key(accept, encoding, zeroes)
if key in d:
sys.exit("Duplicate keys specified for str_converter_argument_map!")
d[key] = format_unit
r('es', encoding=True, accept={str})
r('es#', encoding=True, zeroes=True, accept={str})
r('et', encoding=True, accept={bytes, bytearray, str})
r('et#', encoding=True, zeroes=True, accept={bytes, bytearray, str})
r('s', accept={str})
r('s#', zeroes=True, accept={robuffer, str})
r('y', accept={robuffer})
r('y#', zeroes=True, accept={robuffer})
r('z', accept={str, NoneType})
r('z#', zeroes=True, accept={robuffer, str, NoneType})
del r
class PyBytesObject_converter(CConverter): class PyBytesObject_converter(CConverter):
@ -2719,17 +2740,17 @@ class unicode_converter(CConverter):
default_type = (str, Null, NoneType) default_type = (str, Null, NoneType)
format_unit = 'U' format_unit = 'U'
@add_legacy_c_converter('u#', length=True) @add_legacy_c_converter('u#', zeroes=True)
@add_legacy_c_converter('Z', accept={str, NoneType}) @add_legacy_c_converter('Z', accept={str, NoneType})
@add_legacy_c_converter('Z#', accept={str, NoneType}, length=True) @add_legacy_c_converter('Z#', accept={str, NoneType}, zeroes=True)
class Py_UNICODE_converter(CConverter): class Py_UNICODE_converter(CConverter):
type = 'Py_UNICODE *' type = 'Py_UNICODE *'
default_type = (str, Null, NoneType) default_type = (str, Null, NoneType)
format_unit = 'u' format_unit = 'u'
def converter_init(self, *, accept={str}, length=False): def converter_init(self, *, accept={str}, zeroes=False):
format_unit = 'Z' if accept=={str, NoneType} else 'u' format_unit = 'Z' if accept=={str, NoneType} else 'u'
if length: if zeroes:
format_unit += '#' format_unit += '#'
self.length = True self.length = True
self.format_unit = format_unit self.format_unit = format_unit