From 25cd8730aa39c18e767f63586dfd1da2fbb307c9 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 14 Mar 2024 18:59:43 +0100 Subject: [PATCH] gh-113317, AC: Add libclinic.converter module (#116821) * Move CConverter class to a new libclinic.converter module. * Move CRenderData and Include classes to a new libclinic.crenderdata module. --- Tools/clinic/clinic.py | 604 +------------------------- Tools/clinic/libclinic/converter.py | 534 +++++++++++++++++++++++ Tools/clinic/libclinic/crenderdata.py | 81 ++++ Tools/clinic/libclinic/function.py | 3 +- 4 files changed, 621 insertions(+), 601 deletions(-) create mode 100644 Tools/clinic/libclinic/converter.py create mode 100644 Tools/clinic/libclinic/crenderdata.py diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py index ac205866f9d..c9641cb9c82 100755 --- a/Tools/clinic/clinic.py +++ b/Tools/clinic/clinic.py @@ -32,15 +32,12 @@ from collections.abc import ( from operator import attrgetter from types import FunctionType, NoneType from typing import ( - TYPE_CHECKING, Any, Final, Literal, NamedTuple, NoReturn, Protocol, - TypeVar, - cast, ) @@ -57,6 +54,10 @@ from libclinic.function import ( GETTER, SETTER) from libclinic.language import Language, PythonLanguage from libclinic.block_parser import Block, BlockParser +from libclinic.crenderdata import CRenderData, Include, TemplateDict +from libclinic.converter import ( + CConverter, CConverterClassT, + converters, legacy_converters) # TODO: @@ -84,65 +85,6 @@ class Null: NULL = Null() -TemplateDict = dict[str, str] - - -class CRenderData: - def __init__(self) -> None: - - # The C statements to declare variables. - # Should be full lines with \n eol characters. - self.declarations: list[str] = [] - - # The C statements required to initialize the variables before the parse call. - # Should be full lines with \n eol characters. - self.initializers: list[str] = [] - - # The C statements needed to dynamically modify the values - # parsed by the parse call, before calling the impl. - self.modifications: list[str] = [] - - # The entries for the "keywords" array for PyArg_ParseTuple. - # Should be individual strings representing the names. - self.keywords: list[str] = [] - - # The "format units" for PyArg_ParseTuple. - # Should be individual strings that will get - self.format_units: list[str] = [] - - # The varargs arguments for PyArg_ParseTuple. - self.parse_arguments: list[str] = [] - - # The parameter declarations for the impl function. - self.impl_parameters: list[str] = [] - - # The arguments to the impl function at the time it's called. - self.impl_arguments: list[str] = [] - - # For return converters: the name of the variable that - # should receive the value returned by the impl. - self.return_value = "return_value" - - # For return converters: the code to convert the return - # value from the parse function. This is also where - # you should check the _return_value for errors, and - # "goto exit" if there are any. - self.return_conversion: list[str] = [] - self.converter_retval = "_return_value" - - # The C statements required to do some operations - # after the end of parsing but before cleaning up. - # These operations may be, for example, memory deallocations which - # can only be done without any error happening during argument parsing. - self.post_parsing: list[str] = [] - - # The C statements required to clean up after the impl call. - self.cleanup: list[str] = [] - - # The C statements to generate critical sections (per-object locking). - self.lock: list[str] = [] - self.unlock: list[str] = [] - ParamTuple = tuple["Parameter", ...] @@ -1556,26 +1498,6 @@ class CLanguage(Language): return clinic.get_destination('block').dump() -@dc.dataclass(slots=True, frozen=True) -class Include: - """ - An include like: #include "pycore_long.h" // _Py_ID() - """ - # Example: "pycore_long.h". - filename: str - - # Example: "_Py_ID()". - reason: str - - # None means unconditional include. - # Example: "#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)". - condition: str | None - - def sort_key(self) -> tuple[str, str]: - # order: '#if' comes before 'NO_CONDITION' - return (self.condition or 'NO_CONDITION', self.filename) - - @dc.dataclass(slots=True) class BlockPrinter: language: Language @@ -2151,29 +2073,6 @@ __xor__ ReturnConverterType = Callable[..., "CReturnConverter"] -CConverterClassT = TypeVar("CConverterClassT", bound=type["CConverter"]) - -def add_c_converter( - f: CConverterClassT, - name: str | None = None -) -> CConverterClassT: - if not name: - name = f.__name__ - if not name.endswith('_converter'): - return f - name = name.removesuffix('_converter') - converters[name] = f - return f - -def add_default_legacy_c_converter(cls: CConverterClassT) -> CConverterClassT: - # automatically add converter for default format unit - # (but without stomping on the existing one if it's already - # set, in case you subclass) - if ((cls.format_unit not in ('O&', '')) and - (cls.format_unit not in legacy_converters)): - legacy_converters[cls.format_unit] = cls - return cls - def add_legacy_c_converter( format_unit: str, **kwargs: Any @@ -2192,501 +2091,6 @@ def add_legacy_c_converter( return f return closure -class CConverterAutoRegister(type): - def __init__( - cls, name: str, bases: tuple[type[object], ...], classdict: dict[str, Any] - ) -> None: - converter_cls = cast(type["CConverter"], cls) - add_c_converter(converter_cls) - add_default_legacy_c_converter(converter_cls) - -class CConverter(metaclass=CConverterAutoRegister): - """ - For the init function, self, name, function, and default - must be keyword-or-positional parameters. All other - parameters must be keyword-only. - """ - - # The C name to use for this variable. - name: str - - # The Python name to use for this variable. - py_name: str - - # The C type to use for this variable. - # 'type' should be a Python string specifying the type, e.g. "int". - # If this is a pointer type, the type string should end with ' *'. - type: str | None = None - - # The Python default value for this parameter, as a Python value. - # Or the magic value "unspecified" if there is no default. - # Or the magic value "unknown" if this value is a cannot be evaluated - # at Argument-Clinic-preprocessing time (but is presumed to be valid - # at runtime). - default: object = unspecified - - # If not None, default must be isinstance() of this type. - # (You can also specify a tuple of types.) - default_type: bltns.type[object] | tuple[bltns.type[object], ...] | None = None - - # "default" converted into a C value, as a string. - # Or None if there is no default. - c_default: str | None = None - - # "default" converted into a Python value, as a string. - # Or None if there is no default. - py_default: str | None = None - - # The default value used to initialize the C variable when - # there is no default, but not specifying a default may - # result in an "uninitialized variable" warning. This can - # easily happen when using option groups--although - # properly-written code won't actually use the variable, - # the variable does get passed in to the _impl. (Ah, if - # only dataflow analysis could inline the static function!) - # - # This value is specified as a string. - # Every non-abstract subclass should supply a valid value. - c_ignored_default: str = 'NULL' - - # If true, wrap with Py_UNUSED. - unused = False - - # The C converter *function* to be used, if any. - # (If this is not None, format_unit must be 'O&'.) - converter: str | None = None - - # Should Argument Clinic add a '&' before the name of - # the variable when passing it into the _impl function? - impl_by_reference = False - - # Should Argument Clinic add a '&' before the name of - # the variable when passing it into PyArg_ParseTuple (AndKeywords)? - parse_by_reference = True - - ############################################################# - ############################################################# - ## You shouldn't need to read anything below this point to ## - ## write your own converter functions. ## - ############################################################# - ############################################################# - - # The "format unit" to specify for this variable when - # parsing arguments using PyArg_ParseTuple (AndKeywords). - # Custom converters should always use the default value of 'O&'. - format_unit = 'O&' - - # What encoding do we want for this variable? Only used - # by format units starting with 'e'. - encoding: str | None = None - - # Should this object be required to be a subclass of a specific type? - # If not None, should be a string representing a pointer to a - # PyTypeObject (e.g. "&PyUnicode_Type"). - # Only used by the 'O!' format unit (and the "object" converter). - subclass_of: str | None = None - - # See also the 'length_name' property. - # Only used by format units ending with '#'. - length = False - - # Should we show this parameter in the generated - # __text_signature__? This is *almost* always True. - # (It's only False for __new__, __init__, and METH_STATIC functions.) - show_in_signature = True - - # Overrides the name used in a text signature. - # The name used for a "self" parameter must be one of - # self, type, or module; however users can set their own. - # This lets the self_converter overrule the user-settable - # name, *just* for the text signature. - # Only set by self_converter. - signature_name: str | None = None - - broken_limited_capi: bool = False - - # keep in sync with self_converter.__init__! - def __init__(self, - # Positional args: - name: str, - py_name: str, - function: Function, - default: object = unspecified, - *, # Keyword only args: - c_default: str | None = None, - py_default: str | None = None, - annotation: str | Literal[Sentinels.unspecified] = unspecified, - unused: bool = False, - **kwargs: Any - ) -> None: - self.name = libclinic.ensure_legal_c_identifier(name) - self.py_name = py_name - self.unused = unused - self.includes: list[Include] = [] - - if default is not unspecified: - if (self.default_type - and default is not unknown - and not isinstance(default, self.default_type) - ): - if isinstance(self.default_type, type): - types_str = self.default_type.__name__ - else: - names = [cls.__name__ for cls in self.default_type] - types_str = ', '.join(names) - cls_name = self.__class__.__name__ - fail(f"{cls_name}: default value {default!r} for field " - f"{name!r} is not of type {types_str!r}") - self.default = default - - if c_default: - self.c_default = c_default - if py_default: - self.py_default = py_default - - if annotation is not unspecified: - fail("The 'annotation' parameter is not currently permitted.") - - # Make sure not to set self.function until after converter_init() has been called. - # This prevents you from caching information - # about the function in converter_init(). - # (That breaks if we get cloned.) - self.converter_init(**kwargs) - self.function = function - - # Add a custom __getattr__ method to improve the error message - # if somebody tries to access self.function in converter_init(). - # - # mypy will assume arbitrary access is okay for a class with a __getattr__ method, - # and that's not what we want, - # so put it inside an `if not TYPE_CHECKING` block - if not TYPE_CHECKING: - def __getattr__(self, attr): - if attr == "function": - fail( - f"{self.__class__.__name__!r} object has no attribute 'function'.\n" - f"Note: accessing self.function inside converter_init is disallowed!" - ) - return super().__getattr__(attr) - # this branch is just here for coverage reporting - else: # pragma: no cover - pass - - def converter_init(self) -> None: - pass - - def is_optional(self) -> bool: - return (self.default is not unspecified) - - def _render_self(self, parameter: Parameter, data: CRenderData) -> None: - self.parameter = parameter - name = self.parser_name - - # impl_arguments - s = ("&" if self.impl_by_reference else "") + name - data.impl_arguments.append(s) - if self.length: - data.impl_arguments.append(self.length_name) - - # impl_parameters - data.impl_parameters.append(self.simple_declaration(by_reference=self.impl_by_reference)) - if self.length: - data.impl_parameters.append(f"Py_ssize_t {self.length_name}") - - def _render_non_self( - self, - parameter: Parameter, - data: CRenderData - ) -> None: - self.parameter = parameter - name = self.name - - # declarations - d = self.declaration(in_parser=True) - data.declarations.append(d) - - # initializers - initializers = self.initialize() - if initializers: - data.initializers.append('/* initializers for ' + name + ' */\n' + initializers.rstrip()) - - # modifications - modifications = self.modify() - if modifications: - data.modifications.append('/* modifications for ' + name + ' */\n' + modifications.rstrip()) - - # keywords - if parameter.is_vararg(): - pass - elif parameter.is_positional_only(): - data.keywords.append('') - else: - data.keywords.append(parameter.name) - - # format_units - if self.is_optional() and '|' not in data.format_units: - data.format_units.append('|') - if parameter.is_keyword_only() and '$' not in data.format_units: - data.format_units.append('$') - data.format_units.append(self.format_unit) - - # parse_arguments - self.parse_argument(data.parse_arguments) - - # post_parsing - if post_parsing := self.post_parsing(): - data.post_parsing.append('/* Post parse cleanup for ' + name + ' */\n' + post_parsing.rstrip() + '\n') - - # cleanup - cleanup = self.cleanup() - if cleanup: - data.cleanup.append('/* Cleanup for ' + name + ' */\n' + cleanup.rstrip() + "\n") - - def render(self, parameter: Parameter, data: CRenderData) -> None: - """ - parameter is a clinic.Parameter instance. - data is a CRenderData instance. - """ - self._render_self(parameter, data) - self._render_non_self(parameter, data) - - @functools.cached_property - def length_name(self) -> str: - """Computes the name of the associated "length" variable.""" - assert self.length is not None - return self.parser_name + "_length" - - # Why is this one broken out separately? - # For "positional-only" function parsing, - # which generates a bunch of PyArg_ParseTuple calls. - def parse_argument(self, args: list[str]) -> None: - assert not (self.converter and self.encoding) - if self.format_unit == 'O&': - assert self.converter - args.append(self.converter) - - if self.encoding: - args.append(libclinic.c_repr(self.encoding)) - elif self.subclass_of: - args.append(self.subclass_of) - - s = ("&" if self.parse_by_reference else "") + self.parser_name - args.append(s) - - if self.length: - args.append(f"&{self.length_name}") - - # - # All the functions after here are intended as extension points. - # - - def simple_declaration( - self, - by_reference: bool = False, - *, - in_parser: bool = False - ) -> str: - """ - Computes the basic declaration of the variable. - Used in computing the prototype declaration and the - variable declaration. - """ - assert isinstance(self.type, str) - prototype = [self.type] - if by_reference or not self.type.endswith('*'): - prototype.append(" ") - if by_reference: - prototype.append('*') - if in_parser: - name = self.parser_name - else: - name = self.name - if self.unused: - name = f"Py_UNUSED({name})" - prototype.append(name) - return "".join(prototype) - - def declaration(self, *, in_parser: bool = False) -> str: - """ - The C statement to declare this variable. - """ - declaration = [self.simple_declaration(in_parser=True)] - default = self.c_default - if not default and self.parameter.group: - default = self.c_ignored_default - if default: - declaration.append(" = ") - declaration.append(default) - declaration.append(";") - if self.length: - declaration.append('\n') - declaration.append(f"Py_ssize_t {self.length_name};") - return "".join(declaration) - - def initialize(self) -> str: - """ - The C statements required to set up this variable before parsing. - Returns a string containing this code indented at column 0. - If no initialization is necessary, returns an empty string. - """ - return "" - - def modify(self) -> str: - """ - The C statements required to modify this variable after parsing. - Returns a string containing this code indented at column 0. - If no modification is necessary, returns an empty string. - """ - return "" - - def post_parsing(self) -> str: - """ - The C statements required to do some operations after the end of parsing but before cleaning up. - Return a string containing this code indented at column 0. - If no operation is necessary, return an empty string. - """ - return "" - - def cleanup(self) -> str: - """ - The C statements required to clean up after this variable. - Returns a string containing this code indented at column 0. - If no cleanup is necessary, returns an empty string. - """ - return "" - - def pre_render(self) -> None: - """ - A second initialization function, like converter_init, - called just before rendering. - You are permitted to examine self.function here. - """ - pass - - def bad_argument(self, displayname: str, expected: str, *, limited_capi: bool, expected_literal: bool = True) -> str: - assert '"' not in expected - if limited_capi: - if expected_literal: - return (f'PyErr_Format(PyExc_TypeError, ' - f'"{{{{name}}}}() {displayname} must be {expected}, not %.50s", ' - f'{{argname}} == Py_None ? "None" : Py_TYPE({{argname}})->tp_name);') - else: - return (f'PyErr_Format(PyExc_TypeError, ' - f'"{{{{name}}}}() {displayname} must be %.50s, not %.50s", ' - f'"{expected}", ' - f'{{argname}} == Py_None ? "None" : Py_TYPE({{argname}})->tp_name);') - else: - if expected_literal: - expected = f'"{expected}"' - self.add_include('pycore_modsupport.h', '_PyArg_BadArgument()') - return f'_PyArg_BadArgument("{{{{name}}}}", "{displayname}", {expected}, {{argname}});' - - def format_code(self, fmt: str, *, - argname: str, - bad_argument: str | None = None, - bad_argument2: str | None = None, - **kwargs: Any) -> str: - if '{bad_argument}' in fmt: - if not bad_argument: - raise TypeError("required 'bad_argument' argument") - fmt = fmt.replace('{bad_argument}', bad_argument) - if '{bad_argument2}' in fmt: - if not bad_argument2: - raise TypeError("required 'bad_argument2' argument") - fmt = fmt.replace('{bad_argument2}', bad_argument2) - return fmt.format(argname=argname, paramname=self.parser_name, **kwargs) - - def use_converter(self) -> None: - """Method called when self.converter is used to parse an argument.""" - pass - - def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: - if self.format_unit == 'O&': - self.use_converter() - return self.format_code(""" - if (!{converter}({argname}, &{paramname})) {{{{ - goto exit; - }}}} - """, - argname=argname, - converter=self.converter) - if self.format_unit == 'O!': - cast = '(%s)' % self.type if self.type != 'PyObject *' else '' - if self.subclass_of in type_checks: - typecheck, typename = type_checks[self.subclass_of] - return self.format_code(""" - if (!{typecheck}({argname})) {{{{ - {bad_argument} - goto exit; - }}}} - {paramname} = {cast}{argname}; - """, - argname=argname, - bad_argument=self.bad_argument(displayname, typename, limited_capi=limited_capi), - typecheck=typecheck, typename=typename, cast=cast) - return self.format_code(""" - if (!PyObject_TypeCheck({argname}, {subclass_of})) {{{{ - {bad_argument} - goto exit; - }}}} - {paramname} = {cast}{argname}; - """, - argname=argname, - bad_argument=self.bad_argument(displayname, '({subclass_of})->tp_name', - expected_literal=False, limited_capi=limited_capi), - subclass_of=self.subclass_of, cast=cast) - if self.format_unit == 'O': - cast = '(%s)' % self.type if self.type != 'PyObject *' else '' - return self.format_code(""" - {paramname} = {cast}{argname}; - """, - argname=argname, cast=cast) - return None - - def set_template_dict(self, template_dict: TemplateDict) -> None: - pass - - @property - def parser_name(self) -> str: - if self.name in libclinic.CLINIC_PREFIXED_ARGS: # bpo-39741 - return libclinic.CLINIC_PREFIX + self.name - else: - return self.name - - def add_include(self, name: str, reason: str, - *, condition: str | None = None) -> None: - include = Include(name, reason, condition) - self.includes.append(include) - -type_checks = { - '&PyLong_Type': ('PyLong_Check', 'int'), - '&PyTuple_Type': ('PyTuple_Check', 'tuple'), - '&PyList_Type': ('PyList_Check', 'list'), - '&PySet_Type': ('PySet_Check', 'set'), - '&PyFrozenSet_Type': ('PyFrozenSet_Check', 'frozenset'), - '&PyDict_Type': ('PyDict_Check', 'dict'), - '&PyUnicode_Type': ('PyUnicode_Check', 'str'), - '&PyBytes_Type': ('PyBytes_Check', 'bytes'), - '&PyByteArray_Type': ('PyByteArray_Check', 'bytearray'), -} - - -ConverterType = Callable[..., CConverter] -ConverterDict = dict[str, ConverterType] - -# maps strings to callables. -# these callables must be of the form: -# def foo(name, default, *, ...) -# The callable may have any number of keyword-only parameters. -# The callable must return a CConverter object. -# The callable should not call builtins.print. -converters: ConverterDict = {} - -# maps strings to callables. -# these callables follow the same rules as those for "converters" above. -# note however that they will never be called with keyword-only parameters. -legacy_converters: ConverterDict = {} - # maps strings to callables. # these callables must be of the form: # def foo(*, ...) diff --git a/Tools/clinic/libclinic/converter.py b/Tools/clinic/libclinic/converter.py new file mode 100644 index 00000000000..da28ba56a34 --- /dev/null +++ b/Tools/clinic/libclinic/converter.py @@ -0,0 +1,534 @@ +from __future__ import annotations +import builtins as bltns +import functools +from typing import Any, TypeVar, Literal, TYPE_CHECKING, cast +from collections.abc import Callable + +import libclinic +from libclinic import fail +from libclinic import Sentinels, unspecified, unknown +from libclinic.crenderdata import CRenderData, Include, TemplateDict +from libclinic.function import Function, Parameter + + +CConverterClassT = TypeVar("CConverterClassT", bound=type["CConverter"]) + + +type_checks = { + '&PyLong_Type': ('PyLong_Check', 'int'), + '&PyTuple_Type': ('PyTuple_Check', 'tuple'), + '&PyList_Type': ('PyList_Check', 'list'), + '&PySet_Type': ('PySet_Check', 'set'), + '&PyFrozenSet_Type': ('PyFrozenSet_Check', 'frozenset'), + '&PyDict_Type': ('PyDict_Check', 'dict'), + '&PyUnicode_Type': ('PyUnicode_Check', 'str'), + '&PyBytes_Type': ('PyBytes_Check', 'bytes'), + '&PyByteArray_Type': ('PyByteArray_Check', 'bytearray'), +} + + +def add_c_converter( + f: CConverterClassT, + name: str | None = None +) -> CConverterClassT: + if not name: + name = f.__name__ + if not name.endswith('_converter'): + return f + name = name.removesuffix('_converter') + converters[name] = f + return f + + +def add_default_legacy_c_converter(cls: CConverterClassT) -> CConverterClassT: + # automatically add converter for default format unit + # (but without stomping on the existing one if it's already + # set, in case you subclass) + if ((cls.format_unit not in ('O&', '')) and + (cls.format_unit not in legacy_converters)): + legacy_converters[cls.format_unit] = cls + return cls + + +class CConverterAutoRegister(type): + def __init__( + cls, name: str, bases: tuple[type[object], ...], classdict: dict[str, Any] + ) -> None: + converter_cls = cast(type["CConverter"], cls) + add_c_converter(converter_cls) + add_default_legacy_c_converter(converter_cls) + +class CConverter(metaclass=CConverterAutoRegister): + """ + For the init function, self, name, function, and default + must be keyword-or-positional parameters. All other + parameters must be keyword-only. + """ + + # The C name to use for this variable. + name: str + + # The Python name to use for this variable. + py_name: str + + # The C type to use for this variable. + # 'type' should be a Python string specifying the type, e.g. "int". + # If this is a pointer type, the type string should end with ' *'. + type: str | None = None + + # The Python default value for this parameter, as a Python value. + # Or the magic value "unspecified" if there is no default. + # Or the magic value "unknown" if this value is a cannot be evaluated + # at Argument-Clinic-preprocessing time (but is presumed to be valid + # at runtime). + default: object = unspecified + + # If not None, default must be isinstance() of this type. + # (You can also specify a tuple of types.) + default_type: bltns.type[object] | tuple[bltns.type[object], ...] | None = None + + # "default" converted into a C value, as a string. + # Or None if there is no default. + c_default: str | None = None + + # "default" converted into a Python value, as a string. + # Or None if there is no default. + py_default: str | None = None + + # The default value used to initialize the C variable when + # there is no default, but not specifying a default may + # result in an "uninitialized variable" warning. This can + # easily happen when using option groups--although + # properly-written code won't actually use the variable, + # the variable does get passed in to the _impl. (Ah, if + # only dataflow analysis could inline the static function!) + # + # This value is specified as a string. + # Every non-abstract subclass should supply a valid value. + c_ignored_default: str = 'NULL' + + # If true, wrap with Py_UNUSED. + unused = False + + # The C converter *function* to be used, if any. + # (If this is not None, format_unit must be 'O&'.) + converter: str | None = None + + # Should Argument Clinic add a '&' before the name of + # the variable when passing it into the _impl function? + impl_by_reference = False + + # Should Argument Clinic add a '&' before the name of + # the variable when passing it into PyArg_ParseTuple (AndKeywords)? + parse_by_reference = True + + ############################################################# + ############################################################# + ## You shouldn't need to read anything below this point to ## + ## write your own converter functions. ## + ############################################################# + ############################################################# + + # The "format unit" to specify for this variable when + # parsing arguments using PyArg_ParseTuple (AndKeywords). + # Custom converters should always use the default value of 'O&'. + format_unit = 'O&' + + # What encoding do we want for this variable? Only used + # by format units starting with 'e'. + encoding: str | None = None + + # Should this object be required to be a subclass of a specific type? + # If not None, should be a string representing a pointer to a + # PyTypeObject (e.g. "&PyUnicode_Type"). + # Only used by the 'O!' format unit (and the "object" converter). + subclass_of: str | None = None + + # See also the 'length_name' property. + # Only used by format units ending with '#'. + length = False + + # Should we show this parameter in the generated + # __text_signature__? This is *almost* always True. + # (It's only False for __new__, __init__, and METH_STATIC functions.) + show_in_signature = True + + # Overrides the name used in a text signature. + # The name used for a "self" parameter must be one of + # self, type, or module; however users can set their own. + # This lets the self_converter overrule the user-settable + # name, *just* for the text signature. + # Only set by self_converter. + signature_name: str | None = None + + broken_limited_capi: bool = False + + # keep in sync with self_converter.__init__! + def __init__(self, + # Positional args: + name: str, + py_name: str, + function: Function, + default: object = unspecified, + *, # Keyword only args: + c_default: str | None = None, + py_default: str | None = None, + annotation: str | Literal[Sentinels.unspecified] = unspecified, + unused: bool = False, + **kwargs: Any + ) -> None: + self.name = libclinic.ensure_legal_c_identifier(name) + self.py_name = py_name + self.unused = unused + self.includes: list[Include] = [] + + if default is not unspecified: + if (self.default_type + and default is not unknown + and not isinstance(default, self.default_type) + ): + if isinstance(self.default_type, type): + types_str = self.default_type.__name__ + else: + names = [cls.__name__ for cls in self.default_type] + types_str = ', '.join(names) + cls_name = self.__class__.__name__ + fail(f"{cls_name}: default value {default!r} for field " + f"{name!r} is not of type {types_str!r}") + self.default = default + + if c_default: + self.c_default = c_default + if py_default: + self.py_default = py_default + + if annotation is not unspecified: + fail("The 'annotation' parameter is not currently permitted.") + + # Make sure not to set self.function until after converter_init() has been called. + # This prevents you from caching information + # about the function in converter_init(). + # (That breaks if we get cloned.) + self.converter_init(**kwargs) + self.function = function + + # Add a custom __getattr__ method to improve the error message + # if somebody tries to access self.function in converter_init(). + # + # mypy will assume arbitrary access is okay for a class with a __getattr__ method, + # and that's not what we want, + # so put it inside an `if not TYPE_CHECKING` block + if not TYPE_CHECKING: + def __getattr__(self, attr): + if attr == "function": + fail( + f"{self.__class__.__name__!r} object has no attribute 'function'.\n" + f"Note: accessing self.function inside converter_init is disallowed!" + ) + return super().__getattr__(attr) + # this branch is just here for coverage reporting + else: # pragma: no cover + pass + + def converter_init(self) -> None: + pass + + def is_optional(self) -> bool: + return (self.default is not unspecified) + + def _render_self(self, parameter: Parameter, data: CRenderData) -> None: + self.parameter = parameter + name = self.parser_name + + # impl_arguments + s = ("&" if self.impl_by_reference else "") + name + data.impl_arguments.append(s) + if self.length: + data.impl_arguments.append(self.length_name) + + # impl_parameters + data.impl_parameters.append(self.simple_declaration(by_reference=self.impl_by_reference)) + if self.length: + data.impl_parameters.append(f"Py_ssize_t {self.length_name}") + + def _render_non_self( + self, + parameter: Parameter, + data: CRenderData + ) -> None: + self.parameter = parameter + name = self.name + + # declarations + d = self.declaration(in_parser=True) + data.declarations.append(d) + + # initializers + initializers = self.initialize() + if initializers: + data.initializers.append('/* initializers for ' + name + ' */\n' + initializers.rstrip()) + + # modifications + modifications = self.modify() + if modifications: + data.modifications.append('/* modifications for ' + name + ' */\n' + modifications.rstrip()) + + # keywords + if parameter.is_vararg(): + pass + elif parameter.is_positional_only(): + data.keywords.append('') + else: + data.keywords.append(parameter.name) + + # format_units + if self.is_optional() and '|' not in data.format_units: + data.format_units.append('|') + if parameter.is_keyword_only() and '$' not in data.format_units: + data.format_units.append('$') + data.format_units.append(self.format_unit) + + # parse_arguments + self.parse_argument(data.parse_arguments) + + # post_parsing + if post_parsing := self.post_parsing(): + data.post_parsing.append('/* Post parse cleanup for ' + name + ' */\n' + post_parsing.rstrip() + '\n') + + # cleanup + cleanup = self.cleanup() + if cleanup: + data.cleanup.append('/* Cleanup for ' + name + ' */\n' + cleanup.rstrip() + "\n") + + def render(self, parameter: Parameter, data: CRenderData) -> None: + """ + parameter is a clinic.Parameter instance. + data is a CRenderData instance. + """ + self._render_self(parameter, data) + self._render_non_self(parameter, data) + + @functools.cached_property + def length_name(self) -> str: + """Computes the name of the associated "length" variable.""" + assert self.length is not None + return self.parser_name + "_length" + + # Why is this one broken out separately? + # For "positional-only" function parsing, + # which generates a bunch of PyArg_ParseTuple calls. + def parse_argument(self, args: list[str]) -> None: + assert not (self.converter and self.encoding) + if self.format_unit == 'O&': + assert self.converter + args.append(self.converter) + + if self.encoding: + args.append(libclinic.c_repr(self.encoding)) + elif self.subclass_of: + args.append(self.subclass_of) + + s = ("&" if self.parse_by_reference else "") + self.parser_name + args.append(s) + + if self.length: + args.append(f"&{self.length_name}") + + # + # All the functions after here are intended as extension points. + # + + def simple_declaration( + self, + by_reference: bool = False, + *, + in_parser: bool = False + ) -> str: + """ + Computes the basic declaration of the variable. + Used in computing the prototype declaration and the + variable declaration. + """ + assert isinstance(self.type, str) + prototype = [self.type] + if by_reference or not self.type.endswith('*'): + prototype.append(" ") + if by_reference: + prototype.append('*') + if in_parser: + name = self.parser_name + else: + name = self.name + if self.unused: + name = f"Py_UNUSED({name})" + prototype.append(name) + return "".join(prototype) + + def declaration(self, *, in_parser: bool = False) -> str: + """ + The C statement to declare this variable. + """ + declaration = [self.simple_declaration(in_parser=True)] + default = self.c_default + if not default and self.parameter.group: + default = self.c_ignored_default + if default: + declaration.append(" = ") + declaration.append(default) + declaration.append(";") + if self.length: + declaration.append('\n') + declaration.append(f"Py_ssize_t {self.length_name};") + return "".join(declaration) + + def initialize(self) -> str: + """ + The C statements required to set up this variable before parsing. + Returns a string containing this code indented at column 0. + If no initialization is necessary, returns an empty string. + """ + return "" + + def modify(self) -> str: + """ + The C statements required to modify this variable after parsing. + Returns a string containing this code indented at column 0. + If no modification is necessary, returns an empty string. + """ + return "" + + def post_parsing(self) -> str: + """ + The C statements required to do some operations after the end of parsing but before cleaning up. + Return a string containing this code indented at column 0. + If no operation is necessary, return an empty string. + """ + return "" + + def cleanup(self) -> str: + """ + The C statements required to clean up after this variable. + Returns a string containing this code indented at column 0. + If no cleanup is necessary, returns an empty string. + """ + return "" + + def pre_render(self) -> None: + """ + A second initialization function, like converter_init, + called just before rendering. + You are permitted to examine self.function here. + """ + pass + + def bad_argument(self, displayname: str, expected: str, *, limited_capi: bool, expected_literal: bool = True) -> str: + assert '"' not in expected + if limited_capi: + if expected_literal: + return (f'PyErr_Format(PyExc_TypeError, ' + f'"{{{{name}}}}() {displayname} must be {expected}, not %.50s", ' + f'{{argname}} == Py_None ? "None" : Py_TYPE({{argname}})->tp_name);') + else: + return (f'PyErr_Format(PyExc_TypeError, ' + f'"{{{{name}}}}() {displayname} must be %.50s, not %.50s", ' + f'"{expected}", ' + f'{{argname}} == Py_None ? "None" : Py_TYPE({{argname}})->tp_name);') + else: + if expected_literal: + expected = f'"{expected}"' + self.add_include('pycore_modsupport.h', '_PyArg_BadArgument()') + return f'_PyArg_BadArgument("{{{{name}}}}", "{displayname}", {expected}, {{argname}});' + + def format_code(self, fmt: str, *, + argname: str, + bad_argument: str | None = None, + bad_argument2: str | None = None, + **kwargs: Any) -> str: + if '{bad_argument}' in fmt: + if not bad_argument: + raise TypeError("required 'bad_argument' argument") + fmt = fmt.replace('{bad_argument}', bad_argument) + if '{bad_argument2}' in fmt: + if not bad_argument2: + raise TypeError("required 'bad_argument2' argument") + fmt = fmt.replace('{bad_argument2}', bad_argument2) + return fmt.format(argname=argname, paramname=self.parser_name, **kwargs) + + def use_converter(self) -> None: + """Method called when self.converter is used to parse an argument.""" + pass + + def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: + if self.format_unit == 'O&': + self.use_converter() + return self.format_code(""" + if (!{converter}({argname}, &{paramname})) {{{{ + goto exit; + }}}} + """, + argname=argname, + converter=self.converter) + if self.format_unit == 'O!': + cast = '(%s)' % self.type if self.type != 'PyObject *' else '' + if self.subclass_of in type_checks: + typecheck, typename = type_checks[self.subclass_of] + return self.format_code(""" + if (!{typecheck}({argname})) {{{{ + {bad_argument} + goto exit; + }}}} + {paramname} = {cast}{argname}; + """, + argname=argname, + bad_argument=self.bad_argument(displayname, typename, limited_capi=limited_capi), + typecheck=typecheck, typename=typename, cast=cast) + return self.format_code(""" + if (!PyObject_TypeCheck({argname}, {subclass_of})) {{{{ + {bad_argument} + goto exit; + }}}} + {paramname} = {cast}{argname}; + """, + argname=argname, + bad_argument=self.bad_argument(displayname, '({subclass_of})->tp_name', + expected_literal=False, limited_capi=limited_capi), + subclass_of=self.subclass_of, cast=cast) + if self.format_unit == 'O': + cast = '(%s)' % self.type if self.type != 'PyObject *' else '' + return self.format_code(""" + {paramname} = {cast}{argname}; + """, + argname=argname, cast=cast) + return None + + def set_template_dict(self, template_dict: TemplateDict) -> None: + pass + + @property + def parser_name(self) -> str: + if self.name in libclinic.CLINIC_PREFIXED_ARGS: # bpo-39741 + return libclinic.CLINIC_PREFIX + self.name + else: + return self.name + + def add_include(self, name: str, reason: str, + *, condition: str | None = None) -> None: + include = Include(name, reason, condition) + self.includes.append(include) + + +ConverterType = Callable[..., CConverter] +ConverterDict = dict[str, ConverterType] + +# maps strings to callables. +# these callables must be of the form: +# def foo(name, default, *, ...) +# The callable may have any number of keyword-only parameters. +# The callable must return a CConverter object. +# The callable should not call builtins.print. +converters: ConverterDict = {} + +# maps strings to callables. +# these callables follow the same rules as those for "converters" above. +# note however that they will never be called with keyword-only parameters. +legacy_converters: ConverterDict = {} diff --git a/Tools/clinic/libclinic/crenderdata.py b/Tools/clinic/libclinic/crenderdata.py new file mode 100644 index 00000000000..58976b8185e --- /dev/null +++ b/Tools/clinic/libclinic/crenderdata.py @@ -0,0 +1,81 @@ +import dataclasses as dc + + +TemplateDict = dict[str, str] + + +class CRenderData: + def __init__(self) -> None: + + # The C statements to declare variables. + # Should be full lines with \n eol characters. + self.declarations: list[str] = [] + + # The C statements required to initialize the variables before the parse call. + # Should be full lines with \n eol characters. + self.initializers: list[str] = [] + + # The C statements needed to dynamically modify the values + # parsed by the parse call, before calling the impl. + self.modifications: list[str] = [] + + # The entries for the "keywords" array for PyArg_ParseTuple. + # Should be individual strings representing the names. + self.keywords: list[str] = [] + + # The "format units" for PyArg_ParseTuple. + # Should be individual strings that will get + self.format_units: list[str] = [] + + # The varargs arguments for PyArg_ParseTuple. + self.parse_arguments: list[str] = [] + + # The parameter declarations for the impl function. + self.impl_parameters: list[str] = [] + + # The arguments to the impl function at the time it's called. + self.impl_arguments: list[str] = [] + + # For return converters: the name of the variable that + # should receive the value returned by the impl. + self.return_value = "return_value" + + # For return converters: the code to convert the return + # value from the parse function. This is also where + # you should check the _return_value for errors, and + # "goto exit" if there are any. + self.return_conversion: list[str] = [] + self.converter_retval = "_return_value" + + # The C statements required to do some operations + # after the end of parsing but before cleaning up. + # These operations may be, for example, memory deallocations which + # can only be done without any error happening during argument parsing. + self.post_parsing: list[str] = [] + + # The C statements required to clean up after the impl call. + self.cleanup: list[str] = [] + + # The C statements to generate critical sections (per-object locking). + self.lock: list[str] = [] + self.unlock: list[str] = [] + + +@dc.dataclass(slots=True, frozen=True) +class Include: + """ + An include like: #include "pycore_long.h" // _Py_ID() + """ + # Example: "pycore_long.h". + filename: str + + # Example: "_Py_ID()". + reason: str + + # None means unconditional include. + # Example: "#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)". + condition: str | None + + def sort_key(self) -> tuple[str, str]: + # order: '#if' comes before 'NO_CONDITION' + return (self.condition or 'NO_CONDITION', self.filename) diff --git a/Tools/clinic/libclinic/function.py b/Tools/clinic/libclinic/function.py index 48cb7d05a7c..4fafedb6171 100644 --- a/Tools/clinic/libclinic/function.py +++ b/Tools/clinic/libclinic/function.py @@ -6,7 +6,8 @@ import functools import inspect from typing import Final, Any, TYPE_CHECKING if TYPE_CHECKING: - from clinic import Clinic, CConverter, CReturnConverter, self_converter + from clinic import Clinic, CReturnConverter, self_converter + from libclinic.converter import CConverter from libclinic import VersionTuple, unspecified