bpo-40528: Implement a metadata system for ASDL Generator (GH-20193)

The ASDL generator lacked proper annotations about the module being
generated. This patch implements a MetadataVisitor that produces a
metadata object, which is passed to the other visitors that visit the
same module. In this initial patch, it dynamically determines the int
sequences (like cmpop) that were previously hardcoded. It offers
an interface that is easy to extend.
This commit is contained in:
Batuhan Taskaya 2021-06-22 19:29:42 +03:00 committed by GitHub
parent d1ae57027f
commit 35ad425866
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 75 additions and 24 deletions

View File

@ -4,6 +4,7 @@
import os import os
import sys import sys
import textwrap import textwrap
import types
from argparse import ArgumentParser from argparse import ArgumentParser
from contextlib import contextmanager from contextlib import contextmanager
@ -100,11 +101,12 @@ def asdl_of(name, obj):
class EmitVisitor(asdl.VisitorBase): class EmitVisitor(asdl.VisitorBase):
"""Visit that emits lines""" """Visit that emits lines"""
def __init__(self, file): def __init__(self, file, metadata = None):
self.file = file self.file = file
self.identifiers = set() self.identifiers = set()
self.singletons = set() self.singletons = set()
self.types = set() self.types = set()
self._metadata = metadata
super(EmitVisitor, self).__init__() super(EmitVisitor, self).__init__()
def emit_identifier(self, name): def emit_identifier(self, name):
@ -127,6 +129,42 @@ class EmitVisitor(asdl.VisitorBase):
line = (" " * TABSIZE * depth) + line line = (" " * TABSIZE * depth) + line
self.file.write(line + "\n") self.file.write(line + "\n")
@property
def metadata(self):
    """Return the metadata object attached to this visitor.

    Raises:
        ValueError: if no metadata has been provided, i.e. the
            underlying ``_metadata`` attribute is still ``None``.
    """
    if self._metadata is None:
        # Fail loudly instead of handing back None, and name the
        # concrete visitor class in the message.
        raise ValueError(
            "%s was expecting to be annotated with metadata"
            % type(self).__name__
        )
    return self._metadata

@metadata.setter
def metadata(self, value):
    """Store *value* as this visitor's metadata."""
    self._metadata = value
class MetadataVisitor(asdl.VisitorBase):
    """Visitor that gathers module-wide metadata into ``self.metadata``.

    The result is a ``types.SimpleNamespace`` with these attributes:

    - ``simple_sums``: set of names of the sum types for which
      ``is_simple()`` is true (compound types whose constructors all
      lack fields).
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Starts out empty; filled in while the module is visited.
        self.metadata = types.SimpleNamespace(simple_sums=set())

    def visitModule(self, mod):
        """Visit every definition contained in *mod*."""
        for definition in mod.dfns:
            self.visit(definition)

    def visitType(self, type):
        """Visit the definition's value, passing its name along."""
        self.visit(type.value, type.name)

    def visitSum(self, sum, name):
        """Record *name* in ``simple_sums`` when *sum* is a simple sum."""
        if not is_simple(sum):
            return
        self.metadata.simple_sums.add(name)
class TypeDefVisitor(EmitVisitor): class TypeDefVisitor(EmitVisitor):
def visitModule(self, mod): def visitModule(self, mod):
@ -244,7 +282,7 @@ class StructVisitor(EmitVisitor):
ctype = get_c_type(field.type) ctype = get_c_type(field.type)
name = field.name name = field.name
if field.seq: if field.seq:
if field.type == 'cmpop': if field.type in self.metadata.simple_sums:
self.emit("asdl_int_seq *%(name)s;" % locals(), depth) self.emit("asdl_int_seq *%(name)s;" % locals(), depth)
else: else:
_type = field.type _type = field.type
@ -304,7 +342,7 @@ class PrototypeVisitor(EmitVisitor):
name = f.name name = f.name
# XXX should extend get_c_type() to handle this # XXX should extend get_c_type() to handle this
if f.seq: if f.seq:
if f.type == 'cmpop': if f.type in self.metadata.simple_sums:
ctype = "asdl_int_seq *" ctype = "asdl_int_seq *"
else: else:
ctype = f"asdl_{f.type}_seq *" ctype = f"asdl_{f.type}_seq *"
@ -549,16 +587,11 @@ class Obj2ModVisitor(PickleVisitor):
ctype = get_c_type(field.type) ctype = get_c_type(field.type)
self.emit("%s %s;" % (ctype, field.name), depth) self.emit("%s %s;" % (ctype, field.name), depth)
def isSimpleSum(self, field):
# XXX can the members of this list be determined automatically?
return field.type in ('expr_context', 'boolop', 'operator',
'unaryop', 'cmpop')
def isNumeric(self, field): def isNumeric(self, field):
return get_c_type(field.type) in ("int", "bool") return get_c_type(field.type) in ("int", "bool")
def isSimpleType(self, field): def isSimpleType(self, field):
return self.isSimpleSum(field) or self.isNumeric(field) return field.type in self.metadata.simple_sums or self.isNumeric(field)
def visitField(self, field, name, sum=None, prod=None, depth=0): def visitField(self, field, name, sum=None, prod=None, depth=0):
ctype = get_c_type(field.type) ctype = get_c_type(field.type)
@ -1282,18 +1315,23 @@ class ObjVisitor(PickleVisitor):
def set(self, field, value, depth): def set(self, field, value, depth):
if field.seq: if field.seq:
# XXX should really check for is_simple, but that requires a symbol table if field.type in self.metadata.simple_sums:
if field.type == "cmpop":
# While the sequence elements are stored as void*, # While the sequence elements are stored as void*,
# ast2obj_cmpop expects an enum # simple sums expects an enum
self.emit("{", depth) self.emit("{", depth)
self.emit("Py_ssize_t i, n = asdl_seq_LEN(%s);" % value, depth+1) self.emit("Py_ssize_t i, n = asdl_seq_LEN(%s);" % value, depth+1)
self.emit("value = PyList_New(n);", depth+1) self.emit("value = PyList_New(n);", depth+1)
self.emit("if (!value) goto failed;", depth+1) self.emit("if (!value) goto failed;", depth+1)
self.emit("for(i = 0; i < n; i++)", depth+1) self.emit("for(i = 0; i < n; i++)", depth+1)
# This cannot fail, so no need for error handling # This cannot fail, so no need for error handling
self.emit("PyList_SET_ITEM(value, i, ast2obj_cmpop(state, (cmpop_ty)asdl_seq_GET(%s, i)));" % value, self.emit(
depth+2, reflow=False) "PyList_SET_ITEM(value, i, ast2obj_{0}(state, ({0}_ty)asdl_seq_GET({1}, i)));".format(
field.type,
value
),
depth + 2,
reflow=False,
)
self.emit("}", depth) self.emit("}", depth)
else: else:
self.emit("value = ast2obj_list(state, (asdl_seq*)%s, ast2obj_%s);" % (value, field.type), depth) self.emit("value = ast2obj_list(state, (asdl_seq*)%s, ast2obj_%s);" % (value, field.type), depth)
@ -1362,11 +1400,13 @@ int PyAST_Check(PyObject* obj)
""" """
class ChainOfVisitors: class ChainOfVisitors:
def __init__(self, *visitors): def __init__(self, *visitors, metadata = None):
self.visitors = visitors self.visitors = visitors
self.metadata = metadata
def visit(self, object): def visit(self, object):
for v in self.visitors: for v in self.visitors:
v.metadata = self.metadata
v.visit(object) v.visit(object)
v.emit("", 0) v.emit("", 0)
@ -1468,7 +1508,7 @@ def generate_module_def(mod, f, internal_h):
f.write(' return 1;\n') f.write(' return 1;\n')
f.write('};\n\n') f.write('};\n\n')
def write_header(mod, f): def write_header(mod, metadata, f):
f.write(textwrap.dedent(""" f.write(textwrap.dedent("""
#ifndef Py_INTERNAL_AST_H #ifndef Py_INTERNAL_AST_H
#define Py_INTERNAL_AST_H #define Py_INTERNAL_AST_H
@ -1483,12 +1523,19 @@ def write_header(mod, f):
#include "pycore_asdl.h" #include "pycore_asdl.h"
""").lstrip()) """).lstrip())
c = ChainOfVisitors(TypeDefVisitor(f),
SequenceDefVisitor(f), c = ChainOfVisitors(
StructVisitor(f)) TypeDefVisitor(f),
SequenceDefVisitor(f),
StructVisitor(f),
metadata=metadata
)
c.visit(mod) c.visit(mod)
f.write("// Note: these macros affect function definitions, not only call sites.\n") f.write("// Note: these macros affect function definitions, not only call sites.\n")
PrototypeVisitor(f).visit(mod) prototype_visitor = PrototypeVisitor(f, metadata=metadata)
prototype_visitor.visit(mod)
f.write(textwrap.dedent(""" f.write(textwrap.dedent("""
PyObject* PyAST_mod2obj(mod_ty t); PyObject* PyAST_mod2obj(mod_ty t);
@ -1535,8 +1582,7 @@ def write_internal_h_footer(mod, f):
#endif /* !Py_INTERNAL_AST_STATE_H */ #endif /* !Py_INTERNAL_AST_STATE_H */
"""), file=f) """), file=f)
def write_source(mod, metadata, f, internal_h_file):
def write_source(mod, f, internal_h_file):
generate_module_def(mod, f, internal_h_file) generate_module_def(mod, f, internal_h_file)
v = ChainOfVisitors( v = ChainOfVisitors(
@ -1549,6 +1595,7 @@ def write_source(mod, f, internal_h_file):
Obj2ModVisitor(f), Obj2ModVisitor(f),
ASTModuleVisitor(f), ASTModuleVisitor(f),
PartingShots(f), PartingShots(f),
metadata=metadata
) )
v.visit(mod) v.visit(mod)
@ -1561,6 +1608,10 @@ def main(input_filename, c_filename, h_filename, internal_h_filename, dump_modul
if not asdl.check(mod): if not asdl.check(mod):
sys.exit(1) sys.exit(1)
metadata_visitor = MetadataVisitor()
metadata_visitor.visit(mod)
metadata = metadata_visitor.metadata
with c_filename.open("w") as c_file, \ with c_filename.open("w") as c_file, \
h_filename.open("w") as h_file, \ h_filename.open("w") as h_file, \
internal_h_filename.open("w") as internal_h_file: internal_h_filename.open("w") as internal_h_file:
@ -1569,8 +1620,8 @@ def main(input_filename, c_filename, h_filename, internal_h_filename, dump_modul
internal_h_file.write(auto_gen_msg) internal_h_file.write(auto_gen_msg)
write_internal_h_header(mod, internal_h_file) write_internal_h_header(mod, internal_h_file)
write_source(mod, c_file, internal_h_file) write_source(mod, metadata, c_file, internal_h_file)
write_header(mod, h_file) write_header(mod, metadata, h_file)
write_internal_h_footer(mod, internal_h_file) write_internal_h_footer(mod, internal_h_file)
print(f"{c_filename}, {h_filename}, {internal_h_filename} regenerated.") print(f"{c_filename}, {h_filename}, {internal_h_filename} regenerated.")