#! /usr/bin/env python """Generate C code from an ASDL description.""" import sys import textwrap import types from argparse import ArgumentParser from contextlib import contextmanager from pathlib import Path import asdl TABSIZE = 4 MAX_COL = 80 AUTOGEN_MESSAGE = "// File automatically generated by {}.\n\n" builtin_type_to_c_type = { "identifier": "PyUnicode_Type", "string": "PyUnicode_Type", "int": "PyLong_Type", "constant": "PyBaseObject_Type", } def get_c_type(name): """Return a string for the C name of the type. This function special cases the default types provided by asdl. """ if name in asdl.builtin_types: return name else: return "%s_ty" % name def reflow_lines(s, depth): """Reflow the line s indented depth tabs. Return a sequence of lines where no line extends beyond MAX_COL when properly indented. The first line is properly indented based exclusively on depth * TABSIZE. All following lines -- these are the reflowed lines generated by this function -- start at the same column as the first character beyond the opening { in the first line. """ size = MAX_COL - depth * TABSIZE if len(s) < size: return [s] lines = [] cur = s padding = "" while len(cur) > size: i = cur.rfind(' ', 0, size) # XXX this should be fixed for real if i == -1 and 'GeneratorExp' in cur: i = size + 3 assert i != -1, "Impossible line %d to reflow: %r" % (size, s) lines.append(padding + cur[:i]) if len(lines) == 1: # find new size based on brace j = cur.find('{', 0, i) if j >= 0: j += 2 # account for the brace and the space after it size -= j padding = " " * j else: j = cur.find('(', 0, i) if j >= 0: j += 1 # account for the paren (no space after it) size -= j padding = " " * j cur = cur[i+1:] else: lines.append(padding + cur) return lines def reflow_c_string(s, depth): return '"%s"' % s.replace('\n', '\\n"\n%s"' % (' ' * depth * TABSIZE)) def is_simple(sum_type): """Return True if a sum is a simple. A sum is simple if its types have no fields and itself doesn't have any attributes. Instances of these types are cached at C level, and they act like singletons when propagating parser generated nodes into Python level, e.g. unaryop = Invert | Not | UAdd | USub """ return not ( sum_type.attributes or any(constructor.fields for constructor in sum_type.types) ) def asdl_of(name, obj): if isinstance(obj, asdl.Product) or isinstance(obj, asdl.Constructor): fields = ", ".join(map(str, obj.fields)) if fields: fields = "({})".format(fields) return "{}{}".format(name, fields) else: if is_simple(obj): types = " | ".join(type.name for type in obj.types) else: sep = "\n{}| ".format(" " * (len(name) + 1)) types = sep.join( asdl_of(type.name, type) for type in obj.types ) return "{} = {}".format(name, types) class EmitVisitor(asdl.VisitorBase): """Visit that emits lines""" def __init__(self, file, metadata = None): self.file = file self._metadata = metadata super(EmitVisitor, self).__init__() def emit(self, s, depth, reflow=True): # XXX reflow long lines? if reflow: lines = reflow_lines(s, depth) else: lines = [s] for line in lines: if line: line = (" " * TABSIZE * depth) + line self.file.write(line + "\n") @property def metadata(self): if self._metadata is None: raise ValueError( "%s was expecting to be annnotated with metadata" % type(self).__name__ ) return self._metadata @metadata.setter def metadata(self, value): self._metadata = value class MetadataVisitor(asdl.VisitorBase): ROOT_TYPE = "AST" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # Metadata: # - simple_sums: Tracks the list of compound type # names where all the constructors # belonging to that type lack of any # fields. # - identifiers: All identifiers used in the AST declarations # - singletons: List of all constructors that originates from # simple sums. # - types: List of all top level type names # self.metadata = types.SimpleNamespace( simple_sums=set(), identifiers=set(), singletons=set(), types={self.ROOT_TYPE}, ) def visitModule(self, mod): for dfn in mod.dfns: self.visit(dfn) def visitType(self, type): self.visit(type.value, type.name) def visitSum(self, sum, name): self.metadata.types.add(name) simple_sum = is_simple(sum) if simple_sum: self.metadata.simple_sums.add(name) for constructor in sum.types: if simple_sum: self.metadata.singletons.add(constructor.name) self.visitConstructor(constructor) self.visitFields(sum.attributes) def visitConstructor(self, constructor): self.metadata.types.add(constructor.name) self.visitFields(constructor.fields) def visitProduct(self, product, name): self.metadata.types.add(name) self.visitFields(product.attributes) self.visitFields(product.fields) def visitFields(self, fields): for field in fields: self.visitField(field) def visitField(self, field): self.metadata.identifiers.add(field.name) class TypeDefVisitor(EmitVisitor): def visitModule(self, mod): for dfn in mod.dfns: self.visit(dfn) def visitType(self, type, depth=0): self.visit(type.value, type.name, depth) def visitSum(self, sum, name, depth): if is_simple(sum): self.simple_sum(sum, name, depth) else: self.sum_with_constructors(sum, name, depth) def simple_sum(self, sum, name, depth): enum = [] for i in range(len(sum.types)): type = sum.types[i] enum.append("%s=%d" % (type.name, i + 1)) enums = ", ".join(enum) ctype = get_c_type(name) s = "typedef enum _%s { %s } %s;" % (name, enums, ctype) self.emit(s, depth) self.emit("", depth) def sum_with_constructors(self, sum, name, depth): ctype = get_c_type(name) s = "typedef struct _%(name)s *%(ctype)s;" % locals() self.emit(s, depth) self.emit("", depth) def visitProduct(self, product, name, depth): ctype = get_c_type(name) s = "typedef struct _%(name)s *%(ctype)s;" % locals() self.emit(s, depth) self.emit("", depth) class SequenceDefVisitor(EmitVisitor): def visitModule(self, mod): for dfn in mod.dfns: self.visit(dfn) def visitType(self, type, depth=0): self.visit(type.value, type.name, depth) def visitSum(self, sum, name, depth): if is_simple(sum): return self.emit_sequence_constructor(name, depth) def emit_sequence_constructor(self, name,depth): ctype = get_c_type(name) self.emit("""\ typedef struct { _ASDL_SEQ_HEAD %(ctype)s typed_elements[1]; } asdl_%(name)s_seq;""" % locals(), reflow=False, depth=depth) self.emit("", depth) self.emit("asdl_%(name)s_seq *_Py_asdl_%(name)s_seq_new(Py_ssize_t size, PyArena *arena);" % locals(), depth) self.emit("", depth) def visitProduct(self, product, name, depth): self.emit_sequence_constructor(name, depth) class StructVisitor(EmitVisitor): """Visitor to generate typedefs for AST.""" def visitModule(self, mod): for dfn in mod.dfns: self.visit(dfn) def visitType(self, type, depth=0): self.visit(type.value, type.name, depth) def visitSum(self, sum, name, depth): if not is_simple(sum): self.sum_with_constructors(sum, name, depth) def sum_with_constructors(self, sum, name, depth): def emit(s, depth=depth): self.emit(s % sys._getframe(1).f_locals, depth) enum = [] for i in range(len(sum.types)): type = sum.types[i] enum.append("%s_kind=%d" % (type.name, i + 1)) emit("enum _%(name)s_kind {" + ", ".join(enum) + "};") emit("struct _%(name)s {") emit("enum _%(name)s_kind kind;", depth + 1) emit("union {", depth + 1) for t in sum.types: self.visit(t, depth + 2) emit("} v;", depth + 1) for field in sum.attributes: # rudimentary attribute handling type = str(field.type) assert type in asdl.builtin_types, type emit("%s %s;" % (type, field.name), depth + 1); emit("};") emit("") def visitConstructor(self, cons, depth): if cons.fields: self.emit("struct {", depth) for f in cons.fields: self.visit(f, depth + 1) self.emit("} %s;" % cons.name, depth) self.emit("", depth) def visitField(self, field, depth): # XXX need to lookup field.type, because it might be something # like a builtin... ctype = get_c_type(field.type) name = field.name if field.seq: if field.type in self.metadata.simple_sums: self.emit("asdl_int_seq *%(name)s;" % locals(), depth) else: _type = field.type self.emit("asdl_%(_type)s_seq *%(name)s;" % locals(), depth) else: self.emit("%(ctype)s %(name)s;" % locals(), depth) def visitProduct(self, product, name, depth): self.emit("struct _%(name)s {" % locals(), depth) for f in product.fields: self.visit(f, depth + 1) for field in product.attributes: # rudimentary attribute handling type = str(field.type) assert type in asdl.builtin_types, type self.emit("%s %s;" % (type, field.name), depth + 1); self.emit("};", depth) self.emit("", depth) def ast_func_name(name): return f"_PyAST_{name}" class PrototypeVisitor(EmitVisitor): """Generate function prototypes for the .h file""" def visitModule(self, mod): for dfn in mod.dfns: self.visit(dfn) def visitType(self, type): self.visit(type.value, type.name) def visitSum(self, sum, name): if is_simple(sum): pass # XXX else: for t in sum.types: self.visit(t, name, sum.attributes) def get_args(self, fields): """Return list of C argument info, one for each field. Argument info is 3-tuple of a C type, variable name, and flag that is true if type can be NULL. """ args = [] unnamed = {} for f in fields: if f.name is None: name = f.type c = unnamed[name] = unnamed.get(name, 0) + 1 if c > 1: name = "name%d" % (c - 1) else: name = f.name # XXX should extend get_c_type() to handle this if f.seq: if f.type in self.metadata.simple_sums: ctype = "asdl_int_seq *" else: ctype = f"asdl_{f.type}_seq *" else: ctype = get_c_type(f.type) args.append((ctype, name, f.opt or f.seq)) return args def visitConstructor(self, cons, type, attrs): args = self.get_args(cons.fields) attrs = self.get_args(attrs) ctype = get_c_type(type) self.emit_function(cons.name, ctype, args, attrs) def emit_function(self, name, ctype, args, attrs, union=True): args = args + attrs if args: argstr = ", ".join(["%s %s" % (atype, aname) for atype, aname, opt in args]) argstr += ", PyArena *arena" else: argstr = "PyArena *arena" self.emit("%s %s(%s);" % (ctype, ast_func_name(name), argstr), False) def visitProduct(self, prod, name): self.emit_function(name, get_c_type(name), self.get_args(prod.fields), self.get_args(prod.attributes), union=False) class FunctionVisitor(PrototypeVisitor): """Visitor to generate constructor functions for AST.""" def emit_function(self, name, ctype, args, attrs, union=True): def emit(s, depth=0, reflow=True): self.emit(s, depth, reflow) argstr = ", ".join(["%s %s" % (atype, aname) for atype, aname, opt in args + attrs]) if argstr: argstr += ", PyArena *arena" else: argstr = "PyArena *arena" self.emit("%s" % ctype, 0) emit("%s(%s)" % (ast_func_name(name), argstr)) emit("{") emit("%s p;" % ctype, 1) for argtype, argname, opt in args: if not opt and argtype != "int": emit("if (!%s) {" % argname, 1) emit("PyErr_SetString(PyExc_ValueError,", 2) msg = "field '%s' is required for %s" % (argname, name) emit(' "%s");' % msg, 2, reflow=False) emit('return NULL;', 2) emit('}', 1) emit("p = (%s)_PyArena_Malloc(arena, sizeof(*p));" % ctype, 1); emit("if (!p)", 1) emit("return NULL;", 2) if union: self.emit_body_union(name, args, attrs) else: self.emit_body_struct(name, args, attrs) emit("return p;", 1) emit("}") emit("") def emit_body_union(self, name, args, attrs): def emit(s, depth=0, reflow=True): self.emit(s, depth, reflow) emit("p->kind = %s_kind;" % name, 1) for argtype, argname, opt in args: emit("p->v.%s.%s = %s;" % (name, argname, argname), 1) for argtype, argname, opt in attrs: emit("p->%s = %s;" % (argname, argname), 1) def emit_body_struct(self, name, args, attrs): def emit(s, depth=0, reflow=True): self.emit(s, depth, reflow) for argtype, argname, opt in args: emit("p->%s = %s;" % (argname, argname), 1) for argtype, argname, opt in attrs: emit("p->%s = %s;" % (argname, argname), 1) class PickleVisitor(EmitVisitor): def visitModule(self, mod): for dfn in mod.dfns: self.visit(dfn) def visitType(self, type): self.visit(type.value, type.name) def visitSum(self, sum, name): pass def visitProduct(self, sum, name): pass def visitConstructor(self, cons, name): pass def visitField(self, sum): pass class Obj2ModPrototypeVisitor(PickleVisitor): def visitProduct(self, prod, name): code = "static int obj2ast_%s(struct ast_state *state, PyObject* obj, %s* out, PyArena* arena);" self.emit(code % (name, get_c_type(name)), 0) visitSum = visitProduct class Obj2ModVisitor(PickleVisitor): attribute_special_defaults = { "end_lineno": "lineno", "end_col_offset": "col_offset", } @contextmanager def recursive_call(self, node, level): self.emit('if (_Py_EnterRecursiveCall(" while traversing \'%s\' node")) {' % node, level, reflow=False) self.emit('goto failed;', level + 1) self.emit('}', level) yield self.emit('_Py_LeaveRecursiveCall();', level) def funcHeader(self, name): ctype = get_c_type(name) self.emit("int", 0) self.emit("obj2ast_%s(struct ast_state *state, PyObject* obj, %s* out, PyArena* arena)" % (name, ctype), 0) self.emit("{", 0) self.emit("int isinstance;", 1) self.emit("", 0) def sumTrailer(self, name, add_label=False): self.emit("", 0) # there's really nothing more we can do if this fails ... error = "expected some sort of %s, but got %%R" % name format = "PyErr_Format(PyExc_TypeError, \"%s\", obj);" self.emit(format % error, 1, reflow=False) if add_label: self.emit("failed:", 1) self.emit("Py_XDECREF(tmp);", 1) self.emit("return -1;", 1) self.emit("}", 0) self.emit("", 0) def simpleSum(self, sum, name): self.funcHeader(name) for t in sum.types: line = ("isinstance = PyObject_IsInstance(obj, " "state->%s_type);") self.emit(line % (t.name,), 1) self.emit("if (isinstance == -1) {", 1) self.emit("return -1;", 2) self.emit("}", 1) self.emit("if (isinstance) {", 1) self.emit("*out = %s;" % t.name, 2) self.emit("return 0;", 2) self.emit("}", 1) self.sumTrailer(name) def buildArgs(self, fields): return ", ".join(fields + ["arena"]) def complexSum(self, sum, name): self.funcHeader(name) self.emit("PyObject *tmp = NULL;", 1) self.emit("PyObject *tp;", 1) for a in sum.attributes: self.visitAttributeDeclaration(a, name, sum=sum) self.emit("", 0) # XXX: should we only do this for 'expr'? self.emit("if (obj == Py_None) {", 1) self.emit("*out = NULL;", 2) self.emit("return 0;", 2) self.emit("}", 1) for a in sum.attributes: self.visitField(a, name, sum=sum, depth=1) for t in sum.types: self.emit("tp = state->%s_type;" % (t.name,), 1) self.emit("isinstance = PyObject_IsInstance(obj, tp);", 1) self.emit("if (isinstance == -1) {", 1) self.emit("return -1;", 2) self.emit("}", 1) self.emit("if (isinstance) {", 1) for f in t.fields: self.visitFieldDeclaration(f, t.name, sum=sum, depth=2) self.emit("", 0) for f in t.fields: self.visitField(f, t.name, sum=sum, depth=2) args = [f.name for f in t.fields] + [a.name for a in sum.attributes] self.emit("*out = %s(%s);" % (ast_func_name(t.name), self.buildArgs(args)), 2) self.emit("if (*out == NULL) goto failed;", 2) self.emit("return 0;", 2) self.emit("}", 1) self.sumTrailer(name, True) def visitAttributeDeclaration(self, a, name, sum=sum): ctype = get_c_type(a.type) self.emit("%s %s;" % (ctype, a.name), 1) def visitSum(self, sum, name): if is_simple(sum): self.simpleSum(sum, name) else: self.complexSum(sum, name) def visitProduct(self, prod, name): ctype = get_c_type(name) self.emit("int", 0) self.emit("obj2ast_%s(struct ast_state *state, PyObject* obj, %s* out, PyArena* arena)" % (name, ctype), 0) self.emit("{", 0) self.emit("PyObject* tmp = NULL;", 1) for f in prod.fields: self.visitFieldDeclaration(f, name, prod=prod, depth=1) for a in prod.attributes: self.visitFieldDeclaration(a, name, prod=prod, depth=1) self.emit("", 0) for f in prod.fields: self.visitField(f, name, prod=prod, depth=1) for a in prod.attributes: self.visitField(a, name, prod=prod, depth=1) args = [f.name for f in prod.fields] args.extend([a.name for a in prod.attributes]) self.emit("*out = %s(%s);" % (ast_func_name(name), self.buildArgs(args)), 1) self.emit("if (*out == NULL) goto failed;", 1) self.emit("return 0;", 1) self.emit("failed:", 0) self.emit("Py_XDECREF(tmp);", 1) self.emit("return -1;", 1) self.emit("}", 0) self.emit("", 0) def visitFieldDeclaration(self, field, name, sum=None, prod=None, depth=0): ctype = get_c_type(field.type) if field.seq: if self.isSimpleType(field): self.emit("asdl_int_seq* %s;" % field.name, depth) else: _type = field.type self.emit(f"asdl_{field.type}_seq* {field.name};", depth) else: ctype = get_c_type(field.type) self.emit("%s %s;" % (ctype, field.name), depth) def isNumeric(self, field): return get_c_type(field.type) in ("int", "bool") def isSimpleType(self, field): return field.type in self.metadata.simple_sums or self.isNumeric(field) def visitField(self, field, name, sum=None, prod=None, depth=0): ctype = get_c_type(field.type) line = "if (PyObject_GetOptionalAttr(obj, state->%s, &tmp) < 0) {" self.emit(line % field.name, depth) self.emit("return -1;", depth+1) self.emit("}", depth) if field.seq: self.emit("if (tmp == NULL) {", depth) self.emit("tmp = PyList_New(0);", depth+1) self.emit("if (tmp == NULL) {", depth+1) self.emit("return -1;", depth+2) self.emit("}", depth+1) self.emit("}", depth) self.emit("{", depth) else: if not field.opt: self.emit("if (tmp == NULL) {", depth) message = "required field \\\"%s\\\" missing from %s" % (field.name, name) format = "PyErr_SetString(PyExc_TypeError, \"%s\");" self.emit(format % message, depth+1, reflow=False) self.emit("return -1;", depth+1) else: self.emit("if (tmp == NULL || tmp == Py_None) {", depth) self.emit("Py_CLEAR(tmp);", depth+1) if self.isNumeric(field): if field.name in self.attribute_special_defaults: self.emit( "%s = %s;" % (field.name, self.attribute_special_defaults[field.name]), depth+1, ) else: self.emit("%s = 0;" % field.name, depth+1) elif not self.isSimpleType(field): self.emit("%s = NULL;" % field.name, depth+1) else: raise TypeError("could not determine the default value for %s" % field.name) self.emit("}", depth) self.emit("else {", depth) self.emit("int res;", depth+1) if field.seq: self.emit("Py_ssize_t len;", depth+1) self.emit("Py_ssize_t i;", depth+1) self.emit("if (!PyList_Check(tmp)) {", depth+1) self.emit("PyErr_Format(PyExc_TypeError, \"%s field \\\"%s\\\" must " "be a list, not a %%.200s\", _PyType_Name(Py_TYPE(tmp)));" % (name, field.name), depth+2, reflow=False) self.emit("goto failed;", depth+2) self.emit("}", depth+1) self.emit("len = PyList_GET_SIZE(tmp);", depth+1) if self.isSimpleType(field): self.emit("%s = _Py_asdl_int_seq_new(len, arena);" % field.name, depth+1) else: self.emit("%s = _Py_asdl_%s_seq_new(len, arena);" % (field.name, field.type), depth+1) self.emit("if (%s == NULL) goto failed;" % field.name, depth+1) self.emit("for (i = 0; i < len; i++) {", depth+1) self.emit("%s val;" % ctype, depth+2) self.emit("PyObject *tmp2 = Py_NewRef(PyList_GET_ITEM(tmp, i));", depth+2) with self.recursive_call(name, depth+2): self.emit("res = obj2ast_%s(state, tmp2, &val, arena);" % field.type, depth+2, reflow=False) self.emit("Py_DECREF(tmp2);", depth+2) self.emit("if (res != 0) goto failed;", depth+2) self.emit("if (len != PyList_GET_SIZE(tmp)) {", depth+2) self.emit("PyErr_SetString(PyExc_RuntimeError, \"%s field \\\"%s\\\" " "changed size during iteration\");" % (name, field.name), depth+3, reflow=False) self.emit("goto failed;", depth+3) self.emit("}", depth+2) self.emit("asdl_seq_SET(%s, i, val);" % field.name, depth+2) self.emit("}", depth+1) else: with self.recursive_call(name, depth+1): self.emit("res = obj2ast_%s(state, tmp, &%s, arena);" % (field.type, field.name), depth+1) self.emit("if (res != 0) goto failed;", depth+1) self.emit("Py_CLEAR(tmp);", depth+1) self.emit("}", depth) class SequenceConstructorVisitor(EmitVisitor): def visitModule(self, mod): for dfn in mod.dfns: self.visit(dfn) def visitType(self, type): self.visit(type.value, type.name) def visitProduct(self, prod, name): self.emit_sequence_constructor(name, get_c_type(name)) def visitSum(self, sum, name): if not is_simple(sum): self.emit_sequence_constructor(name, get_c_type(name)) def emit_sequence_constructor(self, name, type): self.emit(f"GENERATE_ASDL_SEQ_CONSTRUCTOR({name}, {type})", depth=0) class PyTypesDeclareVisitor(PickleVisitor): def visitProduct(self, prod, name): self.emit("static PyObject* ast2obj_%s(struct ast_state *state, struct validator *vstate, void*);" % name, 0) if prod.attributes: self.emit("static const char * const %s_attributes[] = {" % name, 0) for a in prod.attributes: self.emit('"%s",' % a.name, 1) self.emit("};", 0) if prod.fields: self.emit("static const char * const %s_fields[]={" % name,0) for f in prod.fields: self.emit('"%s",' % f.name, 1) self.emit("};", 0) def visitSum(self, sum, name): if sum.attributes: self.emit("static const char * const %s_attributes[] = {" % name, 0) for a in sum.attributes: self.emit('"%s",' % a.name, 1) self.emit("};", 0) ptype = "void*" if is_simple(sum): ptype = get_c_type(name) self.emit("static PyObject* ast2obj_%s(struct ast_state *state, struct validator *vstate, %s);" % (name, ptype), 0) for t in sum.types: self.visitConstructor(t, name) def visitConstructor(self, cons, name): if cons.fields: self.emit("static const char * const %s_fields[]={" % cons.name, 0) for t in cons.fields: self.emit('"%s",' % t.name, 1) self.emit("};",0) class AnnotationsVisitor(PickleVisitor): def visitModule(self, mod): self.file.write(textwrap.dedent(''' static int add_ast_annotations(struct ast_state *state) { bool cond; ''')) for dfn in mod.dfns: self.visit(dfn) self.file.write(textwrap.dedent(''' return 1; } ''')) def visitProduct(self, prod, name): self.emit_annotations(name, prod.fields) def visitSum(self, sum, name): for t in sum.types: self.visitConstructor(t, name) def visitConstructor(self, cons, name): self.emit_annotations(cons.name, cons.fields) def emit_annotations(self, name, fields): self.emit(f"PyObject *{name}_annotations = PyDict_New();", 1) self.emit(f"if (!{name}_annotations) return 0;", 1) for field in fields: self.emit("{", 1) if field.type in builtin_type_to_c_type: self.emit(f"PyObject *type = (PyObject *)&{builtin_type_to_c_type[field.type]};", 2) else: self.emit(f"PyObject *type = state->{field.type}_type;", 2) if field.opt: self.emit("type = _Py_union_type_or(type, Py_None);", 2) self.emit("cond = type != NULL;", 2) self.emit_annotations_error(name, 2) elif field.seq: self.emit("type = Py_GenericAlias((PyObject *)&PyList_Type, type);", 2) self.emit("cond = type != NULL;", 2) self.emit_annotations_error(name, 2) else: self.emit("Py_INCREF(type);", 2) self.emit(f"cond = PyDict_SetItemString({name}_annotations, \"{field.name}\", type) == 0;", 2) self.emit("Py_DECREF(type);", 2) self.emit_annotations_error(name, 2) self.emit("}", 1) self.emit(f'cond = PyObject_SetAttrString(state->{name}_type, "_field_types", {name}_annotations) == 0;', 1) self.emit_annotations_error(name, 1) self.emit(f'cond = PyObject_SetAttrString(state->{name}_type, "__annotations__", {name}_annotations) == 0;', 1) self.emit_annotations_error(name, 1) self.emit(f"Py_DECREF({name}_annotations);", 1) def emit_annotations_error(self, name, depth): self.emit("if (!cond) {", depth) self.emit(f"Py_DECREF({name}_annotations);", depth + 1) self.emit("return 0;", depth + 1) self.emit("}", depth) class PyTypesVisitor(PickleVisitor): def visitModule(self, mod): self.emit(""" typedef struct { PyObject_HEAD PyObject *dict; } AST_object; static void ast_dealloc(AST_object *self) { /* bpo-31095: UnTrack is needed before calling any callbacks */ PyTypeObject *tp = Py_TYPE(self); PyObject_GC_UnTrack(self); Py_CLEAR(self->dict); freefunc free_func = PyType_GetSlot(tp, Py_tp_free); assert(free_func != NULL); free_func(self); Py_DECREF(tp); } static int ast_traverse(AST_object *self, visitproc visit, void *arg) { Py_VISIT(Py_TYPE(self)); Py_VISIT(self->dict); return 0; } static int ast_clear(AST_object *self) { Py_CLEAR(self->dict); return 0; } static int ast_type_init(PyObject *self, PyObject *args, PyObject *kw) { struct ast_state *state = get_ast_state(); if (state == NULL) { return -1; } Py_ssize_t i, numfields = 0; int res = -1; PyObject *key, *value, *fields, *attributes = NULL, *remaining_fields = NULL; if (PyObject_GetOptionalAttr((PyObject*)Py_TYPE(self), state->_fields, &fields) < 0) { goto cleanup; } if (fields) { numfields = PySequence_Size(fields); if (numfields == -1) { goto cleanup; } remaining_fields = PySet_New(fields); } else { remaining_fields = PySet_New(NULL); } if (remaining_fields == NULL) { goto cleanup; } res = 0; /* if no error occurs, this stays 0 to the end */ if (numfields < PyTuple_GET_SIZE(args)) { PyErr_Format(PyExc_TypeError, "%.400s constructor takes at most " "%zd positional argument%s", _PyType_Name(Py_TYPE(self)), numfields, numfields == 1 ? "" : "s"); res = -1; goto cleanup; } for (i = 0; i < PyTuple_GET_SIZE(args); i++) { /* cannot be reached when fields is NULL */ PyObject *name = PySequence_GetItem(fields, i); if (!name) { res = -1; goto cleanup; } res = PyObject_SetAttr(self, name, PyTuple_GET_ITEM(args, i)); if (PySet_Discard(remaining_fields, name) < 0) { res = -1; Py_DECREF(name); goto cleanup; } Py_DECREF(name); if (res < 0) { goto cleanup; } } if (kw) { i = 0; /* needed by PyDict_Next */ while (PyDict_Next(kw, &i, &key, &value)) { int contains = PySequence_Contains(fields, key); if (contains == -1) { res = -1; goto cleanup; } else if (contains == 1) { int p = PySet_Discard(remaining_fields, key); if (p == -1) { res = -1; goto cleanup; } if (p == 0) { PyErr_Format(PyExc_TypeError, "%.400s got multiple values for argument '%U'", Py_TYPE(self)->tp_name, key); res = -1; goto cleanup; } } else { // Lazily initialize "attributes" if (attributes == NULL) { attributes = PyObject_GetAttr((PyObject*)Py_TYPE(self), state->_attributes); if (attributes == NULL) { res = -1; goto cleanup; } } int contains = PySequence_Contains(attributes, key); if (contains == -1) { res = -1; goto cleanup; } else if (contains == 0) { if (PyErr_WarnFormat( PyExc_DeprecationWarning, 1, "%.400s.__init__ got an unexpected keyword argument '%U'. " "Support for arbitrary keyword arguments is deprecated " "and will be removed in Python 3.15.", Py_TYPE(self)->tp_name, key ) < 0) { res = -1; goto cleanup; } } } res = PyObject_SetAttr(self, key, value); if (res < 0) { goto cleanup; } } } Py_ssize_t size = PySet_Size(remaining_fields); PyObject *field_types = NULL, *remaining_list = NULL; if (size > 0) { if (PyObject_GetOptionalAttr((PyObject*)Py_TYPE(self), &_Py_ID(_field_types), &field_types) < 0) { res = -1; goto cleanup; } if (field_types == NULL) { // Probably a user-defined subclass of AST that lacks _field_types. // This will continue to work as it did before 3.13; i.e., attributes // that are not passed in simply do not exist on the instance. goto cleanup; } remaining_list = PySequence_List(remaining_fields); if (!remaining_list) { goto set_remaining_cleanup; } for (Py_ssize_t i = 0; i < size; i++) { PyObject *name = PyList_GET_ITEM(remaining_list, i); PyObject *type = PyDict_GetItemWithError(field_types, name); if (!type) { if (PyErr_Occurred()) { goto set_remaining_cleanup; } else { if (PyErr_WarnFormat( PyExc_DeprecationWarning, 1, "Field '%U' is missing from %.400s._field_types. " "This will become an error in Python 3.15.", name, Py_TYPE(self)->tp_name ) < 0) { goto set_remaining_cleanup; } } } else if (_PyUnion_Check(type)) { // optional field // do nothing, we'll have set a None default on the class } else if (Py_IS_TYPE(type, &Py_GenericAliasType)) { // list field PyObject *empty = PyList_New(0); if (!empty) { goto set_remaining_cleanup; } res = PyObject_SetAttr(self, name, empty); Py_DECREF(empty); if (res < 0) { goto set_remaining_cleanup; } } else if (type == state->expr_context_type) { // special case for expr_context: default to Load() res = PyObject_SetAttr(self, name, state->Load_singleton); if (res < 0) { goto set_remaining_cleanup; } } else { // simple field (e.g., identifier) if (PyErr_WarnFormat( PyExc_DeprecationWarning, 1, "%.400s.__init__ missing 1 required positional argument: '%U'. " "This will become an error in Python 3.15.", Py_TYPE(self)->tp_name, name ) < 0) { goto set_remaining_cleanup; } } } Py_DECREF(remaining_list); Py_DECREF(field_types); } cleanup: Py_XDECREF(attributes); Py_XDECREF(fields); Py_XDECREF(remaining_fields); return res; set_remaining_cleanup: Py_XDECREF(remaining_list); Py_XDECREF(field_types); res = -1; goto cleanup; } /* Pickling support */ static PyObject * ast_type_reduce(PyObject *self, PyObject *unused) { struct ast_state *state = get_ast_state(); if (state == NULL) { return NULL; } PyObject *dict = NULL, *fields = NULL, *positional_args = NULL; if (PyObject_GetOptionalAttr(self, state->__dict__, &dict) < 0) { return NULL; } PyObject *result = NULL; if (dict) { // Unpickling (or copying) works as follows: // - Construct the object with only positional arguments // - Set the fields from the dict // We have two constraints: // - We must set all the required fields in the initial constructor call, // or the unpickling or deepcopying of the object will trigger DeprecationWarnings. // - We must not include child nodes in the positional args, because // that may trigger runaway recursion during copying (gh-120108). // To satisfy both constraints, we set all the fields to None in the // initial list of positional args, and then set the fields from the dict. if (PyObject_GetOptionalAttr((PyObject*)Py_TYPE(self), state->_fields, &fields) < 0) { goto cleanup; } if (fields) { Py_ssize_t numfields = PySequence_Size(fields); if (numfields == -1) { Py_DECREF(dict); goto cleanup; } positional_args = PyList_New(0); if (!positional_args) { goto cleanup; } for (Py_ssize_t i = 0; i < numfields; i++) { PyObject *name = PySequence_GetItem(fields, i); if (!name) { goto cleanup; } PyObject *value; int rc = PyDict_GetItemRef(dict, name, &value); Py_DECREF(name); if (rc < 0) { goto cleanup; } if (!value) { break; } rc = PyList_Append(positional_args, Py_None); Py_DECREF(value); if (rc < 0) { goto cleanup; } } PyObject *args_tuple = PyList_AsTuple(positional_args); if (!args_tuple) { goto cleanup; } result = Py_BuildValue("ONN", Py_TYPE(self), args_tuple, dict); } else { result = Py_BuildValue("O()N", Py_TYPE(self), dict); } } else { result = Py_BuildValue("O()", Py_TYPE(self)); } cleanup: Py_XDECREF(fields); Py_XDECREF(positional_args); return result; } /* * Perform the following validations: * * - All keyword arguments are known 'fields' or 'attributes'. * - No field or attribute would be left unfilled after copy.replace(). * * On success, this returns 1. Otherwise, set a TypeError * exception and returns -1 (no exception is set if some * other internal errors occur). * * Parameters * * self The AST node instance. * dict The AST node instance dictionary (self.__dict__). * fields The list of fields (self._fields). * attributes The list of attributes (self._attributes). * kwargs Keyword arguments passed to ast_type_replace(). * * The 'dict', 'fields', 'attributes' and 'kwargs' arguments can be NULL. * * Note: this function can be removed in 3.15 since the verification * will be done inside the constructor. */ static inline int ast_type_replace_check(PyObject *self, PyObject *dict, PyObject *fields, PyObject *attributes, PyObject *kwargs) { // While it is possible to make some fast paths that would avoid // allocating objects on the stack, this would cost us readability. // For instance, if 'fields' and 'attributes' are both empty, and // 'kwargs' is not empty, we could raise a TypeError immediately. PyObject *expecting = PySet_New(fields); if (expecting == NULL) { return -1; } if (attributes) { if (_PySet_Update(expecting, attributes) < 0) { Py_DECREF(expecting); return -1; } } // Any keyword argument that is neither a field nor attribute is rejected. // We first need to check whether a keyword argument is accepted or not. // If all keyword arguments are accepted, we compute the required fields // and attributes. A field or attribute is not needed if: // // 1) it is given in 'kwargs', or // 2) it already exists on 'self'. if (kwargs) { Py_ssize_t pos = 0; PyObject *key, *value; while (PyDict_Next(kwargs, &pos, &key, &value)) { int rc = PySet_Discard(expecting, key); if (rc < 0) { Py_DECREF(expecting); return -1; } if (rc == 0) { PyErr_Format(PyExc_TypeError, "%.400s.__replace__ got an unexpected keyword " "argument '%U'.", Py_TYPE(self)->tp_name, key); Py_DECREF(expecting); return -1; } } } // check that the remaining fields or attributes would be filled if (dict) { Py_ssize_t pos = 0; PyObject *key, *value; while (PyDict_Next(dict, &pos, &key, &value)) { // Mark fields or attributes that are found on the instance // as non-mandatory. If they are not given in 'kwargs', they // will be shallow-coied; otherwise, they would be replaced // (not in this function). if (PySet_Discard(expecting, key) < 0) { Py_DECREF(expecting); return -1; } } if (attributes) { // Some attributes may or may not be present at runtime. // In particular, now that we checked whether 'kwargs' // is correct or not, we allow any attribute to be missing. // // Note that fields must still be entirely determined when // calling the constructor later. PyObject *unused = PyObject_CallMethodOneArg(expecting, &_Py_ID(difference_update), attributes); if (unused == NULL) { Py_DECREF(expecting); return -1; } Py_DECREF(unused); } } // Now 'expecting' contains the fields or attributes // that would not be filled inside ast_type_replace(). Py_ssize_t m = PySet_GET_SIZE(expecting); if (m > 0) { PyObject *names = PyList_New(m); if (names == NULL) { Py_DECREF(expecting); return -1; } Py_ssize_t i = 0, pos = 0; PyObject *item; Py_hash_t hash; while (_PySet_NextEntry(expecting, &pos, &item, &hash)) { PyObject *name = PyObject_Repr(item); if (name == NULL) { Py_DECREF(expecting); Py_DECREF(names); return -1; } // steal the reference 'name' PyList_SET_ITEM(names, i++, name); } Py_DECREF(expecting); if (PyList_Sort(names) < 0) { Py_DECREF(names); return -1; } PyObject *sep = PyUnicode_FromString(", "); if (sep == NULL) { Py_DECREF(names); return -1; } PyObject *str_names = PyUnicode_Join(sep, names); Py_DECREF(sep); Py_DECREF(names); if (str_names == NULL) { return -1; } PyErr_Format(PyExc_TypeError, "%.400s.__replace__ missing %ld keyword argument%s: %U.", Py_TYPE(self)->tp_name, m, m == 1 ? "" : "s", str_names); Py_DECREF(str_names); return -1; } else { Py_DECREF(expecting); return 1; } } /* * Python equivalent: * * for key in keys: * if hasattr(self, key): * payload[key] = getattr(self, key) * * The 'keys' argument is a sequence corresponding to * the '_fields' or the '_attributes' of an AST node. * * This returns -1 if an error occurs and 0 otherwise. * * Parameters * * payload A dictionary to fill. * keys A sequence of keys or NULL for an empty sequence. * dict The AST node instance dictionary (must not be NULL). */ static inline int ast_type_replace_update_payload(PyObject *payload, PyObject *keys, PyObject *dict) { assert(dict != NULL); if (keys == NULL) { return 0; } Py_ssize_t n = PySequence_Size(keys); if (n == -1) { return -1; } for (Py_ssize_t i = 0; i < n; i++) { PyObject *key = PySequence_GetItem(keys, i); if (key == NULL) { return -1; } PyObject *value; if (PyDict_GetItemRef(dict, key, &value) < 0) { Py_DECREF(key); return -1; } if (value == NULL) { Py_DECREF(key); // If a field or attribute is not present at runtime, it should // be explicitly given in 'kwargs'. If not, the constructor will // issue a warning (which becomes an error in 3.15). continue; } int rc = PyDict_SetItem(payload, key, value); Py_DECREF(key); Py_DECREF(value); if (rc < 0) { return -1; } } return 0; } /* copy.replace() support (shallow copy) */ static PyObject * ast_type_replace(PyObject *self, PyObject *args, PyObject *kwargs) { if (!_PyArg_NoPositional("__replace__", args)) { return NULL; } struct ast_state *state = get_ast_state(); if (state == NULL) { return NULL; } PyObject *result = NULL; // known AST class fields and attributes PyObject *fields = NULL, *attributes = NULL; // current instance dictionary PyObject *dict = NULL; // constructor positional and keyword arguments PyObject *empty_tuple = NULL, *payload = NULL; PyObject *type = (PyObject *)Py_TYPE(self); if (PyObject_GetOptionalAttr(type, state->_fields, &fields) < 0) { goto cleanup; } if (PyObject_GetOptionalAttr(type, state->_attributes, &attributes) < 0) { goto cleanup; } if (PyObject_GetOptionalAttr(self, state->__dict__, &dict) < 0) { goto cleanup; } if (ast_type_replace_check(self, dict, fields, attributes, kwargs) < 0) { goto cleanup; } empty_tuple = PyTuple_New(0); if (empty_tuple == NULL) { goto cleanup; } payload = PyDict_New(); if (payload == NULL) { goto cleanup; } if (dict) { // in case __dict__ is missing (for some obscure reason) // copy the instance's fields (possibly NULL) if (ast_type_replace_update_payload(payload, fields, dict) < 0) { goto cleanup; } // copy the instance's attributes (possibly NULL) if (ast_type_replace_update_payload(payload, attributes, dict) < 0) { goto cleanup; } } if (kwargs && PyDict_Update(payload, kwargs) < 0) { goto cleanup; } result = PyObject_Call(type, empty_tuple, payload); cleanup: Py_XDECREF(payload); Py_XDECREF(empty_tuple); Py_XDECREF(dict); Py_XDECREF(attributes); Py_XDECREF(fields); return result; } static PyMemberDef ast_type_members[] = { {"__dictoffset__", Py_T_PYSSIZET, offsetof(AST_object, dict), Py_READONLY}, {NULL} /* Sentinel */ }; static PyMethodDef ast_type_methods[] = { {"__reduce__", ast_type_reduce, METH_NOARGS, NULL}, {"__replace__", _PyCFunction_CAST(ast_type_replace), METH_VARARGS | METH_KEYWORDS, PyDoc_STR("__replace__($self, /, **fields)\\n--\\n\\n" "Return a copy of the AST node with new values " "for the specified fields.")}, {NULL} }; static PyGetSetDef ast_type_getsets[] = { {"__dict__", PyObject_GenericGetDict, PyObject_GenericSetDict}, {NULL} }; static PyType_Slot AST_type_slots[] = { {Py_tp_dealloc, ast_dealloc}, {Py_tp_getattro, PyObject_GenericGetAttr}, {Py_tp_setattro, PyObject_GenericSetAttr}, {Py_tp_traverse, ast_traverse}, {Py_tp_clear, ast_clear}, {Py_tp_members, ast_type_members}, {Py_tp_methods, ast_type_methods}, {Py_tp_getset, ast_type_getsets}, {Py_tp_init, ast_type_init}, {Py_tp_alloc, PyType_GenericAlloc}, {Py_tp_new, PyType_GenericNew}, {Py_tp_free, PyObject_GC_Del}, {0, 0}, }; static PyType_Spec AST_type_spec = { "ast.AST", sizeof(AST_object), 0, Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, AST_type_slots }; static PyObject * make_type(struct ast_state *state, const char *type, PyObject* base, const char* const* fields, int num_fields, const char *doc) { PyObject *fnames, *result; int i; fnames = PyTuple_New(num_fields); if (!fnames) return NULL; for (i = 0; i < num_fields; i++) { PyObject *field = PyUnicode_InternFromString(fields[i]); if (!field) { Py_DECREF(fnames); return NULL; } PyTuple_SET_ITEM(fnames, i, field); } result = PyObject_CallFunction((PyObject*)&PyType_Type, "s(O){OOOOOOOs}", type, base, state->_fields, fnames, state->__match_args__, fnames, state->__module__, state->ast, state->__doc__, doc); Py_DECREF(fnames); return result; } static int add_attributes(struct ast_state *state, PyObject *type, const char * const *attrs, int num_fields) { int i, result; PyObject *s, *l = PyTuple_New(num_fields); if (!l) return -1; for (i = 0; i < num_fields; i++) { s = PyUnicode_InternFromString(attrs[i]); if (!s) { Py_DECREF(l); return -1; } PyTuple_SET_ITEM(l, i, s); } result = PyObject_SetAttr(type, state->_attributes, l); Py_DECREF(l); return result; } /* Conversion AST -> Python */ static PyObject* ast2obj_list(struct ast_state *state, struct validator *vstate, asdl_seq *seq, PyObject* (*func)(struct ast_state *state, struct validator *vstate, void*)) { Py_ssize_t i, n = asdl_seq_LEN(seq); PyObject *result = PyList_New(n); PyObject *value; if (!result) return NULL; for (i = 0; i < n; i++) { value = func(state, vstate, asdl_seq_GET_UNTYPED(seq, i)); if (!value) { Py_DECREF(result); return NULL; } PyList_SET_ITEM(result, i, value); } return result; } static PyObject* ast2obj_object(struct ast_state *Py_UNUSED(state), struct validator *Py_UNUSED(vstate), void *o) { PyObject *op = (PyObject*)o; if (!op) { op = Py_None; } return Py_NewRef(op); } #define ast2obj_constant ast2obj_object #define ast2obj_identifier ast2obj_object #define ast2obj_string ast2obj_object static PyObject* ast2obj_int(struct ast_state *Py_UNUSED(state), struct validator *Py_UNUSED(vstate), long b) { return PyLong_FromLong(b); } /* Conversion Python -> AST */ static int obj2ast_object(struct ast_state *Py_UNUSED(state), PyObject* obj, PyObject** out, PyArena* arena) { if (obj == Py_None) obj = NULL; if (obj) { if (_PyArena_AddPyObject(arena, obj) < 0) { *out = NULL; return -1; } *out = Py_NewRef(obj); } else { *out = NULL; } return 0; } static int obj2ast_constant(struct ast_state *Py_UNUSED(state), PyObject* obj, PyObject** out, PyArena* arena) { if (_PyArena_AddPyObject(arena, obj) < 0) { *out = NULL; return -1; } *out = Py_NewRef(obj); return 0; } static int obj2ast_identifier(struct ast_state *state, PyObject* obj, PyObject** out, PyArena* arena) { if (!PyUnicode_CheckExact(obj) && obj != Py_None) { PyErr_SetString(PyExc_TypeError, "AST identifier must be of type str"); return -1; } return obj2ast_object(state, obj, out, arena); } static int obj2ast_string(struct ast_state *state, PyObject* obj, PyObject** out, PyArena* arena) { if (!PyUnicode_CheckExact(obj) && !PyBytes_CheckExact(obj)) { PyErr_SetString(PyExc_TypeError, "AST string must be of type str"); return -1; } return obj2ast_object(state, obj, out, arena); } static int obj2ast_int(struct ast_state* Py_UNUSED(state), PyObject* obj, int* out, PyArena* arena) { int i; if (!PyLong_Check(obj)) { PyErr_Format(PyExc_ValueError, "invalid integer value: %R", obj); return -1; } i = PyLong_AsInt(obj); if (i == -1 && PyErr_Occurred()) return -1; *out = i; return 0; } static int add_ast_fields(struct ast_state *state) { PyObject *empty_tuple; empty_tuple = PyTuple_New(0); if (!empty_tuple || PyObject_SetAttrString(state->AST_type, "_fields", empty_tuple) < 0 || PyObject_SetAttrString(state->AST_type, "__match_args__", empty_tuple) < 0 || PyObject_SetAttrString(state->AST_type, "_attributes", empty_tuple) < 0) { Py_XDECREF(empty_tuple); return -1; } Py_DECREF(empty_tuple); return 0; } """, 0, reflow=False) self.file.write(textwrap.dedent(''' static int init_types(struct ast_state *state) { if (init_identifiers(state) < 0) { return -1; } state->AST_type = PyType_FromSpec(&AST_type_spec); if (!state->AST_type) { return -1; } if (add_ast_fields(state) < 0) { return -1; } ''')) for dfn in mod.dfns: self.visit(dfn) self.file.write(textwrap.dedent(''' if (!add_ast_annotations(state)) { return -1; } return 0; } ''')) def visitProduct(self, prod, name): if prod.fields: fields = name+"_fields" else: fields = "NULL" self.emit('state->%s_type = make_type(state, "%s", state->AST_type, %s, %d,' % (name, name, fields, len(prod.fields)), 1) self.emit('%s);' % reflow_c_string(asdl_of(name, prod), 2), 2, reflow=False) self.emit("if (!state->%s_type) return -1;" % name, 1) if prod.attributes: self.emit("if (add_attributes(state, state->%s_type, %s_attributes, %d) < 0) return -1;" % (name, name, len(prod.attributes)), 1) else: self.emit("if (add_attributes(state, state->%s_type, NULL, 0) < 0) return -1;" % name, 1) self.emit_defaults(name, prod.fields, 1) self.emit_defaults(name, prod.attributes, 1) def visitSum(self, sum, name): self.emit('state->%s_type = make_type(state, "%s", state->AST_type, NULL, 0,' % (name, name), 1) self.emit('%s);' % reflow_c_string(asdl_of(name, sum), 2), 2, reflow=False) self.emit("if (!state->%s_type) return -1;" % name, 1) if sum.attributes: self.emit("if (add_attributes(state, state->%s_type, %s_attributes, %d) < 0) return -1;" % (name, name, len(sum.attributes)), 1) else: self.emit("if (add_attributes(state, state->%s_type, NULL, 0) < 0) return -1;" % name, 1) self.emit_defaults(name, sum.attributes, 1) simple = is_simple(sum) for t in sum.types: self.visitConstructor(t, name, simple) def visitConstructor(self, cons, name, simple): if cons.fields: fields = cons.name+"_fields" else: fields = "NULL" self.emit('state->%s_type = make_type(state, "%s", state->%s_type, %s, %d,' % (cons.name, cons.name, name, fields, len(cons.fields)), 1) self.emit('%s);' % reflow_c_string(asdl_of(cons.name, cons), 2), 2, reflow=False) self.emit("if (!state->%s_type) return -1;" % cons.name, 1) self.emit_defaults(cons.name, cons.fields, 1) if simple: self.emit("state->%s_singleton = PyType_GenericNew((PyTypeObject *)" "state->%s_type, NULL, NULL);" % (cons.name, cons.name), 1) self.emit("if (!state->%s_singleton) return -1;" % cons.name, 1) def emit_defaults(self, name, fields, depth): for field in fields: if field.opt: self.emit('if (PyObject_SetAttr(state->%s_type, state->%s, Py_None) == -1)' % (name, field.name), depth) self.emit("return -1;", depth+1) class ASTModuleVisitor(PickleVisitor): def visitModule(self, mod): self.emit("static int", 0) self.emit("astmodule_exec(PyObject *m)", 0) self.emit("{", 0) self.emit('struct ast_state *state = get_ast_state();', 1) self.emit('if (state == NULL) {', 1) self.emit('return -1;', 2) self.emit('}', 1) self.emit('if (PyModule_AddObjectRef(m, "AST", state->AST_type) < 0) {', 1) self.emit('return -1;', 2) self.emit('}', 1) self.emit('if (PyModule_AddIntMacro(m, PyCF_ALLOW_TOP_LEVEL_AWAIT) < 0) {', 1) self.emit("return -1;", 2) self.emit('}', 1) self.emit('if (PyModule_AddIntMacro(m, PyCF_ONLY_AST) < 0) {', 1) self.emit("return -1;", 2) self.emit('}', 1) self.emit('if (PyModule_AddIntMacro(m, PyCF_TYPE_COMMENTS) < 0) {', 1) self.emit("return -1;", 2) self.emit('}', 1) self.emit('if (PyModule_AddIntMacro(m, PyCF_OPTIMIZED_AST) < 0) {', 1) self.emit("return -1;", 2) self.emit('}', 1) for dfn in mod.dfns: self.visit(dfn) self.emit("return 0;", 1) self.emit("}", 0) self.emit("", 0) self.emit(""" static PyModuleDef_Slot astmodule_slots[] = { {Py_mod_exec, astmodule_exec}, {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, {Py_mod_gil, Py_MOD_GIL_NOT_USED}, {0, NULL} }; static struct PyModuleDef _astmodule = { PyModuleDef_HEAD_INIT, .m_name = "_ast", // The _ast module uses a per-interpreter state (PyInterpreterState.ast) .m_size = 0, .m_slots = astmodule_slots, }; PyMODINIT_FUNC PyInit__ast(void) { return PyModuleDef_Init(&_astmodule); } """.strip(), 0, reflow=False) def visitProduct(self, prod, name): self.addObj(name) def visitSum(self, sum, name): self.addObj(name) for t in sum.types: self.visitConstructor(t, name) def visitConstructor(self, cons, name): self.addObj(cons.name) def addObj(self, name): self.emit("if (PyModule_AddObjectRef(m, \"%s\", " "state->%s_type) < 0) {" % (name, name), 1) self.emit("return -1;", 2) self.emit('}', 1) class StaticVisitor(PickleVisitor): CODE = '''Very simple, always emit this static code. Override CODE''' def visit(self, object): self.emit(self.CODE, 0, reflow=False) class ObjVisitor(PickleVisitor): def func_begin(self, name): ctype = get_c_type(name) self.emit("PyObject*", 0) self.emit("ast2obj_%s(struct ast_state *state, struct validator *vstate, void* _o)" % (name), 0) self.emit("{", 0) self.emit("%s o = (%s)_o;" % (ctype, ctype), 1) self.emit("PyObject *result = NULL, *value = NULL;", 1) self.emit("PyTypeObject *tp;", 1) self.emit('if (!o) {', 1) self.emit("Py_RETURN_NONE;", 2) self.emit("}", 1) self.emit("if (++vstate->recursion_depth > vstate->recursion_limit) {", 1) self.emit("PyErr_SetString(PyExc_RecursionError,", 2) self.emit('"maximum recursion depth exceeded during ast construction");', 3) self.emit("return NULL;", 2) self.emit("}", 1) def func_end(self): self.emit("vstate->recursion_depth--;", 1) self.emit("return result;", 1) self.emit("failed:", 0) self.emit("vstate->recursion_depth--;", 1) self.emit("Py_XDECREF(value);", 1) self.emit("Py_XDECREF(result);", 1) self.emit("return NULL;", 1) self.emit("}", 0) self.emit("", 0) def visitSum(self, sum, name): if is_simple(sum): self.simpleSum(sum, name) return self.func_begin(name) self.emit("switch (o->kind) {", 1) for i in range(len(sum.types)): t = sum.types[i] self.visitConstructor(t, i + 1, name) self.emit("}", 1) for a in sum.attributes: self.emit("value = ast2obj_%s(state, vstate, o->%s);" % (a.type, a.name), 1) self.emit("if (!value) goto failed;", 1) self.emit('if (PyObject_SetAttr(result, state->%s, value) < 0)' % a.name, 1) self.emit('goto failed;', 2) self.emit('Py_DECREF(value);', 1) self.func_end() def simpleSum(self, sum, name): self.emit("PyObject* ast2obj_%s(struct ast_state *state, struct validator *vstate, %s_ty o)" % (name, name), 0) self.emit("{", 0) self.emit("switch(o) {", 1) for t in sum.types: self.emit("case %s:" % t.name, 2) self.emit("return Py_NewRef(state->%s_singleton);" % t.name, 3) self.emit("}", 1) self.emit("Py_UNREACHABLE();", 1); self.emit("}", 0) def visitProduct(self, prod, name): self.func_begin(name) self.emit("tp = (PyTypeObject *)state->%s_type;" % name, 1) self.emit("result = PyType_GenericNew(tp, NULL, NULL);", 1); self.emit("if (!result) return NULL;", 1) for field in prod.fields: self.visitField(field, name, 1, True) for a in prod.attributes: self.emit("value = ast2obj_%s(state, vstate, o->%s);" % (a.type, a.name), 1) self.emit("if (!value) goto failed;", 1) self.emit("if (PyObject_SetAttr(result, state->%s, value) < 0)" % a.name, 1) self.emit('goto failed;', 2) self.emit('Py_DECREF(value);', 1) self.func_end() def visitConstructor(self, cons, enum, name): self.emit("case %s_kind:" % cons.name, 1) self.emit("tp = (PyTypeObject *)state->%s_type;" % cons.name, 2) self.emit("result = PyType_GenericNew(tp, NULL, NULL);", 2); self.emit("if (!result) goto failed;", 2) for f in cons.fields: self.visitField(f, cons.name, 2, False) self.emit("break;", 2) def visitField(self, field, name, depth, product): def emit(s, d): self.emit(s, depth + d) if product: value = "o->%s" % field.name else: value = "o->v.%s.%s" % (name, field.name) self.set(field, value, depth) emit("if (!value) goto failed;", 0) emit("if (PyObject_SetAttr(result, state->%s, value) == -1)" % field.name, 0) emit("goto failed;", 1) emit("Py_DECREF(value);", 0) def set(self, field, value, depth): if field.seq: if field.type in self.metadata.simple_sums: # While the sequence elements are stored as void*, # simple sums expects an enum self.emit("{", depth) self.emit("Py_ssize_t i, n = asdl_seq_LEN(%s);" % value, depth+1) self.emit("value = PyList_New(n);", depth+1) self.emit("if (!value) goto failed;", depth+1) self.emit("for(i = 0; i < n; i++)", depth+1) # This cannot fail, so no need for error handling self.emit( "PyList_SET_ITEM(value, i, ast2obj_{0}(state, vstate, ({0}_ty)asdl_seq_GET({1}, i)));".format( field.type, value ), depth + 2, reflow=False, ) self.emit("}", depth) else: self.emit("value = ast2obj_list(state, vstate, (asdl_seq*)%s, ast2obj_%s);" % (value, field.type), depth) else: self.emit("value = ast2obj_%s(state, vstate, %s);" % (field.type, value), depth, reflow=False) class PartingShots(StaticVisitor): CODE = """ PyObject* PyAST_mod2obj(mod_ty t) { struct ast_state *state = get_ast_state(); if (state == NULL) { return NULL; } int starting_recursion_depth; /* Be careful here to prevent overflow. */ PyThreadState *tstate = _PyThreadState_GET(); if (!tstate) { return NULL; } struct validator vstate; vstate.recursion_limit = Py_C_RECURSION_LIMIT; int recursion_depth = Py_C_RECURSION_LIMIT - tstate->c_recursion_remaining; starting_recursion_depth = recursion_depth; vstate.recursion_depth = starting_recursion_depth; PyObject *result = ast2obj_mod(state, &vstate, t); /* Check that the recursion depth counting balanced correctly */ if (result && vstate.recursion_depth != starting_recursion_depth) { PyErr_Format(PyExc_SystemError, "AST constructor recursion depth mismatch (before=%d, after=%d)", starting_recursion_depth, vstate.recursion_depth); return NULL; } return result; } /* mode is 0 for "exec", 1 for "eval" and 2 for "single" input */ mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode) { const char * const req_name[] = {"Module", "Expression", "Interactive"}; int isinstance; if (PySys_Audit("compile", "OO", ast, Py_None) < 0) { return NULL; } struct ast_state *state = get_ast_state(); if (state == NULL) { return NULL; } PyObject *req_type[3]; req_type[0] = state->Module_type; req_type[1] = state->Expression_type; req_type[2] = state->Interactive_type; assert(0 <= mode && mode <= 2); isinstance = PyObject_IsInstance(ast, req_type[mode]); if (isinstance == -1) return NULL; if (!isinstance) { PyErr_Format(PyExc_TypeError, "expected %s node, got %.400s", req_name[mode], _PyType_Name(Py_TYPE(ast))); return NULL; } mod_ty res = NULL; if (obj2ast_mod(state, ast, &res, arena) != 0) return NULL; else return res; } int PyAST_Check(PyObject* obj) { struct ast_state *state = get_ast_state(); if (state == NULL) { return -1; } return PyObject_IsInstance(obj, state->AST_type); } """ class ChainOfVisitors: def __init__(self, *visitors, metadata = None): self.visitors = visitors self.metadata = metadata def visit(self, object): for v in self.visitors: v.metadata = self.metadata v.visit(object) v.emit("", 0) def generate_ast_state(module_state, f): f.write('struct ast_state {\n') f.write(' _PyOnceFlag once;\n') f.write(' int finalized;\n') for s in module_state: f.write(' PyObject *' + s + ';\n') f.write('};') def generate_ast_fini(module_state, f): f.write(textwrap.dedent(""" void _PyAST_Fini(PyInterpreterState *interp) { struct ast_state *state = &interp->ast; """)) for s in module_state: f.write(" Py_CLEAR(state->" + s + ');\n') f.write(textwrap.dedent(""" Py_CLEAR(_Py_INTERP_CACHED_OBJECT(interp, str_replace_inf)); state->finalized = 1; state->once = (_PyOnceFlag){0}; } """)) def generate_module_def(mod, metadata, f, internal_h): # Gather all the data needed for ModuleSpec state_strings = { "ast", "_fields", "__match_args__", "__doc__", "__dict__", "__module__", "_attributes", *metadata.identifiers } module_state = state_strings.copy() module_state.update( "%s_singleton" % singleton for singleton in metadata.singletons ) module_state.update( "%s_type" % type for type in metadata.types ) state_strings = sorted(state_strings) module_state = sorted(module_state) generate_ast_state(module_state, internal_h) print(textwrap.dedent(""" #include "Python.h" #include "pycore_ast.h" #include "pycore_ast_state.h" // struct ast_state #include "pycore_ceval.h" // _Py_EnterRecursiveCall #include "pycore_lock.h" // _PyOnceFlag #include "pycore_interp.h" // _PyInterpreterState.ast #include "pycore_modsupport.h" // _PyArg_NoPositional() #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_setobject.h" // _PySet_NextEntry(), _PySet_Update() #include "pycore_unionobject.h" // _Py_union_type_or #include "structmember.h" #include struct validator { int recursion_depth; /* current recursion depth */ int recursion_limit; /* recursion limit */ }; // Forward declaration static int init_types(struct ast_state *state); static struct ast_state* get_ast_state(void) { PyInterpreterState *interp = _PyInterpreterState_GET(); struct ast_state *state = &interp->ast; assert(!state->finalized); if (_PyOnceFlag_CallOnce(&state->once, (_Py_once_fn_t *)&init_types, state) < 0) { return NULL; } return state; } """).strip(), file=f) generate_ast_fini(module_state, f) f.write('static int init_identifiers(struct ast_state *state)\n') f.write('{\n') for identifier in state_strings: f.write(' if ((state->' + identifier) f.write(' = PyUnicode_InternFromString("') f.write(identifier + '")) == NULL) return -1;\n') f.write(' return 0;\n') f.write('};\n\n') def write_header(mod, metadata, f): f.write(textwrap.dedent(""" #ifndef Py_INTERNAL_AST_H #define Py_INTERNAL_AST_H #ifdef __cplusplus extern "C" { #endif #ifndef Py_BUILD_CORE # error "this header requires Py_BUILD_CORE define" #endif #include "pycore_asdl.h" // _ASDL_SEQ_HEAD """).lstrip()) c = ChainOfVisitors( TypeDefVisitor(f), SequenceDefVisitor(f), StructVisitor(f), metadata=metadata ) c.visit(mod) f.write("// Note: these macros affect function definitions, not only call sites.\n") prototype_visitor = PrototypeVisitor(f, metadata=metadata) prototype_visitor.visit(mod) f.write(textwrap.dedent(""" PyObject* PyAST_mod2obj(mod_ty t); mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode); int PyAST_Check(PyObject* obj); extern int _PyAST_Validate(mod_ty); /* _PyAST_ExprAsUnicode is defined in ast_unparse.c */ extern PyObject* _PyAST_ExprAsUnicode(expr_ty); /* Return the borrowed reference to the first literal string in the sequence of statements or NULL if it doesn't start from a literal string. Doesn't set exception. */ extern PyObject* _PyAST_GetDocString(asdl_stmt_seq *); #ifdef __cplusplus } #endif #endif /* !Py_INTERNAL_AST_H */ """)) def write_internal_h_header(mod, f): print(textwrap.dedent(""" #ifndef Py_INTERNAL_AST_STATE_H #define Py_INTERNAL_AST_STATE_H #include "pycore_lock.h" // _PyOnceFlag #ifdef __cplusplus extern "C" { #endif #ifndef Py_BUILD_CORE # error "this header requires Py_BUILD_CORE define" #endif """).lstrip(), file=f) def write_internal_h_footer(mod, f): print(textwrap.dedent(""" #ifdef __cplusplus } #endif #endif /* !Py_INTERNAL_AST_STATE_H */ """), file=f) def write_source(mod, metadata, f, internal_h_file): generate_module_def(mod, metadata, f, internal_h_file) v = ChainOfVisitors( SequenceConstructorVisitor(f), PyTypesDeclareVisitor(f), AnnotationsVisitor(f), PyTypesVisitor(f), Obj2ModPrototypeVisitor(f), FunctionVisitor(f), ObjVisitor(f), Obj2ModVisitor(f), ASTModuleVisitor(f), PartingShots(f), metadata=metadata ) v.visit(mod) def main(input_filename, c_filename, h_filename, internal_h_filename, dump_module=False): auto_gen_msg = AUTOGEN_MESSAGE.format("/".join(Path(__file__).parts[-2:])) mod = asdl.parse(input_filename) if dump_module: print('Parsed Module:') print(mod) if not asdl.check(mod): sys.exit(1) metadata_visitor = MetadataVisitor() metadata_visitor.visit(mod) metadata = metadata_visitor.metadata with c_filename.open("w") as c_file, \ h_filename.open("w") as h_file, \ internal_h_filename.open("w") as internal_h_file: c_file.write(auto_gen_msg) h_file.write(auto_gen_msg) internal_h_file.write(auto_gen_msg) write_internal_h_header(mod, internal_h_file) write_source(mod, metadata, c_file, internal_h_file) write_header(mod, metadata, h_file) write_internal_h_footer(mod, internal_h_file) print(f"{c_filename}, {h_filename}, {internal_h_filename} regenerated.") if __name__ == "__main__": parser = ArgumentParser() parser.add_argument("input_file", type=Path) parser.add_argument("-C", "--c-file", type=Path, required=True) parser.add_argument("-H", "--h-file", type=Path, required=True) parser.add_argument("-I", "--internal-h-file", type=Path, required=True) parser.add_argument("-d", "--dump-module", action="store_true") args = parser.parse_args() main(args.input_file, args.c_file, args.h_file, args.internal_h_file, args.dump_module)