cpython/Parser/asdl_c.py

702 lines
21 KiB
Python
Executable File

#! /usr/bin/env python
"""Generate C code from an ASDL description."""
# TO DO
# handle fields that have a type but no name
import os, sys, traceback
import asdl
TABSIZE = 8
MAX_COL = 80
def get_c_type(name):
"""Return a string for the C name of the type.
This function special cases the default types provided by asdl:
identifier, string, int, bool.
"""
# XXX ack! need to figure out where Id is useful and where string
if isinstance(name, asdl.Id):
name = name.value
if name in asdl.builtin_types:
return name
else:
return "%s_ty" % name
def reflow_lines(s, depth):
"""Reflow the line s indented depth tabs.
Return a sequence of lines where no line extends beyond MAX_COL
when properly indented. The first line is properly indented based
exclusively on depth * TABSIZE. All following lines -- these are
the reflowed lines generated by this function -- start at the same
column as the first character beyond the opening { in the first
line.
"""
size = MAX_COL - depth * TABSIZE
if len(s) < size:
return [s]
lines = []
cur = s
padding = ""
while len(cur) > size:
i = cur.rfind(' ', 0, size)
# XXX this should be fixed for real
if i == -1 and 'GeneratorExp' in cur:
i = size + 3
assert i != -1, "Impossible line %d to reflow: %s" % (size, `s`)
lines.append(padding + cur[:i])
if len(lines) == 1:
# find new size based on brace
j = cur.find('{', 0, i)
if j >= 0:
j += 2 # account for the brace and the space after it
size -= j
padding = " " * j
else:
j = cur.find('(', 0, i)
if j >= 0:
j += 1 # account for the paren (no space after it)
size -= j
padding = " " * j
cur = cur[i+1:]
else:
lines.append(padding + cur)
return lines
def is_simple(sum):
"""Return True if a sum is a simple.
A sum is simple if its types have no fields, e.g.
unaryop = Invert | Not | UAdd | USub
"""
for t in sum.types:
if t.fields:
return False
return True
class EmitVisitor(asdl.VisitorBase):
"""Visit that emits lines"""
def __init__(self, file):
self.file = file
super(EmitVisitor, self).__init__()
def emit(self, s, depth, reflow=1):
# XXX reflow long lines?
if reflow:
lines = reflow_lines(s, depth)
else:
lines = [s]
for line in lines:
line = (" " * TABSIZE * depth) + line + "\n"
self.file.write(line)
class TypeDefVisitor(EmitVisitor):
def visitModule(self, mod):
for dfn in mod.dfns:
self.visit(dfn)
def visitType(self, type, depth=0):
self.visit(type.value, type.name, depth)
def visitSum(self, sum, name, depth):
if is_simple(sum):
self.simple_sum(sum, name, depth)
else:
self.sum_with_constructors(sum, name, depth)
def simple_sum(self, sum, name, depth):
enum = []
for i in range(len(sum.types)):
type = sum.types[i]
enum.append("%s=%d" % (type.name, i + 1))
enums = ", ".join(enum)
ctype = get_c_type(name)
s = "typedef enum _%s { %s } %s;" % (name, enums, ctype)
self.emit(s, depth)
self.emit("", depth)
def sum_with_constructors(self, sum, name, depth):
ctype = get_c_type(name)
s = "typedef struct _%(name)s *%(ctype)s;" % locals()
self.emit(s, depth)
self.emit("", depth)
def visitProduct(self, product, name, depth):
ctype = get_c_type(name)
s = "typedef struct _%(name)s *%(ctype)s;" % locals()
self.emit(s, depth)
self.emit("", depth)
class StructVisitor(EmitVisitor):
"""Visitor to generate typdefs for AST."""
def visitModule(self, mod):
for dfn in mod.dfns:
self.visit(dfn)
def visitType(self, type, depth=0):
self.visit(type.value, type.name, depth)
def visitSum(self, sum, name, depth):
if not is_simple(sum):
self.sum_with_constructors(sum, name, depth)
def sum_with_constructors(self, sum, name, depth):
def emit(s, depth=depth):
self.emit(s % sys._getframe(1).f_locals, depth)
enum = []
for i in range(len(sum.types)):
type = sum.types[i]
enum.append("%s_kind=%d" % (type.name, i + 1))
emit("struct _%(name)s {")
emit("enum { " + ", ".join(enum) + " } kind;", depth + 1)
emit("union {", depth + 1)
for t in sum.types:
self.visit(t, depth + 2)
emit("} v;", depth + 1)
for field in sum.attributes:
# rudimentary attribute handling
type = str(field.type)
assert type in asdl.builtin_types, type
emit("%s %s;" % (type, field.name), depth + 1);
emit("};")
emit("")
def visitConstructor(self, cons, depth):
if cons.fields:
self.emit("struct {", depth)
for f in cons.fields:
self.visit(f, depth + 1)
self.emit("} %s;" % cons.name, depth)
self.emit("", depth)
else:
# XXX not sure what I want here, nothing is probably fine
pass
def visitField(self, field, depth):
# XXX need to lookup field.type, because it might be something
# like a builtin...
ctype = get_c_type(field.type)
name = field.name
if field.seq:
self.emit("asdl_seq *%(name)s;" % locals(), depth)
else:
self.emit("%(ctype)s %(name)s;" % locals(), depth)
def visitProduct(self, product, name, depth):
self.emit("struct _%(name)s {" % locals(), depth)
for f in product.fields:
self.visit(f, depth + 1)
self.emit("};", depth)
self.emit("", depth)
class PrototypeVisitor(EmitVisitor):
"""Generate function prototypes for the .h file"""
def visitModule(self, mod):
for dfn in mod.dfns:
self.visit(dfn)
def visitType(self, type):
self.visit(type.value, type.name)
def visitSum(self, sum, name):
if is_simple(sum):
pass # XXX
else:
for t in sum.types:
self.visit(t, name, sum.attributes)
def get_args(self, fields):
"""Return list of C argument into, one for each field.
Argument info is 3-tuple of a C type, variable name, and flag
that is true if type can be NULL.
"""
args = []
unnamed = {}
for f in fields:
if f.name is None:
name = f.type
c = unnamed[name] = unnamed.get(name, 0) + 1
if c > 1:
name = "name%d" % (c - 1)
else:
name = f.name
# XXX should extend get_c_type() to handle this
if f.seq:
ctype = "asdl_seq *"
else:
ctype = get_c_type(f.type)
args.append((ctype, name, f.opt or f.seq))
return args
def visitConstructor(self, cons, type, attrs):
args = self.get_args(cons.fields)
attrs = self.get_args(attrs)
ctype = get_c_type(type)
self.emit_function(cons.name, ctype, args, attrs)
def emit_function(self, name, ctype, args, attrs, union=1):
args = args + attrs
if args:
argstr = ", ".join(["%s %s" % (atype, aname)
for atype, aname, opt in args])
argstr += ", PyArena *arena"
else:
argstr = "PyArena *arena"
self.emit("%s %s(%s);" % (ctype, name, argstr), 0)
def visitProduct(self, prod, name):
self.emit_function(name, get_c_type(name),
self.get_args(prod.fields), [], union=0)
class FunctionVisitor(PrototypeVisitor):
"""Visitor to generate constructor functions for AST."""
def emit_function(self, name, ctype, args, attrs, union=1):
def emit(s, depth=0, reflow=1):
self.emit(s, depth, reflow)
argstr = ", ".join(["%s %s" % (atype, aname)
for atype, aname, opt in args + attrs])
if argstr:
argstr += ", PyArena *arena"
else:
argstr = "PyArena *arena"
self.emit("%s" % ctype, 0)
emit("%s(%s)" % (name, argstr))
emit("{")
emit("%s p;" % ctype, 1)
for argtype, argname, opt in args:
# XXX hack alert: false is allowed for a bool
if not opt and not argtype == "bool":
emit("if (!%s) {" % argname, 1)
emit("PyErr_SetString(PyExc_ValueError,", 2)
msg = "field %s is required for %s" % (argname, name)
emit(' "%s");' % msg,
2, reflow=0)
emit('return NULL;', 2)
emit('}', 1)
emit("p = (%s)PyArena_Malloc(arena, sizeof(*p));" % ctype, 1);
emit("if (!p) {", 1)
emit("PyErr_NoMemory();", 2)
emit("return NULL;", 2)
emit("}", 1)
if union:
self.emit_body_union(name, args, attrs)
else:
self.emit_body_struct(name, args, attrs)
emit("return p;", 1)
emit("}")
emit("")
def emit_body_union(self, name, args, attrs):
def emit(s, depth=0, reflow=1):
self.emit(s, depth, reflow)
emit("p->kind = %s_kind;" % name, 1)
for argtype, argname, opt in args:
emit("p->v.%s.%s = %s;" % (name, argname, argname), 1)
for argtype, argname, opt in attrs:
emit("p->%s = %s;" % (argname, argname), 1)
def emit_body_struct(self, name, args, attrs):
def emit(s, depth=0, reflow=1):
self.emit(s, depth, reflow)
for argtype, argname, opt in args:
emit("p->%s = %s;" % (argname, argname), 1)
assert not attrs
class PickleVisitor(EmitVisitor):
def visitModule(self, mod):
for dfn in mod.dfns:
self.visit(dfn)
def visitType(self, type):
self.visit(type.value, type.name)
def visitSum(self, sum, name):
pass
def visitProduct(self, sum, name):
pass
def visitConstructor(self, cons, name):
pass
def visitField(self, sum):
pass
class MarshalPrototypeVisitor(PickleVisitor):
def prototype(self, sum, name):
ctype = get_c_type(name)
self.emit("static int marshal_write_%s(PyObject **, int *, %s);"
% (name, ctype), 0)
visitProduct = visitSum = prototype
class FreePrototypeVisitor(PickleVisitor):
def prototype(self, sum, name):
ctype = get_c_type(name)
self.emit("void free_%s(%s);" % (name, ctype), 0)
visitProduct = visitSum = prototype
_SPECIALIZED_SEQUENCES = ('stmt', 'expr')
def find_sequence(fields, doing_specialization):
"""Return True if any field uses a sequence."""
for f in fields:
if f.seq:
if not doing_specialization:
return True
if str(f.type) not in _SPECIALIZED_SEQUENCES:
return True
return False
def has_sequence(types, doing_specialization):
for t in types:
if find_sequence(t.fields, doing_specialization):
return True
return False
class StaticVisitor(PickleVisitor):
CODE = '''Very simple, always emit this static code. Overide CODE'''
def visit(self, object):
self.emit(self.CODE, 0, reflow=False)
class FreeUtilVisitor(StaticVisitor):
CODE = '''static void
free_seq_exprs(asdl_seq *seq)
{
int i, n;
n = asdl_seq_LEN(seq);
for (i = 0; i < n; i++)
free_expr((expr_ty)asdl_seq_GET(seq, i));
asdl_seq_free(seq);
}
static void
free_seq_stmts(asdl_seq *seq)
{
int i, n;
n = asdl_seq_LEN(seq);
for (i = 0; i < n; i++)
free_stmt((stmt_ty)asdl_seq_GET(seq, i));
asdl_seq_free(seq);
}
'''
class FreeVisitor(PickleVisitor):
def func_begin(self, name, has_seq):
ctype = get_c_type(name)
self.emit("void", 0)
self.emit("free_%s(%s o)" % (name, ctype), 0)
self.emit("{", 0)
if has_seq:
self.emit("int i, n;", 1)
self.emit("asdl_seq *seq;", 1)
self.emit('', 0)
self.emit('if (!o)', 1)
self.emit('return;', 2)
self.emit('', 0)
def func_end(self):
self.emit("}", 0)
self.emit("", 0)
def visitSum(self, sum, name):
has_seq = has_sequence(sum.types, True)
self.func_begin(name, has_seq)
if not is_simple(sum):
self.emit("switch (o->kind) {", 1)
for i in range(len(sum.types)):
t = sum.types[i]
self.visitConstructor(t, i + 1, name)
self.emit("}", 1)
self.emit("", 0)
self.emit("free(o);", 1)
self.func_end()
def visitProduct(self, prod, name):
self.func_begin(name, find_sequence(prod.fields, True))
for field in prod.fields:
self.visitField(field, name, 1, True)
self.emit("", 0)
self.emit("free(o);", 1)
self.func_end()
def visitConstructor(self, cons, enum, name):
self.emit("case %s_kind:" % cons.name, 1)
for f in cons.fields:
self.visitField(f, cons.name, 2, False)
self.emit("break;", 2)
def visitField(self, field, name, depth, product):
def emit(s, d):
self.emit(s, depth + d)
if product:
value = "o->%s" % field.name
else:
value = "o->v.%s.%s" % (name, field.name)
if field.seq:
self.emitSeq(field, value, depth, emit)
# XXX need to know the simple types in advance, so that we
# don't call free_TYPE() for them.
elif field.opt:
emit("if (%s) {" % value, 0)
self.free(field, value, depth + 1)
emit("}", 0)
else:
self.free(field, value, depth)
def emitSeq(self, field, value, depth, emit):
# specialize for freeing sequences of statements and expressions
if str(field.type) in _SPECIALIZED_SEQUENCES:
c_code = "free_seq_%ss(%s);" % (field.type, value)
emit(c_code, 0)
else:
emit("seq = %s;" % value, 0)
emit("n = asdl_seq_LEN(seq);", 0)
emit("for (i = 0; i < n; i++)", 0)
self.free(field, "asdl_seq_GET(seq, i)", depth + 1)
emit("asdl_seq_free(seq);", 0)
def free(self, field, value, depth):
if str(field.type) in ("identifier", "string", "object"):
ctype = get_c_type(field.type)
self.emit("Py_DECREF((%s)%s);" % (ctype, value), depth)
elif str(field.type) == "bool":
return
else:
ctype = get_c_type(field.type)
self.emit("free_%s((%s)%s);" % (field.type, ctype, value), depth)
class MarshalUtilVisitor(StaticVisitor):
CODE = '''
#define CHECKSIZE(BUF, OFF, MIN) { \\
int need = *(OFF) + MIN; \\
if (need >= PyString_GET_SIZE(*(BUF))) { \\
int newsize = PyString_GET_SIZE(*(BUF)) * 2; \\
if (newsize < need) \\
newsize = need; \\
if (_PyString_Resize((BUF), newsize) < 0) \\
return 0; \\
} \\
}
static int
marshal_write_int(PyObject **buf, int *offset, int x)
{
char *s;
CHECKSIZE(buf, offset, 4)
s = PyString_AS_STRING(*buf) + (*offset);
s[0] = (x & 0xff);
s[1] = (x >> 8) & 0xff;
s[2] = (x >> 16) & 0xff;
s[3] = (x >> 24) & 0xff;
*offset += 4;
return 1;
}
static int
marshal_write_bool(PyObject **buf, int *offset, bool b)
{
if (b)
marshal_write_int(buf, offset, 1);
else
marshal_write_int(buf, offset, 0);
return 1;
}
static int
marshal_write_identifier(PyObject **buf, int *offset, identifier id)
{
int l = PyString_GET_SIZE(id);
marshal_write_int(buf, offset, l);
CHECKSIZE(buf, offset, l);
memcpy(PyString_AS_STRING(*buf) + *offset,
PyString_AS_STRING(id), l);
*offset += l;
return 1;
}
static int
marshal_write_string(PyObject **buf, int *offset, string s)
{
int len = PyString_GET_SIZE(s);
marshal_write_int(buf, offset, len);
CHECKSIZE(buf, offset, len);
memcpy(PyString_AS_STRING(*buf) + *offset,
PyString_AS_STRING(s), len);
*offset += len;
return 1;
}
static int
marshal_write_object(PyObject **buf, int *offset, object s)
{
/* XXX */
return 0;
}
'''
class MarshalFunctionVisitor(PickleVisitor):
def func_begin(self, name, has_seq):
ctype = get_c_type(name)
self.emit("static int", 0)
self.emit("marshal_write_%s(PyObject **buf, int *off, %s o)" %
(name, ctype), 0)
self.emit("{", 0)
if has_seq:
self.emit("int i;", 1)
def func_end(self):
self.emit("return 1;", 1)
self.emit("}", 0)
self.emit("", 0)
def visitSum(self, sum, name):
self.func_begin(name, has_sequence(sum.types, False))
simple = is_simple(sum)
if simple:
self.emit("switch (o) {", 1)
else:
self.emit("switch (o->kind) {", 1)
for i in range(len(sum.types)):
t = sum.types[i]
self.visitConstructor(t, i + 1, name, simple)
self.emit("}", 1)
self.func_end()
def visitProduct(self, prod, name):
self.func_begin(name, find_sequence(prod.fields, False))
for field in prod.fields:
self.visitField(field, name, 1, 1)
self.func_end()
def visitConstructor(self, cons, enum, name, simple):
if simple:
self.emit("case %s:" % cons.name, 1)
self.emit("marshal_write_int(buf, off, %d);" % enum, 2);
self.emit("break;", 2)
else:
self.emit("case %s_kind:" % cons.name, 1)
self.emit("marshal_write_int(buf, off, %d);" % enum, 2)
for f in cons.fields:
self.visitField(f, cons.name, 2, 0)
self.emit("break;", 2)
def visitField(self, field, name, depth, product):
def emit(s, d):
self.emit(s, depth + d)
if product:
value = "o->%s" % field.name
else:
value = "o->v.%s.%s" % (name, field.name)
if field.seq:
emit("marshal_write_int(buf, off, asdl_seq_LEN(%s));" % value, 0)
emit("for (i = 0; i < asdl_seq_LEN(%s); i++) {" % value, 0)
emit("void *elt = asdl_seq_GET(%s, i);" % value, 1);
ctype = get_c_type(field.type);
emit("marshal_write_%s(buf, off, (%s)elt);" % (field.type,
ctype), 1)
emit("}", 0)
elif field.opt:
emit("if (%s) {" % value, 0)
emit("marshal_write_int(buf, off, 1);", 1)
emit("marshal_write_%s(buf, off, %s);" % (field.type, value), 1)
emit("}", 0)
emit("else {", 0)
emit("marshal_write_int(buf, off, 0);", 1)
emit("}", 0)
else:
emit("marshal_write_%s(buf, off, %s);" % (field.type, value), 0)
class ChainOfVisitors:
def __init__(self, *visitors):
self.visitors = visitors
def visit(self, object):
for v in self.visitors:
v.visit(object)
v.emit("", 0)
def main(srcfile):
argv0 = sys.argv[0]
components = argv0.split(os.sep)
argv0 = os.sep.join(components[-2:])
auto_gen_msg = '/* File automatically generated by %s */\n' % argv0
mod = asdl.parse(srcfile)
if not asdl.check(mod):
sys.exit(1)
if INC_DIR:
p = "%s/%s-ast.h" % (INC_DIR, mod.name)
else:
p = "%s-ast.h" % mod.name
f = open(p, "wb")
print >> f, auto_gen_msg
print >> f, '#include "asdl.h"\n'
c = ChainOfVisitors(TypeDefVisitor(f),
StructVisitor(f),
PrototypeVisitor(f),
## FreePrototypeVisitor(f),
)
c.visit(mod)
f.close()
if SRC_DIR:
p = "%s/%s-ast.c" % (SRC_DIR, mod.name)
else:
p = "%s-ast.c" % mod.name
f = open(p, "wb")
print >> f, auto_gen_msg
print >> f, '#include "Python.h"'
print >> f, '#include "%s-ast.h"' % mod.name
print >> f
v = ChainOfVisitors(MarshalPrototypeVisitor(f),
FunctionVisitor(f),
## FreeUtilVisitor(f),
## FreeVisitor(f),
MarshalUtilVisitor(f),
MarshalFunctionVisitor(f),
)
v.visit(mod)
f.close()
if __name__ == "__main__":
import sys
import getopt
INC_DIR = ''
SRC_DIR = ''
opts, args = getopt.getopt(sys.argv[1:], "h:c:")
for o, v in opts:
if o == '-h':
INC_DIR = v
if o == '-c':
SRC_DIR = v
if len(args) != 1:
print "Must specify single input file"
main(args[0])