Merged revisions 62004 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/trunk

........
  r62004 | georg.brandl | 2008-03-28 13:11:56 +0100 (Fr, 28 Mär 2008) | 4 lines

  Patch #1810 by Thomas Lee, reviewed by myself:
  allow compiling Python AST objects into code objects
  in compile().
........
This commit is contained in:
Martin v. Löwis 2008-03-30 20:03:44 +00:00
parent d3372793d6
commit 618dc5e064
8 changed files with 3476 additions and 53 deletions

View File

@ -10,16 +10,16 @@ Abstract Syntax Trees
The ``_ast`` module helps Python applications to process trees of the Python
abstract syntax grammar. The Python compiler currently provides read-only access
to such trees, meaning that applications can only create a tree for a given
piece of Python source code; generating :term:`bytecode` from a (potentially modified)
tree is not supported. The abstract syntax itself might change with each Python
release; this module helps to find out programmatically what the current grammar
looks like.
abstract syntax grammar. The abstract syntax itself might change with each
Python release; this module helps to find out programmatically what the current
grammar looks like.
An abstract syntax tree can be generated by passing ``_ast.PyCF_ONLY_AST`` as a
flag to the :func:`compile` builtin function. The result will be a tree of
objects whose classes all inherit from ``_ast.AST``.
An abstract syntax tree can be generated by passing :data:`_ast.PyCF_ONLY_AST`
as a flag to the :func:`compile` builtin function. The result will be a tree of
objects whose classes all inherit from :class:`_ast.AST`.
A modified abstract syntax tree can be compiled into a Python code object using
the built-in :func:`compile` function.
The actual classes are derived from the ``Parser/Python.asdl`` file, which is
reproduced below. There is one class defined for each left-hand side symbol in
@ -45,6 +45,9 @@ If these attributes are marked as optional in the grammar (using a question
mark), the value might be ``None``. If the attributes can have zero-or-more
values (marked with an asterisk), the values are represented as Python lists.
The constructors of all ``_ast`` classes don't take arguments; instead, if you
create instances, you must assign the required attributes separately.
Abstract Grammar
----------------

View File

@ -193,21 +193,21 @@ available. They are listed here in alphabetical order.
.. function:: compile(source, filename, mode[, flags[, dont_inherit]])
Compile the *source* into a code object. Code objects can be executed by a call
to :func:`exec` or evaluated by a call to :func:`eval`. The *filename* argument
should give the file from which the code was read; pass some recognizable value
if it wasn't read from a file (``'<string>'`` is commonly used). The *mode*
argument specifies what kind of code must be compiled; it can be ``'exec'`` if
*source* consists of a sequence of statements, ``'eval'`` if it consists of a
single expression, or ``'single'`` if it consists of a single interactive
statement (in the latter case, expression statements that evaluate to something
else than ``None`` will be printed).
Compile the *source* into a code object. Code objects can be
executed by a call to :func:`exec` or evaluated by a call to
:func:`eval`. *source* can either be a string or an AST object.
Refer to the :mod:`_ast` module documentation for information on
how to compile into and from AST objects.
When compiling multi-line statements, two caveats apply: line endings must be
represented by a single newline character (``'\n'``), and the input must be
terminated by at least one newline character. If line endings are represented
by ``'\r\n'``, use the string :meth:`replace` method to change them into
``'\n'``.
The *filename* argument should give the file from
which the code was read; pass some recognizable value if it wasn't
read from a file (``'<string>'`` is commonly used). The *mode*
argument specifies what kind of code must be compiled; it can be
``'exec'`` if *source* consists of a sequence of statements,
``'eval'`` if it consists of a single expression, or ``'single'``
if it consists of a single interactive statement (in the latter
case, expression statements that evaluate to something other than
``None`` will be printed).
The optional arguments *flags* and *dont_inherit* (which are new in Python 2.2)
control which future statements (see :pep:`236`) affect the compilation of
@ -227,6 +227,9 @@ available. They are listed here in alphabetical order.
This function raises :exc:`SyntaxError` if the compiled source is invalid,
and :exc:`TypeError` if the source contains null bytes.
.. versionadded:: 2.6
Support for compiling AST objects.
.. function:: complex([real[, imag]])

View File

@ -542,3 +542,5 @@ keyword_ty _Py_keyword(identifier arg, expr_ty value, PyArena *arena);
alias_ty _Py_alias(identifier name, identifier asname, PyArena *arena);
PyObject* PyAST_mod2obj(mod_ty t);
mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena);
int PyAST_Check(PyObject* obj);

View File

@ -1,5 +1,6 @@
import unittest
import sys
import _ast
from test import test_support
class TestSpecifics(unittest.TestCase):
@ -406,6 +407,28 @@ if 1:
self.assert_("_A__mangled_mod" in A.f.__code__.co_varnames)
self.assert_("__package__" in A.f.__code__.co_varnames)
def test_compile_ast(self):
    """Check that compiling an AST gives the same code as compiling source.

    Every sample is compiled three ways: source -> code object,
    source -> AST (via ``_ast.PyCF_ONLY_AST``) and AST -> code object.
    The AST must be an ``_ast.Module`` and the two code objects must
    compare equal.  This test file itself is used as the largest sample.
    """
    source_path = __file__
    # When running from a compiled file, drop the trailing 'c'/'o' to get
    # the name of the matching .py source.
    if source_path.lower().endswith(('pyc', 'pyo')):
        source_path = source_path[:-1]
    with open(source_path, 'r') as f:
        own_source = f.read()

    sample_code = [
        ['<assign>', 'x = 5'],
        ['<ifblock>', """if True:\n pass\n"""],
        ['<forblock>', """for n in [1, 2, 3]:\n print(n)\n"""],
        ['<deffunc>', """def foo():\n pass\nfoo()\n"""],
        [source_path, own_source],
    ]

    # Distinct loop names: the original rebound ``fname`` while iterating
    # and called the tree ``ast``, which reads like the stdlib module.
    for label, code in sample_code:
        co1 = compile(code, '%s1' % label, 'exec')
        tree = compile(code, '%s2' % label, 'exec', _ast.PyCF_ONLY_AST)
        self.assert_(isinstance(tree, _ast.Module))
        co2 = compile(tree, '%s3' % label, 'exec')
        self.assertEqual(co1, co2)
def test_main():
test_support.run_unittest(TestSpecifics)

View File

@ -73,12 +73,12 @@ def is_simple(sum):
A sum is simple if its types have no fields, e.g.
unaryop = Invert | Not | UAdd | USub
"""
for t in sum.types:
if t.fields:
return False
return True
class EmitVisitor(asdl.VisitorBase):
"""Visit that emits lines"""
@ -96,6 +96,7 @@ class EmitVisitor(asdl.VisitorBase):
line = (" " * TABSIZE * depth) + line + "\n"
self.file.write(line)
class TypeDefVisitor(EmitVisitor):
def visitModule(self, mod):
for dfn in mod.dfns:
@ -133,6 +134,7 @@ class TypeDefVisitor(EmitVisitor):
self.emit(s, depth)
self.emit("", depth)
class StructVisitor(EmitVisitor):
"""Visitor to generate typdefs for AST."""
@ -202,6 +204,7 @@ class StructVisitor(EmitVisitor):
self.emit("};", depth)
self.emit("", depth)
class PrototypeVisitor(EmitVisitor):
"""Generate function prototypes for the .h file"""
@ -271,6 +274,7 @@ class PrototypeVisitor(EmitVisitor):
self.emit_function(name, get_c_type(name),
self.get_args(prod.fields), [], union=0)
class FunctionVisitor(PrototypeVisitor):
"""Visitor to generate constructor functions for AST."""
@ -324,6 +328,7 @@ class FunctionVisitor(PrototypeVisitor):
emit("p->%s = %s;" % (argname, argname), 1)
assert not attrs
class PickleVisitor(EmitVisitor):
def visitModule(self, mod):
@ -345,6 +350,181 @@ class PickleVisitor(EmitVisitor):
def visitField(self, sum):
pass
class Obj2ModPrototypeVisitor(PickleVisitor):
    """Emit a static forward declaration of ``obj2ast_<name>`` per type.

    The same declaration is written for product and for sum types.
    """

    _PROTOTYPE = "static int obj2ast_%s(PyObject* obj, %s* out, PyArena* arena);"

    def visitProduct(self, prod, name):
        """Write the ``obj2ast_<name>`` prototype at depth 0."""
        declaration = self._PROTOTYPE % (name, get_c_type(name))
        self.emit(declaration, 0)

    # Sum types get exactly the same prototype.
    visitSum = visitProduct
class Obj2ModVisitor(PickleVisitor):
    """Emit the C ``obj2ast_<name>()`` converters (Python object -> AST).

    Each generated function has the signature
    ``int obj2ast_<name>(PyObject* obj, <ctype>* out, PyArena* arena)``,
    stores its result through ``out`` and returns 0 on success or 1 on
    failure.  Failure paths inside a function jump to a shared ``failed:``
    label that releases the temporary ``tmp`` reference.
    """

    def _emitSignature(self, name):
        """Emit return type, signature, opening brace and the ``tmp`` local."""
        ctype = get_c_type(name)
        self.emit("int", 0)
        self.emit("obj2ast_%s(PyObject* obj, %s* out, PyArena* arena)" % (name, ctype), 0)
        self.emit("{", 0)
        self.emit("PyObject* tmp = NULL;", 1)

    def _emitFailedExit(self):
        """Emit the shared ``failed:`` cleanup path and close the function."""
        self.emit("failed:", 0)
        self.emit("Py_XDECREF(tmp);", 1)
        self.emit("return 1;", 1)
        self.emit("}", 0)
        self.emit("", 0)

    def funcHeader(self, name):
        """Emit the start of a sum converter, followed by a blank line."""
        self._emitSignature(name)
        self.emit("", 0)

    def sumTrailer(self, name):
        """Emit the fall-through error for a sum converter and close it.

        Reached in the generated code when ``obj`` matched none of the
        alternatives; a TypeError naming the expected sum is raised.
        """
        self.emit("", 0)
        self.emit("tmp = PyObject_Repr(obj);", 1)
        # there's really nothing more we can do if this fails ...
        self.emit("if (tmp == NULL) goto failed;", 1)
        error = "expected some sort of %s, but got %%.400s" % name
        template = "PyErr_Format(PyExc_TypeError, \"%s\", PyString_AS_STRING(tmp));"
        self.emit(template % error, 1, reflow=False)
        self._emitFailedExit()

    def simpleSum(self, sum, name):
        """Emit a converter for a sum whose alternatives have no fields."""
        self.funcHeader(name)
        for t in sum.types:
            self.emit("if (PyObject_IsInstance(obj, (PyObject*)%s_type)) {" % t.name, 1)
            self.emit("*out = %s;" % t.name, 2)
            self.emit("return 0;", 2)
            self.emit("}", 1)
        self.sumTrailer(name)

    def buildArgs(self, fields):
        """Return the C argument list: *fields* followed by ``arena``."""
        return ", ".join(fields + ["arena"])

    def complexSum(self, sum, name):
        """Emit a converter for a sum whose alternatives carry fields."""
        self.funcHeader(name)
        for a in sum.attributes:
            self.visitAttributeDeclaration(a, name, sum=sum)
        self.emit("", 0)
        # XXX: should we only do this for 'expr'?
        self.emit("if (obj == Py_None) {", 1)
        self.emit("*out = NULL;", 2)
        self.emit("return 0;", 2)
        self.emit("}", 1)
        for a in sum.attributes:
            self.visitField(a, name, sum=sum, depth=1)
        for t in sum.types:
            self.emit("if (PyObject_IsInstance(obj, (PyObject*)%s_type)) {" % t.name, 1)
            for f in t.fields:
                self.visitFieldDeclaration(f, t.name, sum=sum, depth=2)
            self.emit("", 0)
            for f in t.fields:
                self.visitField(f, t.name, sum=sum, depth=2)
            args = [f.name.value for f in t.fields] + [a.name.value for a in sum.attributes]
            self.emit("*out = %s(%s);" % (t.name, self.buildArgs(args)), 2)
            self.emit("if (*out == NULL) goto failed;", 2)
            self.emit("return 0;", 2)
            self.emit("}", 1)
        self.sumTrailer(name)

    def visitAttributeDeclaration(self, a, name, sum=None):
        """Emit the C local that will hold attribute *a*.

        ``sum`` is accepted for symmetry with the other visit methods and is
        not used; its default is ``None`` (the original default expression
        ``sum=sum`` bound the builtin ``sum`` function).
        """
        ctype = get_c_type(a.type)
        self.emit("%s %s;" % (ctype, a.name), 1)

    def visitSum(self, sum, name):
        """Dispatch to the simple or the complex sum generator."""
        if is_simple(sum):
            self.simpleSum(sum, name)
        else:
            self.complexSum(sum, name)

    def visitProduct(self, prod, name):
        """Emit a converter for a product type."""
        self._emitSignature(name)
        for f in prod.fields:
            self.visitFieldDeclaration(f, name, prod=prod, depth=1)
        self.emit("", 0)
        for f in prod.fields:
            self.visitField(f, name, prod=prod, depth=1)
        args = [f.name.value for f in prod.fields]
        self.emit("*out = %s(%s);" % (name, self.buildArgs(args)), 1)
        # Mirror complexSum: a NULL result from the constructor is a
        # failure and must not be reported as success.
        self.emit("if (*out == NULL) goto failed;", 1)
        self.emit("return 0;", 1)
        self._emitFailedExit()

    def visitFieldDeclaration(self, field, name, sum=None, prod=None, depth=0):
        """Emit the C local that will receive *field*'s converted value."""
        if field.seq:
            if self.isSimpleType(field):
                self.emit("asdl_int_seq* %s;" % field.name, depth)
            else:
                self.emit("asdl_seq* %s;" % field.name, depth)
        else:
            ctype = get_c_type(field.type)
            self.emit("%s %s;" % (ctype, field.name), depth)

    def isSimpleSum(self, field):
        """Return True if *field*'s type is one of the known simple sums."""
        # XXX can the members of this list be determined automatically?
        return field.type.value in ('expr_context', 'boolop', 'operator',
                                    'unaryop', 'cmpop')

    def isNumeric(self, field):
        """Return True if *field* maps to the C type ``int`` or ``bool``."""
        return get_c_type(field.type) in ("int", "bool")

    def isSimpleType(self, field):
        """Return True for simple-sum and numeric fields."""
        return self.isSimpleSum(field) or self.isNumeric(field)

    def visitField(self, field, name, sum=None, prod=None, depth=0):
        """Emit the C code that reads attribute *field* from ``obj``.

        The generated code converts the attribute (element by element for
        sequence fields) when it is present.  When it is absent, a required
        field raises TypeError and an optional one gets a default value.

        Raises:
            TypeError: at generation time, if an optional field is a
                simple sum, for which no default value is known.
        """
        ctype = get_c_type(field.type)
        self.emit("if (PyObject_HasAttrString(obj, \"%s\")) {" % field.name, depth)
        self.emit("int res;", depth+1)
        if field.seq:
            self.emit("Py_ssize_t len;", depth+1)
            self.emit("Py_ssize_t i;", depth+1)
        self.emit("tmp = PyObject_GetAttrString(obj, \"%s\");" % field.name, depth+1)
        self.emit("if (tmp == NULL) goto failed;", depth+1)
        if field.seq:
            self.emit("if (!PyList_Check(tmp)) {", depth+1)
            self.emit("PyErr_Format(PyExc_TypeError, \"%s field \\\"%s\\\" must "
                      "be a list, not a %%.200s\", tmp->ob_type->tp_name);" %
                      (name, field.name),
                      depth+2, reflow=False)
            self.emit("goto failed;", depth+2)
            self.emit("}", depth+1)
            self.emit("len = PyList_GET_SIZE(tmp);", depth+1)
            if self.isSimpleType(field):
                self.emit("%s = asdl_int_seq_new(len, arena);" % field.name, depth+1)
            else:
                self.emit("%s = asdl_seq_new(len, arena);" % field.name, depth+1)
            self.emit("if (%s == NULL) goto failed;" % field.name, depth+1)
            self.emit("for (i = 0; i < len; i++) {", depth+1)
            self.emit("%s value;" % ctype, depth+2)
            self.emit("res = obj2ast_%s(PyList_GET_ITEM(tmp, i), &value, arena);" %
                      field.type, depth+2, reflow=False)
            self.emit("if (res != 0) goto failed;", depth+2)
            self.emit("asdl_seq_SET(%s, i, value);" % field.name, depth+2)
            self.emit("}", depth+1)
        else:
            self.emit("res = obj2ast_%s(tmp, &%s, arena);" %
                      (field.type, field.name), depth+1)
            self.emit("if (res != 0) goto failed;", depth+1)

        self.emit("Py_XDECREF(tmp);", depth+1)
        self.emit("tmp = NULL;", depth+1)
        self.emit("} else {", depth)
        if not field.opt:
            message = "required field \\\"%s\\\" missing from %s" % (field.name, name)
            template = "PyErr_SetString(PyExc_TypeError, \"%s\");"
            self.emit(template % message, depth+1, reflow=False)
            self.emit("return 1;", depth+1)
        else:
            if self.isNumeric(field):
                self.emit("%s = 0;" % field.name, depth+1)
            elif not self.isSimpleType(field):
                self.emit("%s = NULL;" % field.name, depth+1)
            else:
                raise TypeError("could not determine the default value for %s" % field.name)
        self.emit("}", depth)
class MarshalPrototypeVisitor(PickleVisitor):
def prototype(self, sum, name):
@ -354,6 +534,7 @@ class MarshalPrototypeVisitor(PickleVisitor):
visitProduct = visitSum = prototype
class PyTypesDeclareVisitor(PickleVisitor):
def visitProduct(self, prod, name):
@ -439,6 +620,8 @@ static int add_attributes(PyTypeObject* type, char**attrs, int num_fields)
return result;
}
/* Conversion AST -> Python */
static PyObject* ast2obj_list(asdl_seq *seq, PyObject* (*func)(void*))
{
int i, n = asdl_seq_LEN(seq);
@ -471,6 +654,42 @@ static PyObject* ast2obj_int(long b)
{
return PyLong_FromLong(b);
}
/* Conversion Python -> AST */
/* Store obj in *out, mapping Py_None to NULL.  A non-NULL object is
   registered with the arena first.  Returns 0 on success, 1 on failure. */
static int obj2ast_object(PyObject* obj, PyObject** out, PyArena* arena)
{
    if (obj == Py_None)
        obj = NULL;
    if (obj) {
        /* Do not take the extra reference below if the arena could not
           record the object; report the failure to the caller instead. */
        if (PyArena_AddPyObject(arena, obj) < 0) {
            *out = NULL;
            return 1;
        }
        Py_INCREF(obj);
    }
    *out = obj;
    return 0;
}
#define obj2ast_identifier obj2ast_object
#define obj2ast_string obj2ast_object
/* Convert a Python int to a C int stored in *out.
   Returns 0 on success and 1 with an exception set on failure. */
static int obj2ast_int(PyObject* obj, int* out, PyArena* arena)
{
    long value;

    /* Anything that is not a Python int is rejected, quoting its repr. */
    if (!PyLong_Check(obj)) {
        PyObject *s = PyObject_Repr(obj);
        if (s == NULL) return 1;
        PyErr_Format(PyExc_ValueError, "invalid integer value: %.400s",
                     PyString_AS_STRING(s));
        Py_DECREF(s);
        return 1;
    }

    value = PyLong_AsLong(obj);
    if (value == -1 && PyErr_Occurred())
        return 1;
    /* Refuse values that do not survive the narrowing to int rather than
       silently truncating them. */
    if ((long)(int)value != value) {
        PyErr_SetString(PyExc_OverflowError,
                        "integer value does not fit in a C int");
        return 1;
    }
    *out = (int)value;
    return 0;
}
""", 0, reflow=False)
self.emit("static int init_types(void)",0)
@ -518,6 +737,7 @@ static PyObject* ast2obj_int(long b)
(cons.name, cons.name), 1)
self.emit("if (!%s_singleton) return 0;" % cons.name, 1)
def parse_version(mod):
    """Return the version text of *mod* without its fixed-width wrapper.

    The first 12 and the last 3 characters of ``mod.version.value`` are
    dropped.  Presumably the value is a quoted ``"$Revision: N $"`` keyword
    string, so that only ``N`` remains -- verify against the grammar file.
    """
    raw_version = mod.version.value
    prefix_len, suffix_len = 12, 3
    return raw_version[prefix_len:-suffix_len]
@ -557,6 +777,7 @@ class ASTModuleVisitor(PickleVisitor):
def addObj(self, name):
self.emit('if (PyDict_SetItemString(d, "%s", (PyObject*)%s_type) < 0) return;' % (name, name), 1)
_SPECIALIZED_SEQUENCES = ('stmt', 'expr')
def find_sequence(fields, doing_specialization):
@ -582,6 +803,7 @@ class StaticVisitor(PickleVisitor):
def visit(self, object):
self.emit(self.CODE, 0, reflow=False)
class ObjVisitor(PickleVisitor):
def func_begin(self, name):
@ -632,8 +854,12 @@ class ObjVisitor(PickleVisitor):
self.emit("case %s:" % t.name, 2)
self.emit("Py_INCREF(%s_singleton);" % t.name, 3)
self.emit("return %s_singleton;" % t.name, 3)
# The label has no conversion specifier, so it must not be %-formatted:
# '"default:" % name' raises TypeError when name is a plain string.
self.emit("default:", 2)
self.emit('/* should never happen, but just in case ... */', 3)
code = "PyErr_Format(PyExc_SystemError, \"unknown %s found\");" % name
self.emit(code, 3, reflow=False)
self.emit("return NULL;", 3)
self.emit("}", 1)
self.emit("return NULL; /* cannot happen */", 1)
self.emit("}", 0)
def visitProduct(self, prod, name):
@ -707,6 +933,27 @@ PyObject* PyAST_mod2obj(mod_ty t)
init_types();
return ast2obj_mod(t);
}
/* Convert a Python-level AST object into a mod_ty using the given arena.
   Returns NULL with an exception set if ast is not an instance of mod_type
   or if the conversion fails. */
mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena)
{
    mod_ty res;
    int isinstance;

    init_types();
    /* Cast as PyAST_Check() does for AST_type. */
    isinstance = PyObject_IsInstance(ast, (PyObject*)mod_type);
    /* A negative result is an error from the instance check itself; pass
       it on instead of treating it as a successful match. */
    if (isinstance < 0)
        return NULL;
    if (!isinstance) {
        PyErr_SetString(PyExc_TypeError, "expected either Module, Interactive "
                        "or Expression node");
        return NULL;
    }
    if (obj2ast_mod(ast, &res, arena) != 0)
        return NULL;
    return res;
}
/* Report whether obj is an instance of the AST base type (AST_type).
   init_types() is called first so that AST_type is set up.
   NOTE(review): the PyObject_IsInstance() result is returned unchanged, so
   an error value from it (presumably negative) is non-zero and would look
   like a match to callers that only test for truth -- confirm. */
int PyAST_Check(PyObject* obj)
{
init_types();
return PyObject_IsInstance(obj, (PyObject*)AST_type);
}
"""
class ChainOfVisitors:
@ -750,6 +997,8 @@ def main(srcfile):
)
c.visit(mod)
f.write("PyObject* PyAST_mod2obj(mod_ty t);\n")
# Use f.write() like the PyAST_mod2obj prototype above; the output is the
# same as with "print >>f", which is Python-2-only syntax.
f.write("mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena);\n")
f.write("int PyAST_Check(PyObject* obj);\n")
f.close()
if SRC_DIR:
@ -764,8 +1013,10 @@ def main(srcfile):
v = ChainOfVisitors(
PyTypesDeclareVisitor(f),
PyTypesVisitor(f),
Obj2ModPrototypeVisitor(f),
FunctionVisitor(f),
ObjVisitor(f),
Obj2ModVisitor(f),
ASTModuleVisitor(f),
PartingShots(f),
)

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,7 @@
/* Built-in functions */
#include "Python.h"
#include "Python-ast.h"
#include "node.h"
#include "code.h"
@ -527,22 +528,6 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
cf.cf_flags = supplied_flags | PyCF_SOURCE_IS_UTF8;
str = source_as_string(cmd);
if (str == NULL)
return NULL;
if (strcmp(startstr, "exec") == 0)
start = Py_file_input;
else if (strcmp(startstr, "eval") == 0)
start = Py_eval_input;
else if (strcmp(startstr, "single") == 0)
start = Py_single_input;
else {
PyErr_SetString(PyExc_ValueError,
"compile() arg 3 must be 'exec' or 'eval' or 'single'");
return NULL;
}
if (supplied_flags &
~(PyCF_MASK | PyCF_MASK_OBSOLETE | PyCF_DONT_IMPLY_DEDENT | PyCF_ONLY_AST))
{
@ -555,6 +540,48 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
if (!dont_inherit) {
PyEval_MergeCompilerFlags(&cf);
}
/* AST input: either hand the tree back unchanged (PyCF_ONLY_AST) or
   convert it to a mod_ty and compile that. */
if (PyAST_Check(cmd)) {
	PyObject *result;
	if (supplied_flags & PyCF_ONLY_AST) {
		Py_INCREF(cmd);
		result = cmd;
	}
	else {
		PyArena *arena;
		mod_ty mod;

		arena = PyArena_New();
		/* Without an arena nothing below can work; bail out before
		   it is handed to the converter. */
		if (arena == NULL)
			return NULL;
		mod = PyAST_obj2mod(cmd, arena);
		if (mod == NULL) {
			PyArena_Free(arena);
			return NULL;
		}
		result = (PyObject*)PyAST_Compile(mod, filename,
						  &cf, arena);
		PyArena_Free(arena);
	}
	return result;
}
/* XXX: is it possible to pass start to the PyAST_ branch? */
/* Map the mode string onto the parser start symbol. */
if (strcmp(startstr, "exec") == 0)
	start = Py_file_input;
else if (strcmp(startstr, "eval") == 0)
	start = Py_eval_input;
else if (strcmp(startstr, "single") == 0)
	start = Py_single_input;
else {
	/* Adjacent literals are concatenated verbatim, so the first one
	   needs its trailing space ("'exec' or", not "'exec'or"). */
	PyErr_SetString(PyExc_ValueError,
			"compile() arg 3 must be 'exec' "
			"or 'eval' or 'single'");
	return NULL;
}
str = source_as_string(cmd);
if (str == NULL)
return NULL;
return Py_CompileStringFlags(str, filename, start, &cf);
}

View File

@ -2356,8 +2356,11 @@ unaryop(unaryop_ty op)
return UNARY_POSITIVE;
case USub:
return UNARY_NEGATIVE;
}
default:
PyErr_Format(PyExc_SystemError,
"unary op %d should not be possible", op);
return 0;
}
}
static int
@ -2388,8 +2391,11 @@ binop(struct compiler *c, operator_ty op)
return BINARY_AND;
case FloorDiv:
return BINARY_FLOOR_DIVIDE;
}
default:
PyErr_Format(PyExc_SystemError,
"binary op %d should not be possible", op);
return 0;
}
}
static int
@ -2416,8 +2422,9 @@ cmpop(cmpop_ty op)
return PyCmp_IN;
case NotIn:
return PyCmp_NOT_IN;
}
default:
return PyCmp_BAD;
}
}
static int
@ -2448,10 +2455,11 @@ inplace_binop(struct compiler *c, operator_ty op)
return INPLACE_AND;
case FloorDiv:
return INPLACE_FLOOR_DIVIDE;
}
default:
PyErr_Format(PyExc_SystemError,
"inplace binary op %d should not be possible", op);
return 0;
}
}
static int