Merged revisions 62004 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/trunk

........
  r62004 | georg.brandl | 2008-03-28 13:11:56 +0100 (Fr, 28 Mär 2008) | 4 lines

  Patch #1810 by Thomas Lee, reviewed by myself:
  allow compiling Python AST objects into code objects
  in compile().
........
This commit is contained in:
Martin v. Löwis 2008-03-30 20:03:44 +00:00
parent d3372793d6
commit 618dc5e064
8 changed files with 3476 additions and 53 deletions

View File

@ -10,16 +10,16 @@ Abstract Syntax Trees
The ``_ast`` module helps Python applications to process trees of the Python
abstract syntax grammar. The Python compiler currently provides read-only access
to such trees, meaning that applications can only create a tree for a given
piece of Python source code; generating :term:`bytecode` from a (potentially modified)
tree is not supported. The abstract syntax itself might change with each Python
release; this module helps to find out programmatically what the current grammar
looks like.
abstract syntax grammar. The abstract syntax itself might change with each
Python release; this module helps to find out programmatically what the current
grammar looks like.
An abstract syntax tree can be generated by passing ``_ast.PyCF_ONLY_AST`` as a
flag to the :func:`compile` builtin function. The result will be a tree of
objects whose classes all inherit from ``_ast.AST``.
An abstract syntax tree can be generated by passing :data:`_ast.PyCF_ONLY_AST`
as a flag to the :func:`compile` builtin function. The result will be a tree of
objects whose classes all inherit from :class:`_ast.AST`.
A modified abstract syntax tree can be compiled into a Python code object using
the built-in :func:`compile` function.
The actual classes are derived from the ``Parser/Python.asdl`` file, which is
reproduced below. There is one class defined for each left-hand side symbol in
@ -39,12 +39,15 @@ attribute ``left`` of type ``_ast.expr``. Instances of ``_ast.expr`` and
``_ast.stmt`` subclasses also have lineno and col_offset attributes. The lineno
is the line number of source text (1 indexed so the first line is line 1) and
the col_offset is the utf8 byte offset of the first token that generated the
node. The utf8 offset is recorded because the parser uses utf8 internally.
node. The utf8 offset is recorded because the parser uses utf8 internally.
If these attributes are marked as optional in the grammar (using a question
mark), the value might be ``None``. If the attributes can have zero-or-more
values (marked with an asterisk), the values are represented as Python lists.
The constructors of all ``_ast`` classes don't take arguments; instead, if you
create instances, you must assign the required attributes separately.
Abstract Grammar
----------------

View File

@ -193,21 +193,21 @@ available. They are listed here in alphabetical order.
.. function:: compile(source, filename, mode[, flags[, dont_inherit]])
Compile the *source* into a code object. Code objects can be executed by a call
to :func:`exec` or evaluated by a call to :func:`eval`. The *filename* argument
should give the file from which the code was read; pass some recognizable value
if it wasn't read from a file (``'<string>'`` is commonly used). The *mode*
argument specifies what kind of code must be compiled; it can be ``'exec'`` if
*source* consists of a sequence of statements, ``'eval'`` if it consists of a
single expression, or ``'single'`` if it consists of a single interactive
statement (in the latter case, expression statements that evaluate to something
else than ``None`` will be printed).
Compile the *source* into a code object. Code objects can be
executed by a call to :func:`exec` or evaluated by a call to
:func:`eval`. *source* can be either a string or an AST object.
Refer to the :mod:`_ast` module documentation for information on
how to obtain AST objects and how to compile them.
When compiling multi-line statements, two caveats apply: line endings must be
represented by a single newline character (``'\n'``), and the input must be
terminated by at least one newline character. If line endings are represented
by ``'\r\n'``, use the string :meth:`replace` method to change them into
``'\n'``.
The *filename* argument should give the file from
which the code was read; pass some recognizable value if it wasn't
read from a file (``'<string>'`` is commonly used). The *mode*
argument specifies what kind of code must be compiled; it can be
``'exec'`` if *source* consists of a sequence of statements,
``'eval'`` if it consists of a single expression, or ``'single'``
if it consists of a single interactive statement (in the latter
case, expression statements that evaluate to something other than
``None`` will be printed).
The optional arguments *flags* and *dont_inherit* (which are new in Python 2.2)
control which future statements (see :pep:`236`) affect the compilation of
@ -227,6 +227,9 @@ available. They are listed here in alphabetical order.
This function raises :exc:`SyntaxError` if the compiled source is invalid,
and :exc:`TypeError` if the source contains null bytes.
.. versionadded:: 2.6
Support for compiling AST objects.
.. function:: complex([real[, imag]])

View File

@ -542,3 +542,5 @@ keyword_ty _Py_keyword(identifier arg, expr_ty value, PyArena *arena);
alias_ty _Py_alias(identifier name, identifier asname, PyArena *arena);
PyObject* PyAST_mod2obj(mod_ty t);
mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena);
int PyAST_Check(PyObject* obj);

View File

@ -1,5 +1,6 @@
import unittest
import sys
import _ast
from test import test_support
class TestSpecifics(unittest.TestCase):
@ -406,6 +407,28 @@ if 1:
self.assert_("_A__mangled_mod" in A.f.__code__.co_varnames)
self.assert_("__package__" in A.f.__code__.co_varnames)
def test_compile_ast(self):
fname = __file__
if fname.lower().endswith(('pyc', 'pyo')):
fname = fname[:-1]
with open(fname, 'r') as f:
fcontents = f.read()
sample_code = [
['<assign>', 'x = 5'],
['<ifblock>', """if True:\n pass\n"""],
['<forblock>', """for n in [1, 2, 3]:\n print(n)\n"""],
['<deffunc>', """def foo():\n pass\nfoo()\n"""],
[fname, fcontents],
]
for fname, code in sample_code:
co1 = compile(code, '%s1' % fname, 'exec')
ast = compile(code, '%s2' % fname, 'exec', _ast.PyCF_ONLY_AST)
self.assert_(type(ast) == _ast.Module)
co2 = compile(ast, '%s3' % fname, 'exec')
self.assertEqual(co1, co2)
def test_main():
    """Run the TestSpecifics test case through test_support.run_unittest."""
    cases = (TestSpecifics,)
    test_support.run_unittest(*cases)

View File

@ -73,12 +73,12 @@ def is_simple(sum):
A sum is simple if its types have no fields, e.g.
unaryop = Invert | Not | UAdd | USub
"""
for t in sum.types:
if t.fields:
return False
return True
class EmitVisitor(asdl.VisitorBase):
"""Visit that emits lines"""
@ -96,6 +96,7 @@ class EmitVisitor(asdl.VisitorBase):
line = (" " * TABSIZE * depth) + line + "\n"
self.file.write(line)
class TypeDefVisitor(EmitVisitor):
def visitModule(self, mod):
for dfn in mod.dfns:
@ -133,6 +134,7 @@ class TypeDefVisitor(EmitVisitor):
self.emit(s, depth)
self.emit("", depth)
class StructVisitor(EmitVisitor):
"""Visitor to generate typdefs for AST."""
@ -202,6 +204,7 @@ class StructVisitor(EmitVisitor):
self.emit("};", depth)
self.emit("", depth)
class PrototypeVisitor(EmitVisitor):
"""Generate function prototypes for the .h file"""
@ -271,6 +274,7 @@ class PrototypeVisitor(EmitVisitor):
self.emit_function(name, get_c_type(name),
self.get_args(prod.fields), [], union=0)
class FunctionVisitor(PrototypeVisitor):
"""Visitor to generate constructor functions for AST."""
@ -324,6 +328,7 @@ class FunctionVisitor(PrototypeVisitor):
emit("p->%s = %s;" % (argname, argname), 1)
assert not attrs
class PickleVisitor(EmitVisitor):
def visitModule(self, mod):
@ -345,6 +350,181 @@ class PickleVisitor(EmitVisitor):
def visitField(self, sum):
pass
class Obj2ModPrototypeVisitor(PickleVisitor):
    """Emit a static forward declaration for each obj2ast_<name> function."""

    def visitProduct(self, prod, name):
        # The declaration only depends on the type name, not on its fields.
        template = "static int obj2ast_%s(PyObject* obj, %s* out, PyArena* arena);"
        declaration = template % (name, get_c_type(name))
        self.emit(declaration, 0)

    # Sum types are declared exactly like product types.
    visitSum = visitProduct
class Obj2ModVisitor(PickleVisitor):
    """Emit the C functions ``obj2ast_<name>(PyObject* obj, <ctype>* out, PyArena* arena)``.

    Every emitted function returns 0 on success and 1 on failure; the
    failure path goes through a ``failed:`` label that releases ``tmp``.
    """

    def funcHeader(self, name):
        """Emit the signature, opening brace and ``tmp`` declaration."""
        ctype = get_c_type(name)
        self.emit("int", 0)
        self.emit("obj2ast_%s(PyObject* obj, %s* out, PyArena* arena)" % (name, ctype), 0)
        self.emit("{", 0)
        self.emit("PyObject* tmp = NULL;", 1)
        self.emit("", 0)

    def sumTrailer(self, name):
        """Emit the fall-through error (no alternative matched) and the exit path."""
        self.emit("", 0)
        self.emit("tmp = PyObject_Repr(obj);", 1)
        # there's really nothing more we can do if this fails ...
        self.emit("if (tmp == NULL) goto failed;", 1)
        error = "expected some sort of %s, but got %%.400s" % name
        format = "PyErr_Format(PyExc_TypeError, \"%s\", PyString_AS_STRING(tmp));"
        self.emit(format % error, 1, reflow=False)
        self.emit("failed:", 0)
        self.emit("Py_XDECREF(tmp);", 1)
        self.emit("return 1;", 1)
        self.emit("}", 0)
        self.emit("", 0)

    def simpleSum(self, sum, name):
        """Emit the converter for a sum whose alternatives carry no fields."""
        self.funcHeader(name)
        for t in sum.types:
            self.emit("if (PyObject_IsInstance(obj, (PyObject*)%s_type)) {" % t.name, 1)
            self.emit("*out = %s;" % t.name, 2)
            self.emit("return 0;", 2)
            self.emit("}", 1)
        self.sumTrailer(name)

    def buildArgs(self, fields):
        """Return the C argument list: the given names followed by ``arena``."""
        return ", ".join(fields + ["arena"])

    def complexSum(self, sum, name):
        """Emit the converter for a sum whose alternatives have fields."""
        self.funcHeader(name)
        for a in sum.attributes:
            self.visitAttributeDeclaration(a, name, sum=sum)
        self.emit("", 0)
        # XXX: should we only do this for 'expr'?
        self.emit("if (obj == Py_None) {", 1)
        self.emit("*out = NULL;", 2)
        self.emit("return 0;", 2)
        self.emit("}", 1)
        for a in sum.attributes:
            self.visitField(a, name, sum=sum, depth=1)
        for t in sum.types:
            self.emit("if (PyObject_IsInstance(obj, (PyObject*)%s_type)) {" % t.name, 1)
            for f in t.fields:
                self.visitFieldDeclaration(f, t.name, sum=sum, depth=2)
            self.emit("", 0)
            for f in t.fields:
                self.visitField(f, t.name, sum=sum, depth=2)
            # Constructor arguments: the alternative's own fields first,
            # then the attributes shared by the whole sum.
            args = [f.name.value for f in t.fields] + [a.name.value for a in sum.attributes]
            self.emit("*out = %s(%s);" % (t.name, self.buildArgs(args)), 2)
            self.emit("if (*out == NULL) goto failed;", 2)
            self.emit("return 0;", 2)
            self.emit("}", 1)
        self.sumTrailer(name)

    def visitAttributeDeclaration(self, a, name, sum=None):
        """Emit the local C variable that receives attribute *a*."""
        # The default used to be ``sum=sum``, which silently captured the
        # builtin ``sum``; the argument is not used in the body.
        ctype = get_c_type(a.type)
        self.emit("%s %s;" % (ctype, a.name), 1)

    def visitSum(self, sum, name):
        """Dispatch to simpleSum() or complexSum() depending on is_simple()."""
        if is_simple(sum):
            self.simpleSum(sum, name)
        else:
            self.complexSum(sum, name)

    def visitProduct(self, prod, name):
        """Emit the converter for a product type."""
        ctype = get_c_type(name)
        self.emit("int", 0)
        self.emit("obj2ast_%s(PyObject* obj, %s* out, PyArena* arena)" % (name, ctype), 0)
        self.emit("{", 0)
        self.emit("PyObject* tmp = NULL;", 1)
        for f in prod.fields:
            self.visitFieldDeclaration(f, name, prod=prod, depth=1)
        self.emit("", 0)
        for f in prod.fields:
            self.visitField(f, name, prod=prod, depth=1)
        args = [f.name.value for f in prod.fields]
        self.emit("*out = %s(%s);" % (name, self.buildArgs(args)), 1)
        # Mirror complexSum(): do not report success when the constructor
        # returned NULL.
        self.emit("if (*out == NULL) goto failed;", 1)
        self.emit("return 0;", 1)
        self.emit("failed:", 0)
        self.emit("Py_XDECREF(tmp);", 1)
        self.emit("return 1;", 1)
        self.emit("}", 0)
        self.emit("", 0)

    def visitFieldDeclaration(self, field, name, sum=None, prod=None, depth=0):
        """Emit the local C variable that receives *field*."""
        if field.seq:
            # Sequences of simple types use asdl_int_seq, all others asdl_seq.
            if self.isSimpleType(field):
                self.emit("asdl_int_seq* %s;" % field.name, depth)
            else:
                self.emit("asdl_seq* %s;" % field.name, depth)
        else:
            ctype = get_c_type(field.type)
            self.emit("%s %s;" % (ctype, field.name), depth)

    def isSimpleSum(self, field):
        """Return True if the field's type is one of the known simple sums."""
        # XXX can the members of this list be determined automatically?
        return field.type.value in ('expr_context', 'boolop', 'operator',
                                    'unaryop', 'cmpop')

    def isNumeric(self, field):
        """Return True if the field's C type is ``int`` or ``bool``."""
        return get_c_type(field.type) in ("int", "bool")

    def isSimpleType(self, field):
        """Return True for simple-sum or numeric fields."""
        return self.isSimpleSum(field) or self.isNumeric(field)

    def visitField(self, field, name, sum=None, prod=None, depth=0):
        """Emit the code that reads attribute *field* from ``obj``.

        Present attributes are converted with the matching obj2ast_*
        function (element by element for sequences, which must be lists).
        A missing required attribute raises TypeError in the generated
        code; a missing optional one is defaulted to 0 or NULL.

        Raises TypeError at generation time when no default is known for
        an optional field.
        """
        ctype = get_c_type(field.type)
        self.emit("if (PyObject_HasAttrString(obj, \"%s\")) {" % field.name, depth)
        self.emit("int res;", depth+1)
        if field.seq:
            self.emit("Py_ssize_t len;", depth+1)
            self.emit("Py_ssize_t i;", depth+1)
        self.emit("tmp = PyObject_GetAttrString(obj, \"%s\");" % field.name, depth+1)
        self.emit("if (tmp == NULL) goto failed;", depth+1)
        if field.seq:
            self.emit("if (!PyList_Check(tmp)) {", depth+1)
            self.emit("PyErr_Format(PyExc_TypeError, \"%s field \\\"%s\\\" must "
                      "be a list, not a %%.200s\", tmp->ob_type->tp_name);" %
                      (name, field.name),
                      depth+2, reflow=False)
            self.emit("goto failed;", depth+2)
            self.emit("}", depth+1)
            self.emit("len = PyList_GET_SIZE(tmp);", depth+1)
            if self.isSimpleType(field):
                self.emit("%s = asdl_int_seq_new(len, arena);" % field.name, depth+1)
            else:
                self.emit("%s = asdl_seq_new(len, arena);" % field.name, depth+1)
            self.emit("if (%s == NULL) goto failed;" % field.name, depth+1)
            self.emit("for (i = 0; i < len; i++) {", depth+1)
            self.emit("%s value;" % ctype, depth+2)
            self.emit("res = obj2ast_%s(PyList_GET_ITEM(tmp, i), &value, arena);" %
                      field.type, depth+2, reflow=False)
            self.emit("if (res != 0) goto failed;", depth+2)
            self.emit("asdl_seq_SET(%s, i, value);" % field.name, depth+2)
            self.emit("}", depth+1)
        else:
            self.emit("res = obj2ast_%s(tmp, &%s, arena);" %
                      (field.type, field.name), depth+1)
            self.emit("if (res != 0) goto failed;", depth+1)

        self.emit("Py_XDECREF(tmp);", depth+1)
        self.emit("tmp = NULL;", depth+1)
        self.emit("} else {", depth)
        if not field.opt:
            message = "required field \\\"%s\\\" missing from %s" % (field.name, name)
            format = "PyErr_SetString(PyExc_TypeError, \"%s\");"
            self.emit(format % message, depth+1, reflow=False)
            self.emit("return 1;", depth+1)
        else:
            if self.isNumeric(field):
                self.emit("%s = 0;" % field.name, depth+1)
            elif not self.isSimpleType(field):
                self.emit("%s = NULL;" % field.name, depth+1)
            else:
                raise TypeError("could not determine the default value for %s" % field.name)
        self.emit("}", depth)
class MarshalPrototypeVisitor(PickleVisitor):
def prototype(self, sum, name):
@ -354,6 +534,7 @@ class MarshalPrototypeVisitor(PickleVisitor):
visitProduct = visitSum = prototype
class PyTypesDeclareVisitor(PickleVisitor):
def visitProduct(self, prod, name):
@ -439,6 +620,8 @@ static int add_attributes(PyTypeObject* type, char**attrs, int num_fields)
return result;
}
/* Conversion AST -> Python */
static PyObject* ast2obj_list(asdl_seq *seq, PyObject* (*func)(void*))
{
int i, n = asdl_seq_LEN(seq);
@ -471,6 +654,42 @@ static PyObject* ast2obj_int(long b)
{
return PyLong_FromLong(b);
}
/* Conversion Python -> AST */
/*
 * Convert an arbitrary Python object for use as an AST "object" field.
 *
 * Py_None is mapped to NULL.  Any other object is registered with the arena
 * through PyArena_AddPyObject() and then stored in *out with an additional
 * reference.
 *
 * Returns 0 on success.  Returns 1 with *out set to NULL if the object could
 * not be registered with the arena; previously that failure was ignored and
 * the object was still INCREF'd and handed out.
 */
static int obj2ast_object(PyObject* obj, PyObject** out, PyArena* arena)
{
        if (obj == Py_None)
                obj = NULL;
        if (obj) {
                if (PyArena_AddPyObject(arena, obj) < 0) {
                        *out = NULL;
                        return 1;
                }
                Py_INCREF(obj);
        }
        *out = obj;
        return 0;
}
#define obj2ast_identifier obj2ast_object
#define obj2ast_string obj2ast_object
/*
 * Convert a Python int object to a C int for an AST "int" field.
 *
 * On success the value is stored in *out and 0 is returned.  Returns 1 with
 * an exception set when obj is not an int (ValueError), when PyLong_AsLong()
 * fails, or when the value does not fit in a C int (OverflowError).  The
 * last case used to be silently truncated by the cast to int.
 *
 * The arena argument is not used by this function.
 */
static int obj2ast_int(PyObject* obj, int* out, PyArena* arena)
{
        long value;
        if (!PyLong_Check(obj)) {
                /* NOTE(review): assumes PyString_AS_STRING() is valid for
                   whatever PyObject_Repr() returns on this branch; confirm. */
                PyObject *s = PyObject_Repr(obj);
                if (s == NULL) return 1;
                PyErr_Format(PyExc_ValueError, "invalid integer value: %.400s",
                             PyString_AS_STRING(s));
                Py_DECREF(s);
                return 1;
        }

        value = PyLong_AsLong(obj);
        if (value == -1 && PyErr_Occurred())
                return 1;
        /* Reject values a C int cannot represent instead of truncating. */
        if (value < INT_MIN || value > INT_MAX) {
                PyErr_SetString(PyExc_OverflowError,
                                "Python int too large to convert to C int");
                return 1;
        }
        *out = (int)value;
        return 0;
}
""", 0, reflow=False)
self.emit("static int init_types(void)",0)
@ -518,6 +737,7 @@ static PyObject* ast2obj_int(long b)
(cons.name, cons.name), 1)
self.emit("if (!%s_singleton) return 0;" % cons.name, 1)
def parse_version(mod):
    """Return ``mod.version.value`` without its first 12 and last 3 characters.

    NOTE(review): presumably the value is a quoted ``"$Revision: N $"``
    keyword so that only ``N`` remains; confirm against the grammar file.
    """
    raw = mod.version.value
    prefix_len, suffix_len = 12, 3
    return raw[prefix_len:-suffix_len]
@ -557,6 +777,7 @@ class ASTModuleVisitor(PickleVisitor):
def addObj(self, name):
self.emit('if (PyDict_SetItemString(d, "%s", (PyObject*)%s_type) < 0) return;' % (name, name), 1)
_SPECIALIZED_SEQUENCES = ('stmt', 'expr')
def find_sequence(fields, doing_specialization):
@ -582,6 +803,7 @@ class StaticVisitor(PickleVisitor):
def visit(self, object):
self.emit(self.CODE, 0, reflow=False)
class ObjVisitor(PickleVisitor):
def func_begin(self, name):
@ -632,8 +854,12 @@ class ObjVisitor(PickleVisitor):
self.emit("case %s:" % t.name, 2)
self.emit("Py_INCREF(%s_singleton);" % t.name, 3)
self.emit("return %s_singleton;" % t.name, 3)
self.emit("default:" % name, 2)
self.emit('/* should never happen, but just in case ... */', 3)
code = "PyErr_Format(PyExc_SystemError, \"unknown %s found\");" % name
self.emit(code, 3, reflow=False)
self.emit("return NULL;", 3)
self.emit("}", 1)
self.emit("return NULL; /* cannot happen */", 1)
self.emit("}", 0)
def visitProduct(self, prod, name):
@ -707,6 +933,27 @@ PyObject* PyAST_mod2obj(mod_ty t)
init_types();
return ast2obj_mod(t);
}
/*
 * Convert a Python-level AST object into its C mod_ty representation,
 * using arena for the conversion.
 *
 * Returns the converted module on success.  Returns NULL when:
 *   - the instance check itself fails (PyObject_IsInstance() returned -1;
 *     this used to be treated as "is an instance");
 *   - ast is not an instance of the mod type (TypeError is set here);
 *   - obj2ast_mod() reports failure.
 */
mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena)
{
        mod_ty res;
        int is_mod;
        init_types();
        /* Cast for consistency with every other use of the *_type objects. */
        is_mod = PyObject_IsInstance(ast, (PyObject*)mod_type);
        if (is_mod == -1)
                return NULL;
        if (!is_mod) {
                PyErr_SetString(PyExc_TypeError, "expected either Module, Interactive "
                                "or Expression node");
                return NULL;
        }
        if (obj2ast_mod(ast, &res, arena) != 0)
                return NULL;
        else
                return res;
}
/*
 * Report whether obj is an instance of the AST base type.
 *
 * The result of PyObject_IsInstance() is returned unchanged, so callers
 * see whatever that function reports, including its error value.
 */
int PyAST_Check(PyObject* obj)
{
        int is_ast;

        init_types();
        is_ast = PyObject_IsInstance(obj, (PyObject*)AST_type);
        return is_ast;
}
"""
class ChainOfVisitors:
@ -750,6 +997,8 @@ def main(srcfile):
)
c.visit(mod)
f.write("PyObject* PyAST_mod2obj(mod_ty t);\n")
print >>f, "mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena);"
print >>f, "int PyAST_Check(PyObject* obj);"
f.close()
if SRC_DIR:
@ -764,8 +1013,10 @@ def main(srcfile):
v = ChainOfVisitors(
PyTypesDeclareVisitor(f),
PyTypesVisitor(f),
Obj2ModPrototypeVisitor(f),
FunctionVisitor(f),
ObjVisitor(f),
Obj2ModVisitor(f),
ASTModuleVisitor(f),
PartingShots(f),
)

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,7 @@
/* Built-in functions */
#include "Python.h"
#include "Python-ast.h"
#include "node.h"
#include "code.h"
@ -527,22 +528,6 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
cf.cf_flags = supplied_flags | PyCF_SOURCE_IS_UTF8;
str = source_as_string(cmd);
if (str == NULL)
return NULL;
if (strcmp(startstr, "exec") == 0)
start = Py_file_input;
else if (strcmp(startstr, "eval") == 0)
start = Py_eval_input;
else if (strcmp(startstr, "single") == 0)
start = Py_single_input;
else {
PyErr_SetString(PyExc_ValueError,
"compile() arg 3 must be 'exec' or 'eval' or 'single'");
return NULL;
}
if (supplied_flags &
~(PyCF_MASK | PyCF_MASK_OBSOLETE | PyCF_DONT_IMPLY_DEDENT | PyCF_ONLY_AST))
{
@ -555,6 +540,48 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
if (!dont_inherit) {
PyEval_MergeCompilerFlags(&cf);
}
/* AST input: either hand the tree back unchanged (PyCF_ONLY_AST) or
   convert it to the C representation and compile that. */
if (PyAST_Check(cmd)) {
	PyObject *result;
	if (supplied_flags & PyCF_ONLY_AST) {
		Py_INCREF(cmd);
		result = cmd;
	}
	else {
		PyArena *arena;
		mod_ty mod;

		arena = PyArena_New();
		/* Do not pass a NULL arena on to the conversion. */
		if (arena == NULL)
			return NULL;
		mod = PyAST_obj2mod(cmd, arena);
		if (mod == NULL) {
			PyArena_Free(arena);
			return NULL;
		}
		result = (PyObject*)PyAST_Compile(mod, filename,
						  &cf, arena);
		PyArena_Free(arena);
	}
	return result;
}

/* XXX: is it possible to pass start to the PyAST_ branch? */
/* Source-string input: map the mode name onto a grammar start symbol.
   Note that the AST branch above returns before the mode is checked. */
if (strcmp(startstr, "exec") == 0)
	start = Py_file_input;
else if (strcmp(startstr, "eval") == 0)
	start = Py_eval_input;
else if (strcmp(startstr, "single") == 0)
	start = Py_single_input;
else {
	/* The two literals are concatenated; the trailing space keeps
	   "'exec'" and "or" apart in the message. */
	PyErr_SetString(PyExc_ValueError,
			"compile() arg 3 must be 'exec' "
			"or 'eval' or 'single'");
	return NULL;
}
str = source_as_string(cmd);
if (str == NULL)
return NULL;
return Py_CompileStringFlags(str, filename, start, &cf);
}

View File

@ -2356,8 +2356,11 @@ unaryop(unaryop_ty op)
return UNARY_POSITIVE;
case USub:
return UNARY_NEGATIVE;
default:
PyErr_Format(PyExc_SystemError,
"unary op %d should not be possible", op);
return 0;
}
return 0;
}
static int
@ -2388,8 +2391,11 @@ binop(struct compiler *c, operator_ty op)
return BINARY_AND;
case FloorDiv:
return BINARY_FLOOR_DIVIDE;
default:
PyErr_Format(PyExc_SystemError,
"binary op %d should not be possible", op);
return 0;
}
return 0;
}
static int
@ -2416,8 +2422,9 @@ cmpop(cmpop_ty op)
return PyCmp_IN;
case NotIn:
return PyCmp_NOT_IN;
default:
return PyCmp_BAD;
}
return PyCmp_BAD;
}
static int
@ -2448,10 +2455,11 @@ inplace_binop(struct compiler *c, operator_ty op)
return INPLACE_AND;
case FloorDiv:
return INPLACE_FLOOR_DIVIDE;
default:
PyErr_Format(PyExc_SystemError,
"inplace binary op %d should not be possible", op);
return 0;
}
PyErr_Format(PyExc_SystemError,
"inplace binary op %d should not be possible", op);
return 0;
}
static int