diff --git a/Tools/compiler/doc/astdocgen.py b/Tools/compiler/doc/astdocgen.py new file mode 100644 index 00000000000..c593ce10c5f --- /dev/null +++ b/Tools/compiler/doc/astdocgen.py @@ -0,0 +1,30 @@ +# Lame substitute for a fine script to generate the table from ast.txt + +from compiler import astgen + +AST_DEF = '../compiler/ast.txt' + +def sort(l): + l = l[:] + l.sort(lambda a, b: cmp(a.name, b.name)) + return l + +def main(): + nodes = astgen.parse_spec(AST_DEF) + print "\\begin{longtableiii}{lll}{class}{Node type}{Attribute}{Value}" + print + for node in sort(nodes): + if node.argnames: + print "\\lineiii{%s}{%s}{}" % (node.name, node.argnames[0]) + else: + print "\\lineiii{%s}{}{}" % node.name + + for arg in node.argnames[1:]: + print "\\lineiii{}{%s}{}" % arg + print "\\hline", "\n" + print "\\end{longtableiii}" + + +if __name__ == "__main__": + main() + diff --git a/Tools/compiler/doc/asttable.tex b/Tools/compiler/doc/asttable.tex new file mode 100644 index 00000000000..84b3346d99f --- /dev/null +++ b/Tools/compiler/doc/asttable.tex @@ -0,0 +1,253 @@ +\begin{longtableiii}{lll}{class}{Node type}{Attribute}{Value} + +\lineiii{Add}{left}{left operand} +\lineiii{}{right}{right operand} +\hline + +\lineiii{And}{nodes}{list of operands} +\hline + +\lineiii{AssAttr}{}{\emph{attribute as target of assignment}} +\lineiii{}{expr}{expression on the left-hand side of the dot} +\lineiii{}{attrname}{the attribute name, a string} +\lineiii{}{flags}{XXX} +\hline + +\lineiii{AssList}{nodes}{list of list elements being assigned to} +\hline + +\lineiii{AssName}{name}{name being assigned to} +\lineiii{}{flags}{XXX} +\hline + +\lineiii{AssTuple}{nodes}{list of tuple elements being assigned to} +\hline + +\lineiii{Assert}{test}{the expression to be tested} +\lineiii{}{fail}{the value of the \exception{AssertionError}} +\hline + +\lineiii{Assign}{nodes}{a list of assignment targets, one per equal sign} +\lineiii{}{expr}{the value being assigned} +\hline + 
+\lineiii{AugAssign}{node}{} +\lineiii{}{op}{} +\lineiii{}{expr}{} +\hline + +\lineiii{Backquote}{expr}{} +\hline + +\lineiii{Bitand}{nodes}{} +\hline + +\lineiii{Bitor}{nodes}{} +\hline + +\lineiii{Bitxor}{nodes}{} +\hline + +\lineiii{Break}{}{} +\hline + +\lineiii{CallFunc}{node}{expression for the callee} +\lineiii{}{args}{a list of arguments} +\lineiii{}{star_args}{the extended *-arg value} +\lineiii{}{dstar_args}{the extended **-arg value} +\hline + +\lineiii{Class}{name}{the name of the class, a string} +\lineiii{}{bases}{a list of base classes} +\lineiii{}{doc}{doc string, a string or \code{None}} +\lineiii{}{code}{the body of the class statement} +\hline + +\lineiii{Compare}{expr}{} +\lineiii{}{ops}{} +\hline + +\lineiii{Const}{value}{} +\hline + +\lineiii{Continue}{}{} +\hline + +\lineiii{Dict}{items}{} +\hline + +\lineiii{Discard}{expr}{} +\hline + +\lineiii{Div}{left}{} +\lineiii{}{right}{} +\hline + +\lineiii{Ellipsis}{}{} +\hline + +\lineiii{Exec}{expr}{} +\lineiii{}{locals}{} +\lineiii{}{globals}{} +\hline + +\lineiii{For}{assign}{} +\lineiii{}{list}{} +\lineiii{}{body}{} +\lineiii{}{else_}{} +\hline + +\lineiii{From}{modname}{} +\lineiii{}{names}{} +\hline + +\lineiii{Function}{name}{name used in def, a string} +\lineiii{}{argnames}{list of argument names, as strings} +\lineiii{}{defaults}{list of default values} +\lineiii{}{flags}{xxx} +\lineiii{}{doc}{doc string, a string or \code{None}} +\lineiii{}{code}{the body of the function} +\hline + +\lineiii{Getattr}{expr}{} +\lineiii{}{attrname}{} +\hline + +\lineiii{Global}{names}{} +\hline + +\lineiii{If}{tests}{} +\lineiii{}{else_}{} +\hline + +\lineiii{Import}{names}{} +\hline + +\lineiii{Invert}{expr}{} +\hline + +\lineiii{Keyword}{name}{} +\lineiii{}{expr}{} +\hline + +\lineiii{Lambda}{argnames}{} +\lineiii{}{defaults}{} +\lineiii{}{flags}{} +\lineiii{}{code}{} +\hline + +\lineiii{LeftShift}{left}{} +\lineiii{}{right}{} +\hline + +\lineiii{List}{nodes}{} +\hline + +\lineiii{ListComp}{expr}{} 
+\lineiii{}{quals}{} +\hline + +\lineiii{ListCompFor}{assign}{} +\lineiii{}{list}{} +\lineiii{}{ifs}{} +\hline + +\lineiii{ListCompIf}{test}{} +\hline + +\lineiii{Mod}{left}{} +\lineiii{}{right}{} +\hline + +\lineiii{Module}{doc}{doc string, a string or \code{None}} +\lineiii{}{node}{body of the module, a \class{Stmt}} +\hline + +\lineiii{Mul}{left}{} +\lineiii{}{right}{} +\hline + +\lineiii{Name}{name}{} +\hline + +\lineiii{Not}{expr}{} +\hline + +\lineiii{Or}{nodes}{} +\hline + +\lineiii{Pass}{}{} +\hline + +\lineiii{Power}{left}{} +\lineiii{}{right}{} +\hline + +\lineiii{Print}{nodes}{} +\lineiii{}{dest}{} +\hline + +\lineiii{Printnl}{nodes}{} +\lineiii{}{dest}{} +\hline + +\lineiii{Raise}{expr1}{} +\lineiii{}{expr2}{} +\lineiii{}{expr3}{} +\hline + +\lineiii{Return}{value}{} +\hline + +\lineiii{RightShift}{left}{} +\lineiii{}{right}{} +\hline + +\lineiii{Slice}{expr}{} +\lineiii{}{flags}{} +\lineiii{}{lower}{} +\lineiii{}{upper}{} +\hline + +\lineiii{Sliceobj}{nodes}{list of statements} +\hline + +\lineiii{Stmt}{nodes}{} +\hline + +\lineiii{Sub}{left}{} +\lineiii{}{right}{} +\hline + +\lineiii{Subscript}{expr}{} +\lineiii{}{flags}{} +\lineiii{}{subs}{} +\hline + +\lineiii{TryExcept}{body}{} +\lineiii{}{handlers}{} +\lineiii{}{else_}{} +\hline + +\lineiii{TryFinally}{body}{} +\lineiii{}{final}{} +\hline + +\lineiii{Tuple}{nodes}{} +\hline + +\lineiii{UnaryAdd}{expr}{} +\hline + +\lineiii{UnarySub}{expr}{} +\hline + +\lineiii{While}{test}{} +\lineiii{}{body}{} +\lineiii{}{else_}{} +\hline + +\lineiii{Yield}{value}{} +\hline + +\end{longtableiii} diff --git a/Tools/compiler/doc/compiler.tex b/Tools/compiler/doc/compiler.tex new file mode 100644 index 00000000000..98ac5f27eca --- /dev/null +++ b/Tools/compiler/doc/compiler.tex @@ -0,0 +1,305 @@ +% Complete documentation on the extended LaTeX markup used for Python +% documentation is available in ``Documenting Python'', which is part +% of the standard documentation for Python. 
It may be found online +% at: +% +% http://www.python.org/doc/current/doc/doc.html + +\documentclass{manual} + +\title{Python compiler package} + +\author{Jeremy Hylton} + +% Please at least include a long-lived email address; +% the rest is at your discretion. +\authoraddress{ + PythonLabs \\ + Zope Corp. \\ + Email: \email{jeremy@zope.com} +} + +\date{August 15, 2001} % update before release! + % Use an explicit date so that reformatting + % doesn't cause a new date to be used. Setting + % the date to \today can be used during draft + % stages to make it easier to handle versions. + +\release{2.2} % release version; this is used to define the + % \version macro + +\makeindex % tell \index to actually write the .idx file +\makemodindex % If this contains a lot of module sections. + + +\begin{document} + +\maketitle + +% This makes the contents more accessible from the front page of the HTML. +\ifhtml +\chapter*{Front Matter\label{front}} +\fi + +%\input{copyright} + +\begin{abstract} + +\noindent +The Python compiler package is a tool for analyzing Python source code +and generating Python bytecode. The compiler contains libraries to +generate an abstract syntax tree from Python source code and to +generate Python bytecode from the tree. + +\end{abstract} + +\tableofcontents + +\chapter{Introduction\label{Introduction}} + +XXX Need basic intro + +XXX what are the major advantages... the abstract syntax is much +closer to the python source... + +\section{The basic interface} + +The top-level of the package defines four functions. + +\begin{funcdesc}{parse}{buf} +Returns an abstract syntax tree for the Python source code in \var{buf}. +The function raises SyntaxError if there is an error in the source +code. The return value is a \class{compiler.ast.Module} instance that +contains the tree. +\end{funcdesc} + +\begin{funcdesc}{parseFile}{path} +Return an abstract syntax tree for the Python source code in the file +specified by \var{path}. 
It is equivalent to \code{parse(open(path).read())}. +\end{funcdesc} + +\begin{funcdesc}{walk}{ast, visitor\optional{, verbose=None}} +Do a pre-order walk over the abstract syntax tree \var{ast}. Call the +appropriate method on the \var{visitor} instance for each node +encountered. +\end{funcdesc} + +\begin{funcdesc}{compile}{filename} +Compile the file \var{filename} and generate \var{filename}.pyc. +\end{funcdesc} + +The \module{compiler} package contains the following modules: +\module{ast}, \module{consts}, \module{future}, \module{misc}, +\module{pyassem}, \module{pycodegen}, \module{symbols}, +\module{transformer}, and \module{visitor}. + +\section{Limitations} + +There are some problems with the error checking of the compiler +package. The interpreter detects syntax errors in two distinct +phases. One set of errors is detected by the interpreter's parser, +the other set by the compiler. The compiler package relies on the +interpreter's parser, so it gets the first phase of error checking for +free. It implements the second phase itself, and that implementation is +incomplete. For example, the compiler package does not raise an error +if a name appears more than once in an argument list: +\code{def f(x, x): ...} + +\chapter{Python Abstract Syntax} + +\section{Introduction} + +The \module{compiler.ast} module defines an abstract syntax for +Python. In the abstract syntax tree, each node represents a syntactic +construct. The root of the tree is a \class{Module} object. + +The abstract syntax offers a higher-level interface to parsed Python +source code. The \module{parser} module and the compiler written in C +for the Python interpreter use a concrete syntax tree. The concrete +syntax is tied closely to the grammar description used for the Python +parser. Instead of a single node for a construct, there are often +several levels of nested nodes that are introduced by Python's +precedence rules. 
+ +The abstract syntax tree is created by the +\module{compiler.transformer} module. The transformer relies on the +builtin Python parser to generate a concrete syntax tree. It +generates an abstract syntax tree from the concrete tree. + +The \module{transformer} module was created by Greg Stein and Bill +Tutt for the Python-to-C compiler. The current version contains a +number of modifications and improvements, but the basic form of the +abstract syntax and of the transformer are due to Stein and Tutt. + +\section{AST Nodes} + +The \module{ast} module is generated from a text file that describes +each node type and its elements. Each node type is represented as a +class that inherits from the abstract base class \class{ast.Node} and +defines a set of named attributes for child nodes. + +\begin{classdesc}{Node}{} + + The \class{Node} instances are created automatically by the parser + generator. The recommended interface for specific \class{Node} + instances is to use the public attributes to access child nodes. A + public attribute may be bound to a single node or to a sequence of + nodes, depending on the \class{Node} type. For example, the + \member{bases} attribute of the \class{Class} node, is bound to a + list of base class nodes, and the \member{doc} attribute is bound to + a single node. + + Each \class{Node} instance has a \member{lineno} attribute which may + be \code{None}. XXX Not sure what the rules are for which nodes + will have a useful lineno. + + \begin{methoddesc}{getChildren}{} + Returns a flattened list of the child nodes and objects in the + order they occur. Specifically, the order of the nodes is the + order in which they appear in the Python grammar. Not all of the + children are \class{Node} instances. The names of functions and + classes, for example, are plain strings. + \end{methoddesc} + + \begin{methoddesc}{getChildNodes}{} + Returns a flattened list of the child nodes in the order they + occur. 
This method is like \method{getChildren}, except that it + only returns those children that are \class{Node} instances. + \end{methoddesc} + +\end{classdesc} + +Two examples illustrate the general structure of \class{Node} +classes. The while statement is defined by the following grammar +production: + +\begin{verbatim} +while_stmt: "while" expression ":" suite + ["else" ":" suite] +\end{verbatim} + +The \class{While} node has three attributes: \member{test}, +\member{body}, and \member{else_}. (If the natural name for an +attribute is also a Python reserved word, it can't be used as an +attribute name. An underscore is appended to the word to make it +legal, hence \code{else_} instead of \code{else}.) + +The if statement is more complicated because it can include several +tests. + +\begin{verbatim} +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] +\end{verbatim} + +The \class{If} node only defines two attributes: \member{tests} and +\member{else_}. The \member{tests} attribute is a sequence of (test +expression, consequent body) pairs. There is one pair for each if/elif +clause. The first element of the pair is the test expression. The +second element is a \class{Stmt} node that contains the code to +execute if the test is true. + +The \method{getChildren()} method of \class{If} returns a flat list of +child nodes. If there are three if/elif clauses and no else clause, +then \method{getChildren()} will return a list of six elements: the +first test expression, the first \class{Stmt}, the second test +expression, etc. + +The following table lists each of the \class{Node} subclasses defined +in \module{compiler.ast} and each of the public attributes available +on their instances. The values of most of the attributes are +themselves \class{Node} instances or sequences of instances. When the +value is something other than an instance, the type is noted in the +comment. 
The attributes are listed in the order in which they are +returned by \method{getChildren} and \method{getChildNodes}. + +\input{asttable} + +\section{Assignment nodes} + +There is a collection of nodes used to represent assignments. Each +assignment statement in the source code becomes a single +\class{Assign} node in the AST. The \member{nodes} attribute is a +list that contains a node for each assignment target. This is +necessary because assignment can be chained, e.g. \code{a = b = 2}. +Each \class{Node} in the list will be one of the following classes: +\class{AssAttr}, \class{AssList}, \class{AssName}, or +\class{AssTuple}. + +XXX Explain what the AssXXX nodes are for. Mention \code{a.b.c = 2} +as an example. Explain what the flags are for. + +\chapter{Using Visitors to Walk ASTs} + +The visitor pattern is ... The \module{compiler} package uses a +variant on the visitor pattern that takes advantage of Python's +introspection features to eliminate the need for much of the visitor's +infrastructure. + +The classes being visited do not need to be programmed to accept +visitors. The visitor need only define visit methods for classes it +is specifically interested in; a default visit method can handle the +rest. + +XXX The magic \method{visit()} method for visitors. + +\begin{classdesc}{ASTVisitor}{} + +The \class{ASTVisitor} is responsible for walking over the tree in the +correct order. A walk begins with a call to \method{preorder()}. For +each node, it checks the \var{visitor} argument to \method{preorder()} +for a method named `visitNodeType,' where NodeType is the name of the +node's class, e.g. for a \class{While} node a \method{visitWhile} +would be called. If the method exists, it is called with the node as +its first argument. + +The visitor method for a particular node type can control how child +nodes are visited during the walk. 
The \class{ASTVisitor} modifies +the visitor argument by adding a visit method to the visitor; this +method can be used to visit a particular child node. If no visitor method is +found for a particular node type, the \method{default} method is +called. + +XXX describe extra arguments + +\begin{methoddesc}{default}{node\optional{, *args}} +\end{methoddesc} + +\begin{methoddesc}{dispatch}{node\optional{, *args}} +\end{methoddesc} + +\begin{methoddesc}{preorder}{tree, visitor} +\end{methoddesc} + +\end{classdesc} + +\begin{funcdesc}{walk}{tree, visitor\optional{, verbose=None}} +\end{funcdesc} + +\chapter{Bytecode Generation} + +The code generator is a visitor that emits bytecodes. Each visit method +can call the \method{emit} method to emit a new bytecode. The basic +code generator is specialized for modules, classes, and functions. An +assembler converts the emitted instructions to the low-level bytecode +format. It handles things like generation of constant lists of code +objects and calculation of jump offsets. + +% +% The ugly "%begin{latexonly}" pseudo-environments are really just to +% keep LaTeX2HTML quiet during the \renewcommand{} macros; they're +% not really valuable. +% +% If you don't want the Module Index, you can remove all of this up +% until the second \input line. +% +%begin{latexonly} +\renewcommand{\indexname}{Module Index} +%end{latexonly} +\input{mod\jobname.ind} % Module Index + +%begin{latexonly} +\renewcommand{\indexname}{Index} +%end{latexonly} +\input{\jobname.ind} % Index + +\end{document}