mirror of https://github.com/python/cpython
First day's progress on documentation
This commit is contained in:
parent
ce575bac5e
commit
76f42ac512
|
@ -0,0 +1,30 @@
|
|||
# Lame substitute for a fine script to generate the table from ast.txt
|
||||
|
||||
from compiler import astgen
|
||||
|
||||
AST_DEF = '../compiler/ast.txt'
|
||||
|
||||
def sort(l):
|
||||
l = l[:]
|
||||
l.sort(lambda a, b: cmp(a.name, b.name))
|
||||
return l
|
||||
|
||||
def main():
|
||||
nodes = astgen.parse_spec(AST_DEF)
|
||||
print "\\begin{longtableiii}{lll}{class}{Node type}{Attribute}{Value}"
|
||||
print
|
||||
for node in sort(nodes):
|
||||
if node.argnames:
|
||||
print "\\lineiii{%s}{%s}{}" % (node.name, node.argnames[0])
|
||||
else:
|
||||
print "\\lineiii{%s}{}{}" % node.name
|
||||
|
||||
for arg in node.argnames[1:]:
|
||||
print "\\lineiii{}{%s}{}" % arg
|
||||
print "\\hline", "\n"
|
||||
print "\\end{longtableiii}"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -0,0 +1,253 @@
|
|||
\begin{longtableiii}{lll}{class}{Node type}{Attribute}{Value}
|
||||
|
||||
\lineiii{Add}{left}{left operand}
|
||||
\lineiii{}{right}{right operand}
|
||||
\hline
|
||||
|
||||
\lineiii{And}{nodes}{list of operands}
|
||||
\hline
|
||||
|
||||
\lineiii{AssAttr}{}{\emph{attribute as target of assignment}}
|
||||
\lineiii{}{expr}{expression on the left-hand side of the dot}
|
||||
\lineiii{}{attrname}{the attribute name, a string}
|
||||
\lineiii{}{flags}{XXX}
|
||||
\hline
|
||||
|
||||
\lineiii{AssList}{nodes}{list of list elements being assigned to}
|
||||
\hline
|
||||
|
||||
\lineiii{AssName}{name}{name being assigned to}
|
||||
\lineiii{}{flags}{XXX}
|
||||
\hline
|
||||
|
||||
\lineiii{AssTuple}{nodes}{list of tuple elements being assigned to}
|
||||
\hline
|
||||
|
||||
\lineiii{Assert}{test}{the expression to be tested}
|
||||
\lineiii{}{fail}{the value of the \exception{AssertionError}}
|
||||
\hline
|
||||
|
||||
\lineiii{Assign}{nodes}{a list of assignment targets, one per equal sign}
|
||||
\lineiii{}{expr}{the value being assigned}
|
||||
\hline
|
||||
|
||||
\lineiii{AugAssign}{node}{}
|
||||
\lineiii{}{op}{}
|
||||
\lineiii{}{expr}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Backquote}{expr}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Bitand}{nodes}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Bitor}{nodes}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Bitxor}{nodes}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Break}{}{}
|
||||
\hline
|
||||
|
||||
\lineiii{CallFunc}{node}{expression for the callee}
|
||||
\lineiii{}{args}{a list of arguments}
|
||||
\lineiii{}{star_args}{the extended *-arg value}
|
||||
\lineiii{}{dstar_args}{the extended **-arg value}
|
||||
\hline
|
||||
|
||||
\lineiii{Class}{name}{the name of the class, a string}
|
||||
\lineiii{}{bases}{a list of base classes}
|
||||
\lineiii{}{doc}{doc string, a string or \code{None}}
|
||||
\lineiii{}{code}{the body of the class statement}
|
||||
\hline
|
||||
|
||||
\lineiii{Compare}{expr}{}
|
||||
\lineiii{}{ops}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Const}{value}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Continue}{}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Dict}{items}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Discard}{expr}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Div}{left}{}
|
||||
\lineiii{}{right}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Ellipsis}{}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Exec}{expr}{}
|
||||
\lineiii{}{locals}{}
|
||||
\lineiii{}{globals}{}
|
||||
\hline
|
||||
|
||||
\lineiii{For}{assign}{}
|
||||
\lineiii{}{list}{}
|
||||
\lineiii{}{body}{}
|
||||
\lineiii{}{else_}{}
|
||||
\hline
|
||||
|
||||
\lineiii{From}{modname}{}
|
||||
\lineiii{}{names}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Function}{name}{name used in def, a string}
|
||||
\lineiii{}{argnames}{list of argument names, as strings}
|
||||
\lineiii{}{defaults}{list of default values}
|
||||
\lineiii{}{flags}{xxx}
|
||||
\lineiii{}{doc}{doc string, a string or \code{None}}
|
||||
\lineiii{}{code}{the body of the function}
|
||||
\hline
|
||||
|
||||
\lineiii{Getattr}{expr}{}
|
||||
\lineiii{}{attrname}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Global}{names}{}
|
||||
\hline
|
||||
|
||||
\lineiii{If}{tests}{}
|
||||
\lineiii{}{else_}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Import}{names}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Invert}{expr}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Keyword}{name}{}
|
||||
\lineiii{}{expr}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Lambda}{argnames}{}
|
||||
\lineiii{}{defaults}{}
|
||||
\lineiii{}{flags}{}
|
||||
\lineiii{}{code}{}
|
||||
\hline
|
||||
|
||||
\lineiii{LeftShift}{left}{}
|
||||
\lineiii{}{right}{}
|
||||
\hline
|
||||
|
||||
\lineiii{List}{nodes}{}
|
||||
\hline
|
||||
|
||||
\lineiii{ListComp}{expr}{}
|
||||
\lineiii{}{quals}{}
|
||||
\hline
|
||||
|
||||
\lineiii{ListCompFor}{assign}{}
|
||||
\lineiii{}{list}{}
|
||||
\lineiii{}{ifs}{}
|
||||
\hline
|
||||
|
||||
\lineiii{ListCompIf}{test}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Mod}{left}{}
|
||||
\lineiii{}{right}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Module}{doc}{doc string, a string or \code{None}}
|
||||
\lineiii{}{node}{body of the module, a \class{Stmt}}
|
||||
\hline
|
||||
|
||||
\lineiii{Mul}{left}{}
|
||||
\lineiii{}{right}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Name}{name}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Not}{expr}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Or}{nodes}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Pass}{}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Power}{left}{}
|
||||
\lineiii{}{right}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Print}{nodes}{}
|
||||
\lineiii{}{dest}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Printnl}{nodes}{}
|
||||
\lineiii{}{dest}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Raise}{expr1}{}
|
||||
\lineiii{}{expr2}{}
|
||||
\lineiii{}{expr3}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Return}{value}{}
|
||||
\hline
|
||||
|
||||
\lineiii{RightShift}{left}{}
|
||||
\lineiii{}{right}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Slice}{expr}{}
|
||||
\lineiii{}{flags}{}
|
||||
\lineiii{}{lower}{}
|
||||
\lineiii{}{upper}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Sliceobj}{nodes}{list of statements}
|
||||
\hline
|
||||
|
||||
\lineiii{Stmt}{nodes}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Sub}{left}{}
|
||||
\lineiii{}{right}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Subscript}{expr}{}
|
||||
\lineiii{}{flags}{}
|
||||
\lineiii{}{subs}{}
|
||||
\hline
|
||||
|
||||
\lineiii{TryExcept}{body}{}
|
||||
\lineiii{}{handlers}{}
|
||||
\lineiii{}{else_}{}
|
||||
\hline
|
||||
|
||||
\lineiii{TryFinally}{body}{}
|
||||
\lineiii{}{final}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Tuple}{nodes}{}
|
||||
\hline
|
||||
|
||||
\lineiii{UnaryAdd}{expr}{}
|
||||
\hline
|
||||
|
||||
\lineiii{UnarySub}{expr}{}
|
||||
\hline
|
||||
|
||||
\lineiii{While}{test}{}
|
||||
\lineiii{}{body}{}
|
||||
\lineiii{}{else_}{}
|
||||
\hline
|
||||
|
||||
\lineiii{Yield}{value}{}
|
||||
\hline
|
||||
|
||||
\end{longtableiii}
|
|
@ -0,0 +1,305 @@
|
|||
% Complete documentation on the extended LaTeX markup used for Python
|
||||
% documentation is available in ``Documenting Python'', which is part
|
||||
% of the standard documentation for Python. It may be found online
|
||||
% at:
|
||||
%
|
||||
% http://www.python.org/doc/current/doc/doc.html
|
||||
|
||||
\documentclass{manual}
|
||||
|
||||
\title{Python compiler package}
|
||||
|
||||
\author{Jeremy Hylton}
|
||||
|
||||
% Please at least include a long-lived email address;
|
||||
% the rest is at your discretion.
|
||||
\authoraddress{
|
||||
PythonLabs \\
|
||||
Zope Corp. \\
|
||||
Email: \email{jeremy@zope.com}
|
||||
}
|
||||
|
||||
\date{August 15, 2001} % update before release!
|
||||
% Use an explicit date so that reformatting
|
||||
% doesn't cause a new date to be used. Setting
|
||||
% the date to \today can be used during draft
|
||||
% stages to make it easier to handle versions.
|
||||
|
||||
\release{2.2} % release version; this is used to define the
|
||||
% \version macro
|
||||
|
||||
\makeindex % tell \index to actually write the .idx file
|
||||
\makemodindex % If this contains a lot of module sections.
|
||||
|
||||
|
||||
\begin{document}
|
||||
|
||||
\maketitle
|
||||
|
||||
% This makes the contents more accessible from the front page of the HTML.
|
||||
\ifhtml
|
||||
\chapter*{Front Matter\label{front}}
|
||||
\fi
|
||||
|
||||
%\input{copyright}
|
||||
|
||||
\begin{abstract}
|
||||
|
||||
\noindent
|
||||
The Python compiler package is a tool for analyzing Python source code
|
||||
and generating Python bytecode. The compiler contains libraries to
|
||||
generate an abstract syntax tree from Python source code and to
|
||||
generate Python bytecode from the tree.
|
||||
|
||||
\end{abstract}
|
||||
|
||||
\tableofcontents
|
||||
|
||||
\chapter{Introduction\label{Introduction}}
|
||||
|
||||
XXX Need basic intro
|
||||
|
||||
XXX what are the major advantages... the abstract syntax is much
|
||||
closer to the python source...
|
||||
|
||||
\section{The basic interface}
|
||||
|
||||
The top-level of the package defines four functions.
|
||||
|
||||
\begin{funcdesc}{parse}{buf}
|
||||
Returns an abstract syntax tree for the Python source code in \var{buf}.
|
||||
The function raises SyntaxError if there is an error in the source
|
||||
code. The return value is a \class{compiler.ast.Module} instance that
|
||||
contains the tree.
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{parseFile}{path}
|
||||
Return an abstract syntax tree for the Python source code in the file
|
||||
specified by \var{path}. It is equivalent to \code{parse(open(path))}.
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{walk}{ast, visitor, \optional{verbose=None}}
|
||||
Do a pre-order walk over the abstract syntax tree \var{ast}. Call the
|
||||
appropriate method on the \var{visitor} instance for each node
|
||||
encountered.
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{compile}{filename}
|
||||
Compile the file \var{filename} and generated \var{filename}.pyc.
|
||||
\end{funcdesc}
|
||||
|
||||
The \module{compiler} package contains the following modules:
|
||||
\module{ast}, \module{consts}, \module{future}, \module{misc},
|
||||
\module{pyassem}, \module{pycodegen}, \module{symbols},
|
||||
\module{transformer}, and \module{visitor}.
|
||||
|
||||
\section{Limitations}
|
||||
|
||||
There are some problems with the error checking of the compiler
|
||||
package. The interpreter detects syntax errors in two distinct
|
||||
phases. One set of errors is detected by the interpreter's parser,
|
||||
the other set by the compiler. The compiler package relies on the
|
||||
interpreter's parser, so it get the first phases of error checking for
|
||||
free. It implements the second phase itself, and that implement is
|
||||
incomplete. For example, the compiler package does not raise an error
|
||||
if a name appears more than once in an argument list:
|
||||
\code{def f(x, x): ...}
|
||||
|
||||
\chapter{Python Abstract Syntax}
|
||||
|
||||
\section{Introduction}
|
||||
|
||||
The \module{compiler.ast} module defines an abstract syntax for
|
||||
Python. In the abstract syntax tree, each node represents a syntactic
|
||||
construct. The root of the tree is \class{Module} object.
|
||||
|
||||
The abstract syntax offers a higher level interface to parsed Python
|
||||
source code. The \module{parser} module and the compiler written in C
|
||||
for the Python interpreter use a concrete syntax tree. The concrete
|
||||
syntax is tied closely to the grammar description used for the Python
|
||||
parser. Instead of a single node for a construct, there are often
|
||||
several levels of nested nodes that are introduced by Python's
|
||||
precedence rules.
|
||||
|
||||
The abstract syntax tree is created by the
|
||||
\module{compiler.transformer} module. The transformer relies on the
|
||||
builtin Python parser to generate a concrete syntax tree. It
|
||||
generates an abstract syntax tree from the concrete tree.
|
||||
|
||||
The \module{transformer} module was created by Greg Stein and Bill
|
||||
Tutt for the Python-to-C compiler. The current version contains a
|
||||
number of modifications and improvements, but the basic form of the
|
||||
abstract syntax and of the transformer are due to Stein and Tutt.
|
||||
|
||||
\section{AST Nodes}
|
||||
|
||||
The \module{ast} module is generated from a text file that describes
|
||||
each node type and its elements. Each node type is represented as a
|
||||
class that inherits from the abstract base class \class{ast.Node} and
|
||||
defines a set of named attributes for child nodes.
|
||||
|
||||
\begin{classdesc}{Node}{}
|
||||
|
||||
The \class{Node} instances are created automatically by the parser
|
||||
generator. The recommended interface for specific \class{Node}
|
||||
instances is to use the public attributes to access child nodes. A
|
||||
public attribute may be bound to a single node or to a sequence of
|
||||
nodes, depending on the \class{Node} type. For example, the
|
||||
\member{bases} attribute of the \class{Class} node, is bound to a
|
||||
list of base class nodes, and the \member{doc} attribute is bound to
|
||||
a single node.
|
||||
|
||||
Each \class{Node} instance has a \member{lineno} attribute which may
|
||||
be \code{None}. XXX Not sure what the rules are for which nodes
|
||||
will have a useful lineno.
|
||||
|
||||
\begin{methoddesc}{getChildren}{}
|
||||
Returns a flattened list of the child nodes and objects in the
|
||||
order they occur. Specifically, the order of the nodes is the
|
||||
order in which they appear in the Python grammar. Not all of the
|
||||
children are \class{Node} instances. The names of functions and
|
||||
classes, for example, are plain strings.
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}{getChildNodes}{}
|
||||
Returns a flattened list of the child nodes in the order they
|
||||
occur. This method is like \method{getChildNodes}, except that it
|
||||
only returns those children that are \class{Node} instances.
|
||||
\end{methoddesc}
|
||||
|
||||
\end{classdesc}
|
||||
|
||||
Two examples illustrate the general structure of \class{Node}
|
||||
classes. The while statement is defined by the following grammar
|
||||
production:
|
||||
|
||||
\begin{verbatim}
|
||||
while_stmt: "while" expression ":" suite
|
||||
["else" ":" suite]
|
||||
\end{verbatim}
|
||||
|
||||
The \class{While} node has three attributes: \member{test},
|
||||
\member{body}, and \member{else_}. (If the natural name for an
|
||||
attribute is also a Python reserved word, it can't be used as an
|
||||
attribute name. An underscore is appended to the word to make it
|
||||
legal, hence \code{else_} instead of \code{else}.)
|
||||
|
||||
The if statement is more complicated because it can include several
|
||||
tests.
|
||||
|
||||
\begin{verbatim}
|
||||
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
|
||||
\end{verbatim}
|
||||
|
||||
The \class{If} node only defines two attributes: \member{tests} and
|
||||
\member{else_}. The \member{tests} attribute is a sequence of test
|
||||
expression, consequent body pairs. There is one pair of each if/elif
|
||||
clause. The first element of the pair is the test expression. The
|
||||
second elements is a \class{Stmt} node that contains the code to
|
||||
execute if the test is true.
|
||||
|
||||
The \method{getChildren()} method of \class{If} returns a flat list of
|
||||
child nodes. If there are three if/elif clauses and no else clause,
|
||||
then \method{getChildren()} will return a list of six elements: the
|
||||
first test expression, the first \class{Stmt}, the second text
|
||||
expression, etc.
|
||||
|
||||
The following table lists each of the \class{Node} subclasses defined
|
||||
in \module{compiler.ast} and each of the public attributes available
|
||||
on their instances. The values of most of the attributes are
|
||||
themselves \class{Node} instances or sequences of instances. When the
|
||||
value is something other than an instance, the type is noted in the
|
||||
comment. The attributes are listed in the order in which they are
|
||||
returned by \method{getChildren} and \method{getChildNodes}.
|
||||
|
||||
\input{asttable}
|
||||
|
||||
\section{Assignment nodes}
|
||||
|
||||
There is a collection of nodes used to represent assignments. Each
|
||||
assignment statement in the source code becomes a single
|
||||
\class{Assign} node in the AST. The \member{nodes} attribute is a
|
||||
list that contains a node for each assignment target. This is
|
||||
necessary because assignment can be chained, e.g. \code{a = b = 2}.
|
||||
Each \class{Node} in the list will be one of the following classes:
|
||||
\class{AssAttr}, \class{AssList}, \class{AssName}, or
|
||||
\class{AssTuple}.
|
||||
|
||||
XXX Explain what the AssXXX nodes are for. Mention \code{a.b.c = 2}
|
||||
as an example. Explain what the flags are for.
|
||||
|
||||
\chapter{Using Visitors to Walk ASTs}
|
||||
|
||||
The visitor pattern is ... The \module{compiler} package uses a
|
||||
variant on the visitor pattern that takes advantage of Python's
|
||||
introspection features to elminiate the need for much of the visitor's
|
||||
infrastructure.
|
||||
|
||||
The classes being visited do not need to be programmed to accept
|
||||
visitors. The visitor need only define visit methods for classes it
|
||||
is specifically interested in; a default visit method can handle the
|
||||
rest.
|
||||
|
||||
XXX The magic \method{visit()} method for visitors.
|
||||
|
||||
\begin{classdesc}{ASTVisitor}{}
|
||||
|
||||
The \class{ASTVisitor} is responsible for walking over the tree in the
|
||||
correct order. A walk begins with a call to \method{preorder()}. For
|
||||
each node, it checks the \var{visitor} argument to \method{preorder{}}
|
||||
for a method named `visitNodeType,' where NodeType is the name of the
|
||||
node's class, e.g. for a \class{While} node a \method{visitWhile}
|
||||
would be called . If the method exists, it is called with the node as
|
||||
its first argument.
|
||||
|
||||
The visitor method for a particular node type can control how child
|
||||
nodes are visited during the walk. The \class{ASTVisitor} modifies
|
||||
the visitor argument by adding a visit method to the visitor; this
|
||||
method can be used to visit a particular child node. If no visitor is
|
||||
found for a particular node type, the \method{default} method is
|
||||
called.
|
||||
|
||||
XXX describe extra arguments
|
||||
|
||||
\begin{methoddesc}{default}{node\optional{, *args}}
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}{dispatch}{node\optional{, *args}}
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}{preorder}{tree, visitor}
|
||||
\end{methoddesc}
|
||||
|
||||
\end{classdesc}
|
||||
|
||||
\begin{funcdesc}{walk}{tree, visitor\optional{, verbose=None}}
|
||||
\end{funcdesc}
|
||||
|
||||
\chapter{Bytecode Generation}
|
||||
|
||||
The code generator is a visit that emits bytecodes. Each visit method
|
||||
can call the \method{emit} method to emit a new bytecode. The basic
|
||||
code generator is specialized for modules, classes, and functions. An
|
||||
assembler converts that emitted instructions to the low-level bytecode
|
||||
format. It handles things like generator of constant lists of code
|
||||
objects and calculation of jump offsets.
|
||||
|
||||
%
|
||||
% The ugly "%begin{latexonly}" pseudo-environments are really just to
|
||||
% keep LaTeX2HTML quiet during the \renewcommand{} macros; they're
|
||||
% not really valuable.
|
||||
%
|
||||
% If you don't want the Module Index, you can remove all of this up
|
||||
% until the second \input line.
|
||||
%
|
||||
%begin{latexonly}
|
||||
\renewcommand{\indexname}{Module Index}
|
||||
%end{latexonly}
|
||||
\input{mod\jobname.ind} % Module Index
|
||||
|
||||
%begin{latexonly}
|
||||
\renewcommand{\indexname}{Index}
|
||||
%end{latexonly}
|
||||
\input{\jobname.ind} % Index
|
||||
|
||||
\end{document}
|
Loading…
Reference in New Issue