Final set of changes by Fred before 1.4beta3
This commit is contained in:
parent
d8a6d1c2e7
commit
8206fb9c4c
|
@ -3,6 +3,10 @@ parser.dvi: parser.tex ../../Doc/libparser.tex
|
|||
|
||||
# Use a new name for this; the included file uses 'clean' already....
|
||||
clean-parser:
|
||||
rm -f *.log *.aux *.dvi *.pyc
|
||||
rm -f *.log *.aux *.dvi *.pyc *.ps
|
||||
|
||||
dist:
|
||||
(cd ../..; \
|
||||
tar cf - `cat Demo/parser/FILES` | gzip >parsermodule-1.4.tar.gz)
|
||||
|
||||
include ../../Doc/Makefile
|
||||
|
|
|
@ -4,12 +4,29 @@ to the Python Library Reference for more information.
|
|||
Files:
|
||||
------
|
||||
|
||||
FILES -- list of files associated with the parser module.
|
||||
|
||||
README -- this file.
|
||||
|
||||
example.py -- module that uses the `parser' module to extract
|
||||
information from the parse tree of Python source
|
||||
code.
|
||||
|
||||
docstring.py -- sample source file containing only a module docstring.
|
||||
|
||||
simple.py -- sample source containing a "short form" definition.
|
||||
|
||||
source.py -- sample source code used to demonstrate ability to
|
||||
handle nested constructs easily using the functions
|
||||
and classes in example.py.
|
||||
|
||||
pprint.py -- function to pretty-print Python values.
|
||||
|
||||
test_parser.py -- program to put the parser module through its paces.
|
||||
|
||||
parser.tex -- LaTeX driver file for formatting the parser module
|
||||
documentation separately from the library reference.
|
||||
|
||||
Makefile -- `make' rule set to format the parser module manual.
|
||||
|
||||
Enjoy!
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
"""Simple code to extract class & function docstrings from a module.
|
||||
|
||||
|
||||
This code is used as an example in the library reference manual in the
|
||||
section on using the parser module. Refer to the manual for a thorough
|
||||
discussion of the operation of this code.
|
||||
"""
|
||||
|
||||
import symbol
|
||||
|
@ -23,12 +25,35 @@ def get_docs(fileName):
|
|||
return ModuleInfo(tup, basename)
|
||||
|
||||
|
||||
class DefnInfo:
|
||||
class SuiteInfoBase:
|
||||
_docstring = ''
|
||||
_name = ''
|
||||
|
||||
def __init__(self, tree):
|
||||
self._name = tree[2][1]
|
||||
def __init__(self, tree = None):
|
||||
self._class_info = {}
|
||||
self._function_info = {}
|
||||
if tree:
|
||||
self._extract_info(tree)
|
||||
|
||||
def _extract_info(self, tree):
|
||||
# extract docstring
|
||||
if len(tree) == 2:
|
||||
found, vars = match(DOCSTRING_STMT_PATTERN[1], tree[1])
|
||||
else:
|
||||
found, vars = match(DOCSTRING_STMT_PATTERN, tree[3])
|
||||
if found:
|
||||
self._docstring = eval(vars['docstring'])
|
||||
# discover inner definitions
|
||||
for node in tree[1:]:
|
||||
found, vars = match(COMPOUND_STMT_PATTERN, node)
|
||||
if found:
|
||||
cstmt = vars['compound']
|
||||
if cstmt[0] == symbol.funcdef:
|
||||
name = cstmt[2][1]
|
||||
self._function_info[name] = FunctionInfo(cstmt)
|
||||
elif cstmt[0] == symbol.classdef:
|
||||
name = cstmt[2][1]
|
||||
self._class_info[name] = ClassInfo(cstmt)
|
||||
|
||||
def get_docstring(self):
|
||||
return self._docstring
|
||||
|
@ -36,38 +61,21 @@ class DefnInfo:
|
|||
def get_name(self):
|
||||
return self._name
|
||||
|
||||
class SuiteInfoBase(DefnInfo):
|
||||
def __init__(self):
|
||||
self._class_info = {}
|
||||
self._function_info = {}
|
||||
|
||||
def get_class_names(self):
|
||||
return self._class_info.keys()
|
||||
|
||||
def get_class_info(self, name):
|
||||
return self._class_info[name]
|
||||
|
||||
def _extract_info(self, tree):
|
||||
if len(tree) >= 4:
|
||||
found, vars = match(DOCSTRING_STMT_PATTERN, tree[3])
|
||||
if found:
|
||||
self._docstring = eval(vars['docstring'])
|
||||
for node in tree[1:]:
|
||||
if (node[0] == symbol.stmt
|
||||
and node[1][0] == symbol.compound_stmt):
|
||||
if node[1][1][0] == symbol.funcdef:
|
||||
name = node[1][1][2][1]
|
||||
self._function_info[name] = \
|
||||
FunctionInfo(node[1][1])
|
||||
elif node[1][1][0] == symbol.classdef:
|
||||
name = node[1][1][2][1]
|
||||
self._class_info[name] = ClassInfo(node[1][1])
|
||||
def __getitem__(self, name):
|
||||
try:
|
||||
return self._class_info[name]
|
||||
except KeyError:
|
||||
return self._function_info[name]
|
||||
|
||||
|
||||
class SuiteInfo(SuiteInfoBase):
|
||||
def __init__(self, tree):
|
||||
SuiteInfoBase.__init__(self)
|
||||
self._extract_info(tree)
|
||||
class SuiteFuncInfo:
|
||||
# Mixin class providing access to function names and info.
|
||||
|
||||
def get_function_names(self):
|
||||
return self._function_info.keys()
|
||||
|
@ -76,23 +84,16 @@ class SuiteInfo(SuiteInfoBase):
|
|||
return self._function_info[name]
|
||||
|
||||
|
||||
class FunctionInfo(SuiteInfo):
|
||||
def __init__(self, tree):
|
||||
DefnInfo.__init__(self, tree)
|
||||
suite = tree[-1]
|
||||
if len(suite) >= 4:
|
||||
found, vars = match(DOCSTRING_STMT_PATTERN, suite[3])
|
||||
if found:
|
||||
self._docstring = eval(vars['docstring'])
|
||||
SuiteInfoBase.__init__(self)
|
||||
self._extract_info(suite)
|
||||
class FunctionInfo(SuiteInfoBase, SuiteFuncInfo):
|
||||
def __init__(self, tree = None):
|
||||
self._name = tree[2][1]
|
||||
SuiteInfoBase.__init__(self, tree and tree[-1] or None)
|
||||
|
||||
|
||||
class ClassInfo(SuiteInfoBase):
|
||||
def __init__(self, tree):
|
||||
SuiteInfoBase.__init__(self)
|
||||
DefnInfo.__init__(self, tree)
|
||||
self._extract_info(tree[-1])
|
||||
def __init__(self, tree = None):
|
||||
self._name = tree[2][1]
|
||||
SuiteInfoBase.__init__(self, tree and tree[-1] or None)
|
||||
|
||||
def get_method_names(self):
|
||||
return self._function_info.keys()
|
||||
|
@ -101,19 +102,40 @@ class ClassInfo(SuiteInfoBase):
|
|||
return self._function_info[name]
|
||||
|
||||
|
||||
class ModuleInfo(SuiteInfo):
|
||||
def __init__(self, tree, name="<string>"):
|
||||
class ModuleInfo(SuiteInfoBase, SuiteFuncInfo):
|
||||
def __init__(self, tree = None, name = "<string>"):
|
||||
self._name = name
|
||||
SuiteInfo.__init__(self, tree)
|
||||
found, vars = match(DOCSTRING_STMT_PATTERN, tree[1])
|
||||
if found:
|
||||
self._docstring = vars["docstring"]
|
||||
SuiteInfoBase.__init__(self, tree)
|
||||
if tree:
|
||||
found, vars = match(DOCSTRING_STMT_PATTERN, tree[1])
|
||||
if found:
|
||||
self._docstring = vars["docstring"]
|
||||
|
||||
|
||||
from types import ListType, TupleType
|
||||
|
||||
def match(pattern, data, vars=None):
|
||||
"""
|
||||
"""Match `data' to `pattern', with variable extraction.
|
||||
|
||||
pattern
|
||||
Pattern to match against, possibly containing variables.
|
||||
|
||||
data
|
||||
Data to be checked and against which variables are extracted.
|
||||
|
||||
vars
|
||||
Dictionary of variables which have already been found. If not
|
||||
provided, an empty dictionary is created.
|
||||
|
||||
The `pattern' value may contain variables of the form ['varname'] which
|
||||
are allowed to match anything. The value that is matched is returned as
|
||||
part of a dictionary which maps 'varname' to the matched value. 'varname'
|
||||
is not required to be a string object, but using strings makes patterns
|
||||
and the code which uses them more readable.
|
||||
|
||||
This function returns two values: a boolean indicating whether a match
|
||||
was found and a dictionary mapping variable names to their associated
|
||||
values.
|
||||
"""
|
||||
if vars is None:
|
||||
vars = {}
|
||||
|
@ -131,6 +153,15 @@ def match(pattern, data, vars=None):
|
|||
return same, vars
|
||||
|
||||
|
||||
# This pattern identifies compound statements, allowing them to be readily
|
||||
# differentiated from simple statements.
|
||||
#
|
||||
COMPOUND_STMT_PATTERN = (
|
||||
symbol.stmt,
|
||||
(symbol.compound_stmt, ['compound'])
|
||||
)
|
||||
|
||||
|
||||
# This pattern will match a 'stmt' node which *might* represent a docstring;
|
||||
# docstrings require that the statement which provides the docstring be the
|
||||
# first statement in the class or function, which this pattern does not check.
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# pprint.py
|
||||
#
|
||||
# Author: Fred L. Drake, Jr.
|
||||
# fdrake@vt.edu
|
||||
# fdrake@cnri.reston.va.us, fdrake@intr.net
|
||||
#
|
||||
# This is a simple little module I wrote to make life easier. I didn't
|
||||
# see anything quite like it in the library, though I may have overlooked
|
||||
|
@ -9,35 +9,29 @@
|
|||
# tuples with fairly non-descriptive content. This is modelled very much
|
||||
# after Lisp/Scheme - style pretty-printing of lists. If you find it
|
||||
# useful, thank small children who sleep at night.
|
||||
#
|
||||
|
||||
"""Support to pretty-print lists, tuples, & dictionaries recursively.
|
||||
Very simple, but at least somewhat useful, especially in debugging
|
||||
data structures.
|
||||
Very simple, but useful, especially in debugging data structures.
|
||||
|
||||
INDENT_PER_LEVEL -- Amount of indentation to use for each new
|
||||
recursive level. The default is 1. This
|
||||
must be a non-negative integer, and may be
|
||||
set by the caller before calling pprint().
|
||||
Constants
|
||||
---------
|
||||
|
||||
MAX_WIDTH -- Maximum width of the display. This is only
|
||||
used if the representation *can* be kept
|
||||
less than MAX_WIDTH characters wide. May
|
||||
be set by the user before calling pprint().
|
||||
INDENT_PER_LEVEL
|
||||
Amount of indentation to use for each new recursive level. The
|
||||
default is 1. This must be a non-negative integer, and may be set
|
||||
by the caller before calling pprint().
|
||||
|
||||
TAB_WIDTH -- The width represented by a single tab. This
|
||||
value is typically 8, but 4 is the default
|
||||
under MacOS. Can be changed by the user if
|
||||
desired, but is probably not a good idea.
|
||||
MAX_WIDTH
|
||||
Maximum width of the display. This is only used if the
|
||||
representation *can* be kept less than MAX_WIDTH characters wide.
|
||||
May be set by the user before calling pprint().
|
||||
|
||||
pprint(seq [, stream]) -- The pretty-printer. This takes a Python
|
||||
object (presumably a sequence, but that
|
||||
doesn't matter) and an optional output
|
||||
stream. See the function documentation
|
||||
for details.
|
||||
TAB_WIDTH
|
||||
The width represented by a single tab. This value is typically 8,
|
||||
but 4 is the default under MacOS. Can be changed by the user if
|
||||
desired, but is probably not a good idea.
|
||||
"""
|
||||
|
||||
|
||||
INDENT_PER_LEVEL = 1
|
||||
|
||||
MAX_WIDTH = 80
|
||||
|
@ -46,46 +40,45 @@ import os
|
|||
TAB_WIDTH = (os.name == 'mac' and 4) or 8
|
||||
del os
|
||||
|
||||
from types import DictType, ListType, TupleType
|
||||
|
||||
|
||||
def _indentation(cols):
|
||||
"Create tabbed indentation string COLS columns wide."
|
||||
|
||||
# This is used to reduce the byte-count for the output, allowing
|
||||
# files created using this module to use as little external storage
|
||||
# as possible. This is primarily intended to minimize impact on
|
||||
# a user's quota when storing resource files, or for creating output
|
||||
# intended for transmission.
|
||||
"""Create tabbed indentation string.
|
||||
|
||||
cols
|
||||
Width of the indentation, in columns.
|
||||
"""
|
||||
return ((cols / TAB_WIDTH) * '\t') + ((cols % TAB_WIDTH) * ' ')
|
||||
|
||||
|
||||
|
||||
def pprint(seq, stream = None, indent = 0, allowance = 0):
|
||||
"""Pretty-print a list, tuple, or dictionary.
|
||||
|
||||
pprint(seq [, stream]) ==> None
|
||||
seq
|
||||
List, tuple, or dictionary object to be pretty-printed. Other
|
||||
object types are permitted but are not specially interpreted.
|
||||
|
||||
If STREAM is provided, output is written to that stream, otherwise
|
||||
sys.stdout is used. Indentation is done according to
|
||||
INDENT_PER_LEVEL, which may be set to any non-negative integer
|
||||
before calling this function. The output written on the stream is
|
||||
a perfectly valid representation of the Python object passed in,
|
||||
with indentation to suite human-readable interpretation. The
|
||||
output can be used as input without error, given readable
|
||||
representations of all sequence elements are available via repr().
|
||||
Output is restricted to MAX_WIDTH columns where possible. The
|
||||
STREAM parameter must support the write() method with a single
|
||||
parameter, which will always be a string. The output stream may be
|
||||
a StringIO.StringIO object if the result is needed as a string.
|
||||
stream
|
||||
Output stream. If not provided, `sys.stdout' is used. This
|
||||
parameter must support the `write()' method with a single
|
||||
parameter, which will always be a string. It may be a
|
||||
`StringIO.StringIO' object if the result is needed as a
|
||||
string.
|
||||
|
||||
Indentation is done according to `INDENT_PER_LEVEL', which may be
|
||||
set to any non-negative integer before calling this function. The
|
||||
output written on the stream is a perfectly valid representation
|
||||
of the Python object passed in, with indentation to assist
|
||||
human-readable interpretation. The output can be used as input
|
||||
without error, given readable representations of all elements are
|
||||
available via `repr()'. Output is restricted to `MAX_WIDTH'
|
||||
columns where possible.
|
||||
"""
|
||||
|
||||
if stream is None:
|
||||
import sys
|
||||
stream = sys.stdout
|
||||
|
||||
from types import DictType, ListType, TupleType
|
||||
|
||||
rep = `seq`
|
||||
typ = type(seq)
|
||||
sepLines = len(rep) > (MAX_WIDTH - 1 - indent - allowance)
|
||||
|
@ -140,4 +133,4 @@ def pprint(seq, stream = None, indent = 0, allowance = 0):
|
|||
|
||||
|
||||
#
|
||||
# end of pprint.py
|
||||
# end of file
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
def f(): "maybe a docstring"
|
|
@ -236,19 +236,25 @@ to the descriptions of each function for detailed information.
|
|||
\subsection{AST Objects}
|
||||
|
||||
AST objects (returned by \code{expr()}, \code{suite()}, and
|
||||
\code{tuple2ast()}, described above) have no methods of their own.
|
||||
\code{sequence2ast()}, described above) have no methods of their own.
|
||||
Some of the functions defined which accept an AST object as their
|
||||
first argument may change to object methods in the future.
|
||||
|
||||
Ordered and equality comparisons are supported between AST objects.
|
||||
|
||||
|
||||
\subsection{Example}
|
||||
\subsection{Examples}
|
||||
|
||||
The parser module allows operations to be performed on the parse tree
|
||||
of Python source code before the bytecode is generated, and provides
|
||||
for inspection of the parse tree for information gathering purposes as
|
||||
well. While many useful operations may take place between parsing and
|
||||
well. Two examples are presented. The simple example demonstrates
|
||||
emulation of the \code{compile()} built-in function and the complex
|
||||
example shows the use of a parse tree for information discovery.
|
||||
|
||||
\subsubsection{Emulation of {\tt compile()}}
|
||||
|
||||
While many useful operations may take place between parsing and
|
||||
bytecode generation, the simplest operation is to do nothing. For
|
||||
this purpose, using the \code{parser} module to produce an
|
||||
intermediate data structure is equivalent to the code
|
||||
|
@ -273,6 +279,25 @@ as an AST object:
|
|||
10
|
||||
\end{verbatim}
|
||||
|
||||
An application which needs both AST and code objects can package this
|
||||
code into readily available functions:
|
||||
|
||||
\begin{verbatim}
|
||||
import parser
|
||||
|
||||
def load_suite(source_string):
|
||||
ast = parser.suite(source_string)
|
||||
code = parser.compileast(ast)
|
||||
return ast, code
|
||||
|
||||
def load_expression(source_string):
|
||||
ast = parser.expr(source_string)
|
||||
code = parser.compileast(ast)
|
||||
return ast, code
|
||||
\end{verbatim}
|
||||
|
||||
\subsubsection{Information Discovery}
|
||||
|
||||
Some applications can benefit from access to the parse tree itself, and
|
||||
can take advantage of the intermediate data structure provided by the
|
||||
\code{parser} module. The remainder of this section of examples will
|
||||
|
@ -293,6 +318,16 @@ operations on behalf of the caller. All source files mentioned here
|
|||
which are not part of the Python installation are located in the
|
||||
\file{Demo/parser} directory of the distribution.
|
||||
|
||||
The dynamic nature of Python allows the programmer a great deal of
|
||||
flexibility, but most modules need only a limited measure of this when
|
||||
defining classes, functions, and methods. In this example, the only
|
||||
definitions that will be considered are those which are defined in the
|
||||
top level of their context, e.g., a function defined by a \code{def}
|
||||
statement at column zero of a module, but not a function defined
|
||||
within a branch of an \code{if} ... \code{else} construct, though
|
||||
there are some good reasons for doing so in some situations. Nesting
|
||||
of definitions will be handled by the code developed in the example.
|
||||
|
||||
To construct the upper-level extraction methods, we need to know what
|
||||
the parse tree structure looks like and how much of it we actually
|
||||
need to be concerned about. Python uses a moderately deep parse tree,
|
||||
|
@ -300,7 +335,8 @@ so there are a large number of intermediate nodes. It is important to
|
|||
read and understand the formal grammar used by Python. This is
|
||||
specified in the file \file{Grammar/Grammar} in the distribution.
|
||||
Consider the simplest case of interest when searching for docstrings:
|
||||
a module consisting of a docstring and nothing else:
|
||||
a module consisting of a docstring and nothing else. (See file
|
||||
\file{docstring.py}.)
|
||||
|
||||
\begin{verbatim}
|
||||
"""Some documentation.
|
||||
|
@ -376,7 +412,7 @@ extraction, we can safely require that the tree be in tuple form
|
|||
rather than list form, allowing a simple variable representation to be
|
||||
\code{['variable\_name']}. A simple recursive function can implement
|
||||
the pattern matching, returning a boolean and a dictionary of variable
|
||||
name to value mappings.
|
||||
name to value mappings. (See file \file{example.py}.)
|
||||
|
||||
\begin{verbatim}
|
||||
from types import ListType, TupleType
|
||||
|
@ -399,32 +435,36 @@ def match(pattern, data, vars=None):
|
|||
\end{verbatim}
|
||||
|
||||
Using this simple recursive pattern matching function and the symbolic
|
||||
node types, the pattern for the candidate docstring subtrees becomes:
|
||||
node types, the pattern for the candidate docstring subtrees becomes
|
||||
fairly readable. (See file \file{example.py}.)
|
||||
|
||||
\begin{verbatim}
|
||||
>>> DOCSTRING_STMT_PATTERN = (
|
||||
... symbol.stmt,
|
||||
... (symbol.simple_stmt,
|
||||
... (symbol.small_stmt,
|
||||
... (symbol.expr_stmt,
|
||||
... (symbol.testlist,
|
||||
... (symbol.test,
|
||||
... (symbol.and_test,
|
||||
... (symbol.not_test,
|
||||
... (symbol.comparison,
|
||||
... (symbol.expr,
|
||||
... (symbol.xor_expr,
|
||||
... (symbol.and_expr,
|
||||
... (symbol.shift_expr,
|
||||
... (symbol.arith_expr,
|
||||
... (symbol.term,
|
||||
... (symbol.factor,
|
||||
... (symbol.power,
|
||||
... (symbol.atom,
|
||||
... (token.STRING, ['docstring'])
|
||||
... )))))))))))))))),
|
||||
... (token.NEWLINE, '')
|
||||
... ))
|
||||
import symbol
|
||||
import token
|
||||
|
||||
DOCSTRING_STMT_PATTERN = (
|
||||
symbol.stmt,
|
||||
(symbol.simple_stmt,
|
||||
(symbol.small_stmt,
|
||||
(symbol.expr_stmt,
|
||||
(symbol.testlist,
|
||||
(symbol.test,
|
||||
(symbol.and_test,
|
||||
(symbol.not_test,
|
||||
(symbol.comparison,
|
||||
(symbol.expr,
|
||||
(symbol.xor_expr,
|
||||
(symbol.and_expr,
|
||||
(symbol.shift_expr,
|
||||
(symbol.arith_expr,
|
||||
(symbol.term,
|
||||
(symbol.factor,
|
||||
(symbol.power,
|
||||
(symbol.atom,
|
||||
(token.STRING, ['docstring'])
|
||||
)))))))))))))))),
|
||||
(token.NEWLINE, '')
|
||||
))
|
||||
\end{verbatim}
|
||||
|
||||
Using the \code{match()} function with this pattern, extracting the
|
||||
|
@ -453,6 +493,160 @@ sibling nodes to match without regard to number. A more elaborate
|
|||
matching function could be used to overcome this limitation, but this
|
||||
is sufficient for the example.
|
||||
|
||||
Given the ability to determine whether a statement might be a
|
||||
docstring and extract the actual string from the statement, some work
|
||||
needs to be performed to walk the parse tree for an entire module and
|
||||
extract information about the names defined in each context of the
|
||||
module and associate any docstrings with the names. The code to
|
||||
perform this work is not complicated, but bears some explanation.
|
||||
|
||||
The public interface to the classes is straightforward and should
|
||||
probably be somewhat more flexible. Each ``major'' block of the
|
||||
module is described by an object providing several methods for inquiry
|
||||
and a constructor which accepts at least the subtree of the complete
|
||||
parse tree which it represents. The \code{ModuleInfo} constructor
|
||||
accepts an optional \code{\var{name}} parameter since it cannot
|
||||
otherwise determine the name of the module.
|
||||
|
||||
The public classes include \code{ClassInfo}, \code{FunctionInfo},
|
||||
and \code{ModuleInfo}. All objects provide the
|
||||
methods \code{get\_name()}, \code{get\_docstring()},
|
||||
\code{get\_class\_names()}, and \code{get\_class\_info()}.  The
|
||||
\code{ClassInfo} objects support \code{get\_method\_names()} and
|
||||
\code{get\_method\_info()} while the other classes provide
|
||||
\code{get\_function\_names()} and \code{get\_function\_info()}.
|
||||
|
||||
Within each of the forms of code block that the public classes
|
||||
represent, most of the required information is in the same form and is
|
||||
accessed in the same way, with classes having the distinction that
|
||||
functions defined at the top level are referred to as ``methods.''
|
||||
Since the difference in nomenclature reflects a real semantic
|
||||
distinction from functions defined outside of a class, our
|
||||
implementation needs to maintain the same measure of distinction.
|
||||
Hence, most of the functionality of the public classes can be
|
||||
implemented in a common base class, \code{SuiteInfoBase}, with the
|
||||
accessors for function and method information provided elsewhere.
|
||||
Note that there is only one class which represents function and method
|
||||
information; this mirrors the use of the \code{def} statement to
|
||||
define both types of functions.
|
||||
|
||||
Most of the accessor functions are declared in \code{SuiteInfoBase}
|
||||
and do not need to be overridden by subclasses.  More importantly, the
|
||||
extraction of most information from a parse tree is handled through a
|
||||
method called by the \code{SuiteInfoBase} constructor. The example
|
||||
code for most of the classes is clear when read alongside the formal
|
||||
grammar, but the method which recursively creates new information
|
||||
objects requires further examination. Here is the relevant part of
|
||||
the \code{SuiteInfoBase} definition from \file{example.py}:
|
||||
|
||||
\begin{verbatim}
|
||||
class SuiteInfoBase:
|
||||
_docstring = ''
|
||||
_name = ''
|
||||
|
||||
def __init__(self, tree = None):
|
||||
self._class_info = {}
|
||||
self._function_info = {}
|
||||
if tree:
|
||||
self._extract_info(tree)
|
||||
|
||||
def _extract_info(self, tree):
|
||||
# extract docstring
|
||||
if len(tree) == 2:
|
||||
found, vars = match(DOCSTRING_STMT_PATTERN[1], tree[1])
|
||||
else:
|
||||
found, vars = match(DOCSTRING_STMT_PATTERN, tree[3])
|
||||
if found:
|
||||
self._docstring = eval(vars['docstring'])
|
||||
# discover inner definitions
|
||||
for node in tree[1:]:
|
||||
found, vars = match(COMPOUND_STMT_PATTERN, node)
|
||||
if found:
|
||||
cstmt = vars['compound']
|
||||
if cstmt[0] == symbol.funcdef:
|
||||
name = cstmt[2][1]
|
||||
self._function_info[name] = FunctionInfo(cstmt)
|
||||
elif cstmt[0] == symbol.classdef:
|
||||
name = cstmt[2][1]
|
||||
self._class_info[name] = ClassInfo(cstmt)
|
||||
\end{verbatim}
|
||||
|
||||
After initializing some internal state, the constructor calls the
|
||||
\code{\_extract\_info()} method.  This method performs the bulk of the
|
||||
information extraction which takes place in the entire example. The
|
||||
extraction has two distinct phases: the location of the docstring for
|
||||
the parse tree passed in, and the discovery of additional definitions
|
||||
within the code block represented by the parse tree.
|
||||
|
||||
The initial \code{if} test determines whether the nested suite is of
|
||||
the ``short form'' or the ``long form.'' The short form is used when
|
||||
the code block is on the same line as the definition of the code
|
||||
block, as in
|
||||
|
||||
\begin{verbatim}
|
||||
def square(x): "Square an argument."; return x ** 2
|
||||
\end{verbatim}
|
||||
|
||||
while the long form uses an indented block and allows nested
|
||||
definitions:
|
||||
|
||||
\begin{verbatim}
|
||||
def make_power(exp):
|
||||
"Make a function that raises an argument to the exponent `exp'."
|
||||
def raiser(x, y=exp):
|
||||
return x ** y
|
||||
return raiser
|
||||
\end{verbatim}
|
||||
|
||||
When the short form is used, the code block may contain a docstring as
|
||||
the first, and possibly only, \code{small\_stmt} element.  The
|
||||
extraction of such a docstring is slightly different and requires only
|
||||
a portion of the complete pattern used in the more common case. As
|
||||
given in the code, the docstring will only be found if there is only
|
||||
one \code{small\_stmt} node in the \code{simple\_stmt} node.  Since most
|
||||
functions and methods which use the short form do not provide
|
||||
a docstring, this may be considered sufficient.  The extraction of the
|
||||
docstring proceeds using the \code{match()} function as described
|
||||
above, and the value of the docstring is stored as an attribute of the
|
||||
\code{SuiteInfoBase} object.
|
||||
|
||||
After docstring extraction, the method operates a simple definition discovery
|
||||
algorithm on the \code{stmt} nodes of the \code{suite} node. The
|
||||
special case of the short form is not tested; since there are no
|
||||
\code{stmt} nodes in the short form, the algorithm will silently skip
|
||||
the single \code{simple\_stmt} node and correctly not discover any
|
||||
nested definitions.
|
||||
|
||||
Each statement in the code block being examined is categorized as being
|
||||
a class definition, function definition (including methods), or
|
||||
something else. For the definition statements, the name of the
|
||||
element being defined is extracted and a representation object
|
||||
appropriate to the definition is created with the defining subtree
|
||||
passed as an argument to the constructor.  The representation objects
|
||||
are stored in instance variables and may be retrieved by name using
|
||||
the appropriate accessor methods.
|
||||
|
||||
The public classes provide any accessors required which are more
|
||||
specific than those provided by the \code{SuiteInfoBase} class, but
|
||||
the real extraction algorithm remains common to all forms of code
|
||||
blocks. A high-level function can be used to extract the complete set
|
||||
of information from a source file:
|
||||
|
||||
\begin{verbatim}
|
||||
def get_docs(fileName):
|
||||
source = open(fileName).read()
|
||||
import os
|
||||
basename = os.path.basename(os.path.splitext(fileName)[0])
|
||||
import parser
|
||||
ast = parser.suite(source)
|
||||
tup = parser.ast2tuple(ast)
|
||||
return ModuleInfo(tup, basename)
|
||||
\end{verbatim}
|
||||
|
||||
This provides an easy-to-use interface to the documentation of a
|
||||
module. If information is required which is not extracted by the code
|
||||
of this example, the code may be extended at clearly defined points to
|
||||
provide additional capabilities.
|
||||
|
||||
|
||||
%%
|
||||
|
|
|
@ -236,19 +236,25 @@ to the descriptions of each function for detailed information.
|
|||
\subsection{AST Objects}
|
||||
|
||||
AST objects (returned by \code{expr()}, \code{suite()}, and
|
||||
\code{tuple2ast()}, described above) have no methods of their own.
|
||||
\code{sequence2ast()}, described above) have no methods of their own.
|
||||
Some of the functions defined which accept an AST object as their
|
||||
first argument may change to object methods in the future.
|
||||
|
||||
Ordered and equality comparisons are supported between AST objects.
|
||||
|
||||
|
||||
\subsection{Example}
|
||||
\subsection{Examples}
|
||||
|
||||
The parser module allows operations to be performed on the parse tree
|
||||
of Python source code before the bytecode is generated, and provides
|
||||
for inspection of the parse tree for information gathering purposes as
|
||||
well. While many useful operations may take place between parsing and
|
||||
well. Two examples are presented. The simple example demonstrates
|
||||
emulation of the \code{compile()} built-in function and the complex
|
||||
example shows the use of a parse tree for information discovery.
|
||||
|
||||
\subsubsection{Emulation of {\tt compile()}}
|
||||
|
||||
While many useful operations may take place between parsing and
|
||||
bytecode generation, the simplest operation is to do nothing. For
|
||||
this purpose, using the \code{parser} module to produce an
|
||||
intermediate data structure is equivalent to the code
|
||||
|
@ -273,6 +279,25 @@ as an AST object:
|
|||
10
|
||||
\end{verbatim}
|
||||
|
||||
An application which needs both AST and code objects can package this
|
||||
code into readily available functions:
|
||||
|
||||
\begin{verbatim}
|
||||
import parser
|
||||
|
||||
def load_suite(source_string):
|
||||
ast = parser.suite(source_string)
|
||||
code = parser.compileast(ast)
|
||||
return ast, code
|
||||
|
||||
def load_expression(source_string):
|
||||
ast = parser.expr(source_string)
|
||||
code = parser.compileast(ast)
|
||||
return ast, code
|
||||
\end{verbatim}
|
||||
|
||||
\subsubsection{Information Discovery}
|
||||
|
||||
Some applications can benefit from access to the parse tree itself, and
|
||||
can take advantage of the intermediate data structure provided by the
|
||||
\code{parser} module. The remainder of this section of examples will
|
||||
|
@ -293,6 +318,16 @@ operations on behalf of the caller. All source files mentioned here
|
|||
which are not part of the Python installation are located in the
|
||||
\file{Demo/parser} directory of the distribution.
|
||||
|
||||
The dynamic nature of Python allows the programmer a great deal of
|
||||
flexibility, but most modules need only a limited measure of this when
|
||||
defining classes, functions, and methods. In this example, the only
|
||||
definitions that will be considered are those which are defined in the
|
||||
top level of their context, e.g., a function defined by a \code{def}
|
||||
statement at column zero of a module, but not a function defined
|
||||
within a branch of an \code{if} ... \code{else} construct, though
|
||||
there are some good reasons for doing so in some situations. Nesting
|
||||
of definitions will be handled by the code developed in the example.
|
||||
|
||||
To construct the upper-level extraction methods, we need to know what
|
||||
the parse tree structure looks like and how much of it we actually
|
||||
need to be concerned about. Python uses a moderately deep parse tree,
|
||||
|
@ -300,7 +335,8 @@ so there are a large number of intermediate nodes. It is important to
|
|||
read and understand the formal grammar used by Python. This is
|
||||
specified in the file \file{Grammar/Grammar} in the distribution.
|
||||
Consider the simplest case of interest when searching for docstrings:
|
||||
a module consisting of a docstring and nothing else:
|
||||
a module consisting of a docstring and nothing else. (See file
|
||||
\file{docstring.py}.)
|
||||
|
||||
\begin{verbatim}
|
||||
"""Some documentation.
|
||||
|
@ -376,7 +412,7 @@ extraction, we can safely require that the tree be in tuple form
|
|||
rather than list form, allowing a simple variable representation to be
|
||||
\code{['variable\_name']}. A simple recursive function can implement
|
||||
the pattern matching, returning a boolean and a dictionary of variable
|
||||
name to value mappings.
|
||||
name to value mappings. (See file \file{example.py}.)
|
||||
|
||||
\begin{verbatim}
|
||||
from types import ListType, TupleType
|
||||
|
@ -399,32 +435,36 @@ def match(pattern, data, vars=None):
|
|||
\end{verbatim}
|
||||
|
||||
Using this simple recursive pattern matching function and the symbolic
|
||||
node types, the pattern for the candidate docstring subtrees becomes:
|
||||
node types, the pattern for the candidate docstring subtrees becomes
|
||||
fairly readable. (See file \file{example.py}.)
|
||||
|
||||
\begin{verbatim}
|
||||
>>> DOCSTRING_STMT_PATTERN = (
|
||||
... symbol.stmt,
|
||||
... (symbol.simple_stmt,
|
||||
... (symbol.small_stmt,
|
||||
... (symbol.expr_stmt,
|
||||
... (symbol.testlist,
|
||||
... (symbol.test,
|
||||
... (symbol.and_test,
|
||||
... (symbol.not_test,
|
||||
... (symbol.comparison,
|
||||
... (symbol.expr,
|
||||
... (symbol.xor_expr,
|
||||
... (symbol.and_expr,
|
||||
... (symbol.shift_expr,
|
||||
... (symbol.arith_expr,
|
||||
... (symbol.term,
|
||||
... (symbol.factor,
|
||||
... (symbol.power,
|
||||
... (symbol.atom,
|
||||
... (token.STRING, ['docstring'])
|
||||
... )))))))))))))))),
|
||||
... (token.NEWLINE, '')
|
||||
... ))
|
||||
import symbol
|
||||
import token
|
||||
|
||||
DOCSTRING_STMT_PATTERN = (
|
||||
symbol.stmt,
|
||||
(symbol.simple_stmt,
|
||||
(symbol.small_stmt,
|
||||
(symbol.expr_stmt,
|
||||
(symbol.testlist,
|
||||
(symbol.test,
|
||||
(symbol.and_test,
|
||||
(symbol.not_test,
|
||||
(symbol.comparison,
|
||||
(symbol.expr,
|
||||
(symbol.xor_expr,
|
||||
(symbol.and_expr,
|
||||
(symbol.shift_expr,
|
||||
(symbol.arith_expr,
|
||||
(symbol.term,
|
||||
(symbol.factor,
|
||||
(symbol.power,
|
||||
(symbol.atom,
|
||||
(token.STRING, ['docstring'])
|
||||
)))))))))))))))),
|
||||
(token.NEWLINE, '')
|
||||
))
|
||||
\end{verbatim}
|
||||
|
||||
Using the \code{match()} function with this pattern, extracting the
|
||||
|
@ -453,6 +493,160 @@ sibling nodes to match without regard to number. A more elaborate
|
|||
matching function could be used to overcome this limitation, but this
|
||||
is sufficient for the example.
|
||||
|
||||
Given the ability to determine whether a statement might be a
|
||||
docstring and extract the actual string from the statement, some work
|
||||
needs to be performed to walk the parse tree for an entire module and
|
||||
extract information about the names defined in each context of the
|
||||
module and associate any docstrings with the names. The code to
|
||||
perform this work is not complicated, but bears some explanation.
|
||||
|
||||
The public interface to the classes is straightforward and should
|
||||
probably be somewhat more flexible. Each ``major'' block of the
|
||||
module is described by an object providing several methods for inquiry
|
||||
and a constructor which accepts at least the subtree of the complete
|
||||
parse tree which it represents. The \code{ModuleInfo} constructor
|
||||
accepts an optional \code{\var{name}} parameter since it cannot
|
||||
otherwise determine the name of the module.
|
||||
|
||||
The public classes include \code{ClassInfo}, \code{FunctionInfo},
|
||||
and \code{ModuleInfo}. All objects provide the
|
||||
methods \code{get\_name()}, \code{get\_docstring()},
|
||||
\code{get\_class\_names()}, and \code{get\_class\_info()}. The
|
||||
\code{ClassInfo} objects support \code{get\_method\_names()} and
|
||||
\code{get\_method\_info()} while the other classes provide
|
||||
\code{get\_function\_names()} and \code{get\_function\_info()}.
|
||||
|
||||
Within each of the forms of code block that the public classes
|
||||
represent, most of the required information is in the same form and is
|
||||
accessed in the same way, with classes having the distinction that
|
||||
functions defined at the top level are referred to as ``methods.''
|
||||
Since the difference in nomenclature reflects a real semantic
|
||||
distinction from functions defined outside of a class, our
|
||||
implementation needs to maintain the same measure of distinction.
|
||||
Hence, most of the functionality of the public classes can be
|
||||
implemented in a common base class, \code{SuiteInfoBase}, with the
|
||||
accessors for function and method information provided elsewhere.
|
||||
Note that there is only one class which represents function and method
|
||||
information; this mirrors the use of the \code{def} statement to
|
||||
define both types of functions.
|
||||
|
||||
Most of the accessor functions are declared in \code{SuiteInfoBase}
|
||||
and do not need to be overridden by subclasses. More importantly, the
|
||||
extraction of most information from a parse tree is handled through a
|
||||
method called by the \code{SuiteInfoBase} constructor. The example
|
||||
code for most of the classes is clear when read alongside the formal
|
||||
grammar, but the method which recursively creates new information
|
||||
objects requires further examination. Here is the relevant part of
|
||||
the \code{SuiteInfoBase} definition from \file{example.py}:
|
||||
|
||||
\begin{verbatim}
|
||||
class SuiteInfoBase:
|
||||
_docstring = ''
|
||||
_name = ''
|
||||
|
||||
def __init__(self, tree = None):
|
||||
self._class_info = {}
|
||||
self._function_info = {}
|
||||
if tree:
|
||||
self._extract_info(tree)
|
||||
|
||||
def _extract_info(self, tree):
|
||||
# extract docstring
|
||||
if len(tree) == 2:
|
||||
found, vars = match(DOCSTRING_STMT_PATTERN[1], tree[1])
|
||||
else:
|
||||
found, vars = match(DOCSTRING_STMT_PATTERN, tree[3])
|
||||
if found:
|
||||
self._docstring = eval(vars['docstring'])
|
||||
# discover inner definitions
|
||||
for node in tree[1:]:
|
||||
found, vars = match(COMPOUND_STMT_PATTERN, node)
|
||||
if found:
|
||||
cstmt = vars['compound']
|
||||
if cstmt[0] == symbol.funcdef:
|
||||
name = cstmt[2][1]
|
||||
self._function_info[name] = FunctionInfo(cstmt)
|
||||
elif cstmt[0] == symbol.classdef:
|
||||
name = cstmt[2][1]
|
||||
self._class_info[name] = ClassInfo(cstmt)
|
||||
\end{verbatim}
|
||||
|
||||
After initializing some internal state, the constructor calls the
|
||||
\code{\_extract\_info()} method. This method performs the bulk of the
|
||||
information extraction which takes place in the entire example. The
|
||||
extraction has two distinct phases: the location of the docstring for
|
||||
the parse tree passed in, and the discovery of additional definitions
|
||||
within the code block represented by the parse tree.
|
||||
|
||||
The initial \code{if} test determines whether the nested suite is of
|
||||
the ``short form'' or the ``long form.'' The short form is used when
|
||||
the code block is on the same line as the definition of the code
|
||||
block, as in
|
||||
|
||||
\begin{verbatim}
|
||||
def square(x): "Square an argument."; return x ** 2
|
||||
\end{verbatim}
|
||||
|
||||
while the long form uses an indented block and allows nested
|
||||
definitions:
|
||||
|
||||
\begin{verbatim}
|
||||
def make_power(exp):
|
||||
"Make a function that raises an argument to the exponent `exp'."
|
||||
def raiser(x, y=exp):
|
||||
return x ** y
|
||||
return raiser
|
||||
\end{verbatim}
|
||||
|
||||
When the short form is used, the code block may contain a docstring as
|
||||
the first, and possibly only, \code{small\_stmt} element. The
|
||||
extraction of such a docstring is slightly different and requires only
|
||||
a portion of the complete pattern used in the more common case. As
|
||||
given in the code, the docstring will only be found if there is only
|
||||
one \code{small\_stmt} node in the \code{simple\_stmt} node. Since most
|
||||
functions and methods which use the short form do not provide
|
||||
a docstring, this may be considered sufficient. The extraction of the
|
||||
docstring proceeds using the \code{match()} function as described
|
||||
above, and the value of the docstring is stored as an attribute of the
|
||||
\code{SuiteInfoBase} object.
|
||||
|
||||
After docstring extraction, the method operates a simple definition discovery
|
||||
algorithm on the \code{stmt} nodes of the \code{suite} node. The
|
||||
special case of the short form is not tested; since there are no
|
||||
\code{stmt} nodes in the short form, the algorithm will silently skip
|
||||
the single \code{simple\_stmt} node and correctly not discover any
|
||||
nested definitions.
|
||||
|
||||
Each statement in the code block being examined is categorized as being
|
||||
a class definition, function definition (including methods), or
|
||||
something else. For the definition statements, the name of the
|
||||
element being defined is extracted and a representation object
|
||||
appropriate to the definition is created with the defining subtree
|
||||
passed as an argument to the constructor. The representation objects
|
||||
are stored in instance variables and may be retrieved by name using
|
||||
the appropriate accessor methods.
|
||||
|
||||
The public classes provide any accessors required which are more
|
||||
specific than those provided by the \code{SuiteInfoBase} class, but
|
||||
the real extraction algorithm remains common to all forms of code
|
||||
blocks. A high-level function can be used to extract the complete set
|
||||
of information from a source file:
|
||||
|
||||
\begin{verbatim}
|
||||
def get_docs(fileName):
|
||||
source = open(fileName).read()
|
||||
import os
|
||||
basename = os.path.basename(os.path.splitext(fileName)[0])
|
||||
import parser
|
||||
ast = parser.suite(source)
|
||||
tup = parser.ast2tuple(ast)
|
||||
return ModuleInfo(tup, basename)
|
||||
\end{verbatim}
|
||||
|
||||
This provides an easy-to-use interface to the documentation of a
|
||||
module. If information is required which is not extracted by the code
|
||||
of this example, the code may be extended at clearly defined points to
|
||||
provide additional capabilities.
|
||||
|
||||
|
||||
%%
|
||||
|
|
Loading…
Reference in New Issue