Demos for Fred's parser module

This commit is contained in:
Guido van Rossum 1996-08-21 16:28:53 +00:00
parent 6dbd190f5e
commit 16d27e3b14
9 changed files with 491 additions and 0 deletions

6
Demo/parser/FILES Normal file
View File

@ -0,0 +1,6 @@
Demo/parser/
Doc/libparser.tex
Lib/AST.py
Lib/symbol.py
Lib/token.py
Modules/parsermodule.c

8
Demo/parser/Makefile Normal file
View File

@ -0,0 +1,8 @@
# Format the parser module documentation as a stand-alone DVI file.
# parser.tex pulls in libparser.tex via \input, so ../../Doc is put on
# the TeX input search path through TEXINPUTS for the LaTeX run.
# NOTE(review): $(LATEX) is not defined in this file; presumably it is
# supplied by the included ../../Doc/Makefile -- confirm.
parser.dvi: parser.tex ../../Doc/libparser.tex
TEXINPUTS=../../Doc:: $(LATEX) parser
# Use a new name for this; the included file uses 'clean' already....
# Removes LaTeX by-products (*.log, *.aux, *.dvi) and compiled Python
# files (*.pyc) from this directory.
clean-parser:
rm -f *.log *.aux *.dvi *.pyc
include ../../Doc/Makefile

15
Demo/parser/README Normal file
View File

@ -0,0 +1,15 @@
These files are from the large example of using the `parser' module. Refer
to the Python Library Reference for more information.
Files:
------
example.py -- module that uses the `parser' module to extract
information from the parse tree of Python source
code.
source.py -- sample source code used to demonstrate ability to
handle nested constructs easily using the functions
and classes in example.py.
Enjoy!

2
Demo/parser/docstring.py Normal file
View File

@ -0,0 +1,2 @@
"""Some documentation.
"""

163
Demo/parser/example.py Normal file
View File

@ -0,0 +1,163 @@
"""Simple code to extract class & function docstrings from a module.
"""
import symbol
import token
import types
def get_docs(fileName):
    """Retrieve information from the parse tree of a source file.

    fileName
        Name of the file to read Python source code from.

    Returns a ModuleInfo instance built from the tuple form of the
    file's parse tree.  The module name passed to ModuleInfo is the
    base name of the file with its extension removed.
    """
    import os
    import parser

    # Close the file explicitly instead of leaving the handle open until
    # the file object happens to be reclaimed.
    f = open(fileName)
    try:
        source = f.read()
    finally:
        f.close()
    basename = os.path.basename(os.path.splitext(fileName)[0])
    ast = parser.suite(source)
    tup = parser.ast2tuple(ast)
    return ModuleInfo(tup, basename)
class DefnInfo:
    """Name and docstring common to every kind of definition.

    The class-level attributes provide empty-string defaults, so an
    instance that never records a docstring still answers
    get_docstring() with ''.
    """

    _name = ''
    _docstring = ''

    def __init__(self, tree):
        # The name is element 1 of the node found at index 2 of the tree.
        name_node = tree[2]
        self._name = name_node[1]

    def get_name(self):
        """Return the name recorded for this definition."""
        return self._name

    def get_docstring(self):
        """Return the recorded docstring, or '' if there is none."""
        return self._docstring
class SuiteInfoBase(DefnInfo):
    """Collect the classes and functions defined directly in a suite.

    Note that __init__() here only creates the two empty tables; it
    does not call DefnInfo.__init__(), so subclasses set the name
    themselves when they need one.
    """

    def __init__(self):
        self._function_info = {}
        self._class_info = {}

    def get_class_info(self, name):
        """Return the ClassInfo stored under NAME (KeyError if absent)."""
        return self._class_info[name]

    def get_class_names(self):
        """Return the names of the classes found in the suite."""
        return self._class_info.keys()

    def _extract_info(self, tree):
        """Record the docstring and nested definitions found in TREE.

        tree
            Tuple form of a parse-tree node whose children are scanned.
        """
        # Index 3 is probed for a docstring statement; presumably that
        # is where the first statement of an indented suite lives.
        if len(tree) >= 4:
            found, vars = match(DOCSTRING_STMT_PATTERN, tree[3])
            if found:
                # The captured value is the source text of the string
                # literal; eval() converts it to the string it denotes.
                self._docstring = eval(vars['docstring'])
        for node in tree[1:]:
            # Only compound statements can be definitions.
            if node[0] != symbol.stmt:
                continue
            compound = node[1]
            if compound[0] != symbol.compound_stmt:
                continue
            defn = compound[1]
            kind = defn[0]
            if kind == symbol.funcdef:
                name = defn[2][1]
                self._function_info[name] = FunctionInfo(defn)
            elif kind == symbol.classdef:
                name = defn[2][1]
                self._class_info[name] = ClassInfo(defn)
class SuiteInfo(SuiteInfoBase):
    """Suite information that exposes nested definitions as functions."""

    def __init__(self, tree):
        # Create the empty tables first, then fill them from the tree.
        SuiteInfoBase.__init__(self)
        self._extract_info(tree)

    def get_function_info(self, name):
        """Return the FunctionInfo stored under NAME (KeyError if absent)."""
        return self._function_info[name]

    def get_function_names(self):
        """Return the names of the functions found in the suite."""
        return self._function_info.keys()
class FunctionInfo(SuiteInfo):
    """Information about one function definition.

    tree
        Tuple form of the function definition node.  The name is taken
        from it by DefnInfo.__init__() and the last element is treated
        as the function's body suite.
    """

    def __init__(self, tree):
        DefnInfo.__init__(self, tree)
        SuiteInfoBase.__init__(self)
        # _extract_info() already looks for a docstring at index 3 of
        # the suite it is given, so the docstring does not need to be
        # extracted separately before calling it.
        self._extract_info(tree[-1])
class ClassInfo(SuiteInfoBase):
    """Information about one class definition.

    Functions found in the class body are reported as methods.
    """

    def __init__(self, tree):
        SuiteInfoBase.__init__(self)
        DefnInfo.__init__(self, tree)
        # The last element of the definition node is the class body.
        body = tree[-1]
        self._extract_info(body)

    def get_method_info(self, name):
        """Return the FunctionInfo stored under NAME (KeyError if absent)."""
        return self._function_info[name]

    def get_method_names(self):
        """Return the names of the functions found in the class body."""
        return self._function_info.keys()
class ModuleInfo(SuiteInfo):
    """Information about a whole module.

    tree
        Tuple form of the module's parse tree.

    name
        Name to report for the module; defaults to "<string>".
    """

    def __init__(self, tree, name="<string>"):
        self._name = name
        SuiteInfo.__init__(self, tree)
        # A module's docstring can only be its first statement, which is
        # tree[1].  The inherited extraction probes tree[3] instead, so
        # its result is always replaced here, including with '' when the
        # first statement is not a docstring.
        found, vars = match(DOCSTRING_STMT_PATTERN, tree[1])
        if found:
            # Evaluate the literal's source text so that the value has
            # the same form as the docstrings stored for classes and
            # functions (the string itself, not its quoted source).
            self._docstring = eval(vars["docstring"])
        else:
            self._docstring = ''
from types import ListType, TupleType
def match(pattern, data, vars=None):
    """Match `data' against `pattern', collecting variable bindings.

    pattern
        Pattern to match against.  A list such as ['varname'] is a
        variable: it matches anything and binds the data to that name.
        A tuple is matched element by element, recursively, and only
        against data of the same length.  Any other value must compare
        equal to the data.

    data
        Data to be checked against the pattern.

    vars
        Dictionary of bindings collected so far.  A new dictionary is
        created when this is omitted.

    Returns a pair (matched, vars).  Bindings made before a mismatch
    was detected are left in vars.
    """
    if vars is None:
        vars = {}
    if isinstance(pattern, list):       # 'variables' are ['varname']
        vars[pattern[0]] = data
        return 1, vars
    if not isinstance(pattern, tuple):
        return (pattern == data), vars
    # Data without a length (a number, say) cannot match a tuple.
    try:
        size = len(data)
    except TypeError:
        return 0, vars
    if size != len(pattern):
        return 0, vars
    # Start out true so that an empty tuple matches an empty sequence
    # instead of leaving the result undefined.
    same = 1
    for i in range(size):
        same, vars = match(pattern[i], data[i], vars)
        if not same:
            break
    return same, vars
# This pattern will match a 'stmt' node which *might* represent a docstring;
# docstrings require that the statement which provides the docstring be the
# first statement in the class or function, which this pattern does not check.
#
# The pattern is a chain of nested tuples leading from symbol.stmt down to
# a single token.STRING, followed by a token.NEWLINE whose text is ''.
# The one-element list ['docstring'] is a variable for match(): the source
# text of the string token is stored under the key 'docstring'.
#
DOCSTRING_STMT_PATTERN = (
symbol.stmt,
(symbol.simple_stmt,
(symbol.small_stmt,
(symbol.expr_stmt,
(symbol.testlist,
(symbol.test,
(symbol.and_test,
(symbol.not_test,
(symbol.comparison,
(symbol.expr,
(symbol.xor_expr,
(symbol.and_expr,
(symbol.shift_expr,
(symbol.arith_expr,
(symbol.term,
(symbol.factor,
(symbol.power,
(symbol.atom,
(token.STRING, ['docstring'])
)))))))))))))))),
(token.NEWLINE, '')
))
#
# end of file

77
Demo/parser/parser.tex Normal file
View File

@ -0,0 +1,77 @@
\documentstyle[twoside,10pt,myformat]{report}
%% This manual does not supplement the chapter from the Python
%% Library Reference, but only allows formatting of the parser module
%% component of that document as a separate document, and was created
%% primarily to ease review of the formatted document during authoring.
\title{Python Parser Module Reference}
\author{
Fred L. Drake, Jr. \\
Corporation for National Research Initiatives (CNRI) \\
1895 Preston White Drive, Reston, Va 20191, USA \\
E-mail: {\tt fdrake@cnri.reston.va.us}, {\tt fdrake@intr.net}
}
\date{August 20th, 1996 \\ Release 1.4}
\begin{document}
\pagenumbering{roman}
\maketitle
Copyright \copyright{} 1995-1996 by Fred L. Drake, Jr. and Virginia
Polytechnic Institute and State University, Blacksburg, Virginia, USA.
Portions of the software copyright 1991-1995 by Stichting Mathematisch
Centrum, Amsterdam, The Netherlands. Copying is permitted under the
terms associated with the main Python distribution, with the
additional restriction that this additional notice be included and
maintained on all distributed copies.
\begin{center}
All Rights Reserved
\end{center}
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of Fred L. Drake, Jr. and
Virginia Polytechnic Institute and State University not be used in
advertising or publicity pertaining to distribution of the software
without specific, written prior permission.
FRED L. DRAKE, JR. AND VIRGINIA POLYTECHNIC INSTITUTE AND STATE
UNIVERSITY DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
EVENT SHALL FRED L. DRAKE, JR. OR VIRGINIA POLYTECHNIC INSTITUTE AND
STATE UNIVERSITY BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
\begin{abstract}
\noindent
The \emph{Python Parser Module Reference} describes the interfaces
published by the optional \code{parser} module and gives examples of
how they may be used. It contains the same text as the chapter on the
\code{parser} module in the \emph{Python Library Reference}, but is
presented as a separate document.
This manual assumes basic knowledge about the Python language. For an
informal introduction to Python, see the {\em Python Tutorial}; the
Python Reference Manual remains the highest authority on syntactic and
semantic questions.
\end{abstract}
\pagebreak
\pagenumbering{arabic}
\chapter{Parser Module Reference}
\input{libparser}
\end{document}

143
Demo/parser/pprint.py Normal file
View File

@ -0,0 +1,143 @@
# pprint.py
#
# Author: Fred L. Drake, Jr.
# fdrake@vt.edu
#
# This is a simple little module I wrote to make life easier. I didn't
# see anything quite like it in the library, though I may have overlooked
# something. I wrote this when I was trying to read some heavily nested
# tuples with fairly non-descriptive content. This is modelled very much
# after Lisp/Scheme - style pretty-printing of lists. If you find it
# useful, thank small children who sleep at night.
#
"""Support to pretty-print lists, tuples, & dictionaries recursively.
Very simple, but at least somewhat useful, especially in debugging
data structures.
INDENT_PER_LEVEL -- Amount of indentation to use for each new
recursive level. The default is 1. This
must be a non-negative integer, and may be
set by the caller before calling pprint().
MAX_WIDTH -- Maximum width of the display. This is only
used if the representation *can* be kept
less than MAX_WIDTH characters wide. May
be set by the user before calling pprint().
TAB_WIDTH -- The width represented by a single tab. This
value is typically 8, but 4 is the default
under MacOS. Can be changed by the user if
desired, but is probably not a good idea.
pprint(seq [, stream]) -- The pretty-printer. This takes a Python
object (presumably a sequence, but that
doesn't matter) and an optional output
stream. See the function documentation
for details.
"""
# Columns of indentation added for each level of nesting.
INDENT_PER_LEVEL = 1
# Width that the output is kept within where possible.
MAX_WIDTH = 80
import os
# 'cond and a or b' idiom: 4 when os.name is 'mac', otherwise 8.
TAB_WIDTH = (os.name == 'mac' and 4) or 8
# 'os' was needed only to compute TAB_WIDTH; remove it from the module
# namespace again.
del os
def _indentation(cols):
    "Create tabbed indentation string COLS columns wide."
    # This is used to reduce the byte-count for the output, allowing
    # files created using this module to use as little external storage
    # as possible.  This is primarily intended to minimize impact on
    # a user's quota when storing resource files, or for creating output
    # intended for transmission.
    #
    # divmod() always gives an integer tab count for integer COLS; the
    # '/' operator would give a float under true division, and a float
    # cannot be used to repeat a string.
    tabs, spaces = divmod(cols, TAB_WIDTH)
    return (tabs * '\t') + (spaces * ' ')
def pprint(seq, stream = None, indent = 0, allowance = 0):
    """Pretty-print a list, tuple, or dictionary.

    pprint(seq [, stream]) ==> None

    If STREAM is provided, output is written to that stream, otherwise
    sys.stdout is used.  Indentation is done according to
    INDENT_PER_LEVEL, which may be set to any non-negative integer
    before calling this function.  The output written on the stream is
    a perfectly valid representation of the Python object passed in,
    with indentation to suit human-readable interpretation.  The
    output can be used as input without error, given readable
    representations of all sequence elements are available via repr().
    Output is restricted to MAX_WIDTH columns where possible.  The
    STREAM parameter must support the write() method with a single
    parameter, which will always be a string.  The output stream may be
    a StringIO.StringIO object if the result is needed as a string.

    INDENT is the column at which the output starts and ALLOWANCE is
    the number of columns held back at the right margin; both are
    subtracted from MAX_WIDTH when deciding whether an object fits on
    one line.  A terminating newline is written only when INDENT is
    zero.
    """
    if stream is None:
        import sys
        stream = sys.stdout
    _pprint(seq, stream, indent, allowance)
    # Terminate the output here, once, rather than inside the recursive
    # worker: nested objects are also at indent zero when
    # INDENT_PER_LEVEL is zero, and they must not end the line.
    if not indent:
        stream.write('\n')


def _pprint(seq, stream, indent, allowance):
    "Write SEQ to STREAM with no final newline; recursive worker for pprint()."
    rep = repr(seq)
    typ = type(seq)
    sepLines = len(rep) > (MAX_WIDTH - 1 - indent - allowance)
    if sepLines and (typ is list or typ is tuple):
        # Pretty-print the sequence, one element per line.
        if typ is list:
            opener, closer = '[', ']'
        else:
            opener, closer = '(', ')'
        stream.write(opener)
        if seq:
            inner = indent + INDENT_PER_LEVEL
            _pprint(seq[0], stream, inner, allowance + 1)
            for ent in seq[1:]:
                stream.write(',\n' + _indentation(inner))
                _pprint(ent, stream, inner, allowance + 1)
            if typ is tuple and len(seq) == 1:
                # Without the comma the output would read back as a
                # parenthesized expression, not a one-element tuple.
                stream.write(',')
        stream.write(closer)
    elif sepLines and typ is dict:
        stream.write('{')
        if seq:
            inner = indent + INDENT_PER_LEVEL
            # Copy into a list before sorting; the result of items() is
            # not guaranteed to have a sort() method.
            items = list(seq.items())
            items.sort()
            first = 1
            for key, ent in items:
                keyRep = repr(key) + ': '
                if first:
                    stream.write(keyRep)
                    first = 0
                else:
                    stream.write(',\n' + _indentation(inner) + keyRep)
                # The value starts to the right of its key.
                _pprint(ent, stream, inner + len(keyRep), allowance + 1)
        stream.write('}')
    else:
        stream.write(rep)
#
# end of pprint.py

27
Demo/parser/source.py Normal file
View File

@ -0,0 +1,27 @@
"""Example file to be parsed for the parsermodule example.
The classes and functions in this module exist only to demonstrate the
extraction of information from nested definitions using parse trees.
They shouldn't interest you otherwise!
"""
# Sample input for the parser example: a class holding a method and a
# nested class, so that definitions occur at several nesting depths.
class Simple:
    "This class does very little."
    def method(self):
        "This method does almost nothing."
        return 1
    # A class defined inside another class.
    class Nested:
        "This is a nested class."
        def nested_method(self):
            "Method of Nested class."
            # A function defined inside a method; it is returned to the
            # caller without being called.
            def nested_function():
                "Function in method of Nested class."
                pass
            return nested_function
# Sample input for the parser example: a function defined at module level.
def function():
    "This function lives at the module level."
    return 0

50
Demo/parser/test_parser.py Executable file
View File

@ -0,0 +1,50 @@
#! /projects/python/Python-1.4b2/python
# (Force the script to use the latest build.)
#
# test_parser.py
import parser, traceback
_numFailed = 0
def testChunk(t, fileName):
global _numFailed
print '----', fileName,
try:
ast = parser.suite(t)
tup = parser.ast2tuple(ast)
# this discards the first AST; a huge memory savings when running
# against a large source file like Tkinter.py.
ast = None
new = parser.tuple2ast(tup)
except parser.ParserError, err:
print
print 'parser module raised exception on input file', fileName + ':'
traceback.print_exc()
_numFailed = _numFailed + 1
else:
if tup != parser.ast2tuple(new):
print
print 'parser module failed on input file', fileName
_numFailed = _numFailed + 1
else:
print 'o.k.'
def testFile(fileName):
    """Read the file named FILENAME and run testChunk() on its text."""
    # Close the file explicitly; the test may be run over many files
    # and should not hold on to a handle for each of them.
    f = open(fileName)
    try:
        t = f.read()
    finally:
        f.close()
    testChunk(t, fileName)
def test():
    """Test the files named on the command line, then exit.

    With no arguments, every file matching "*.py" in the current
    directory is tested.  The exit status is true (non-zero) when at
    least one file failed.
    """
    import sys
    fileNames = sys.argv[1:]
    if len(fileNames) == 0:
        import glob
        fileNames = glob.glob("*.py")
    for fileName in fileNames:
        testFile(fileName)
    sys.exit(_numFailed != 0)
if __name__ == '__main__':
test()
#
# end of file