mirror of https://github.com/python/cpython
Demos for Fred's parser module
This commit is contained in:
parent
6dbd190f5e
commit
16d27e3b14
|
@ -0,0 +1,6 @@
|
||||||
|
Demo/parser/
|
||||||
|
Doc/libparser.tex
|
||||||
|
Lib/AST.py
|
||||||
|
Lib/symbol.py
|
||||||
|
Lib/token.py
|
||||||
|
Modules/parsermodule.c
|
|
@ -0,0 +1,8 @@
|
||||||
|
parser.dvi: parser.tex ../../Doc/libparser.tex
|
||||||
|
TEXINPUTS=../../Doc:: $(LATEX) parser
|
||||||
|
|
||||||
|
# Use a new name for this; the included file uses 'clean' already....
|
||||||
|
clean-parser:
|
||||||
|
rm -f *.log *.aux *.dvi *.pyc
|
||||||
|
|
||||||
|
include ../../Doc/Makefile
|
|
@ -0,0 +1,15 @@
|
||||||
|
These files are from the large example of using the `parser' module. Refer
|
||||||
|
to the Python Library Reference for more information.
|
||||||
|
|
||||||
|
Files:
|
||||||
|
------
|
||||||
|
|
||||||
|
example.py -- module that uses the `parser' module to extract
|
||||||
|
information from the parse tree of Python source
|
||||||
|
code.
|
||||||
|
|
||||||
|
source.py -- sample source code used to demonstrate ability to
|
||||||
|
handle nested constructs easily using the functions
|
||||||
|
and classes in example.py.
|
||||||
|
|
||||||
|
Enjoy!
|
|
@ -0,0 +1,2 @@
|
||||||
|
"""Some documentation.
|
||||||
|
"""
|
|
@ -0,0 +1,163 @@
|
||||||
|
"""Simple code to extract class & function docstrings from a module.
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import symbol
|
||||||
|
import token
|
||||||
|
import types
|
||||||
|
|
||||||
|
|
||||||
|
def get_docs(fileName):
    """Retrieve information from the parse tree of a source file.

    fileName
        Name of the file to read Python source code from.

    Returns a ModuleInfo built from the tuple form of the parse tree; its
    name is the base name of the file with the extension removed.
    """
    import os
    import parser

    # Close the file explicitly rather than relying on the object being
    # reclaimed; try/finally guarantees the close even if read() fails.
    fp = open(fileName)
    try:
        source = fp.read()
    finally:
        fp.close()

    basename = os.path.basename(os.path.splitext(fileName)[0])
    ast = parser.suite(source)
    tup = parser.ast2tuple(ast)
    return ModuleInfo(tup, basename)
|
||||||
|
|
||||||
|
|
||||||
|
class DefnInfo:
    """Name and docstring holder for a single definition.

    The class-level attributes act as defaults, so an instance that never
    assigns a docstring reports the empty string.
    """

    _docstring = ''
    _name = ''

    def __init__(self, tree):
        # The name is the second item of the third element of the node
        # (presumably the NAME token following 'def' or 'class').
        name_node = tree[2]
        self._name = name_node[1]

    def get_docstring(self):
        "Return the docstring recorded for this definition ('' if none)."
        return self._docstring

    def get_name(self):
        "Return the name recorded for this definition."
        return self._name
|
||||||
|
|
||||||
|
class SuiteInfoBase(DefnInfo):
    """Shared machinery for anything that contains a suite of statements.

    Keeps two dictionaries, keyed by name: one of ClassInfo objects and one
    of FunctionInfo objects found directly inside the suite.
    """

    def __init__(self):
        # Note: DefnInfo.__init__() is deliberately not called here;
        # subclasses that have a name call it themselves.
        self._class_info = {}
        self._function_info = {}

    def get_class_names(self):
        "Return the names of the classes defined directly in this suite."
        return self._class_info.keys()

    def get_class_info(self, name):
        "Return the ClassInfo for NAME; raises KeyError if unknown."
        return self._class_info[name]

    def _extract_info(self, tree):
        "Record the docstring and the nested definitions found in TREE."
        # The fourth element is checked for a docstring statement.
        if len(tree) >= 4:
            found, vars = match(DOCSTRING_STMT_PATTERN, tree[3])
            if found:
                # The captured value is the source text of the string
                # literal; eval() turns it into the string it denotes.
                self._docstring = eval(vars['docstring'])

        # Look for function and class definitions among the children.
        for node in tree[1:]:
            if node[0] != symbol.stmt:
                continue
            if node[1][0] != symbol.compound_stmt:
                continue
            defn = node[1][1]
            kind = defn[0]
            if kind == symbol.funcdef:
                self._function_info[defn[2][1]] = FunctionInfo(defn)
            elif kind == symbol.classdef:
                self._class_info[defn[2][1]] = ClassInfo(defn)
|
||||||
|
|
||||||
|
|
||||||
|
class SuiteInfo(SuiteInfoBase):
    """Suite information that also exposes the functions it contains."""

    def __init__(self, tree):
        # Set up the empty tables first, then fill them from TREE.
        SuiteInfoBase.__init__(self)
        self._extract_info(tree)

    def get_function_names(self):
        "Return the names of the functions defined directly in this suite."
        functions = self._function_info
        return functions.keys()

    def get_function_info(self, name):
        "Return the FunctionInfo for NAME; raises KeyError if unknown."
        functions = self._function_info
        return functions[name]
|
||||||
|
|
||||||
|
|
||||||
|
class FunctionInfo(SuiteInfo):
    """Information about one function (or method) definition.

    TREE is the definition node; its name is recorded by DefnInfo and its
    last element is treated as the suite holding the body.
    """

    def __init__(self, tree):
        DefnInfo.__init__(self, tree)
        SuiteInfoBase.__init__(self)
        # _extract_info() already performs the docstring check on the
        # fourth element of the suite, so it is not repeated here; the
        # string literal is therefore matched and evaluated only once.
        self._extract_info(tree[-1])
|
||||||
|
|
||||||
|
|
||||||
|
class ClassInfo(SuiteInfoBase):
    """Information about one class definition.

    Functions found directly inside the class suite are reported as the
    class's methods.
    """

    def __init__(self, tree):
        # The two base initializers touch disjoint attributes (the name
        # versus the lookup tables), so their order does not matter.
        DefnInfo.__init__(self, tree)
        SuiteInfoBase.__init__(self)
        suite = tree[-1]
        self._extract_info(suite)

    def get_method_names(self):
        "Return the names of the methods defined directly in this class."
        methods = self._function_info
        return methods.keys()

    def get_method_info(self, name):
        "Return the FunctionInfo for method NAME; raises KeyError if unknown."
        methods = self._function_info
        return methods[name]
|
||||||
|
|
||||||
|
|
||||||
|
class ModuleInfo(SuiteInfo):
    """Information about a whole module.

    TREE is the tuple form of the module's parse tree and NAME is the name
    to report for the module.
    """

    def __init__(self, tree, name="<string>"):
        self._name = name
        SuiteInfo.__init__(self, tree)
        # For a module the candidate docstring statement is the first
        # child, not the fourth element that _extract_info() examines.
        found, vars = match(DOCSTRING_STMT_PATTERN, tree[1])
        if found:
            # The captured value is the source text of the literal (quotes
            # included); evaluate it, as the other *Info classes do, so
            # callers get the string itself.
            # NOTE(review): eval() runs on text taken from the parsed file;
            # the pattern only lets a STRING token through.
            self._docstring = eval(vars["docstring"])
        else:
            # Discard anything _extract_info() picked up from tree[3]:
            # a string statement that is not first is not the docstring.
            self._docstring = ''
|
||||||
|
|
||||||
|
|
||||||
|
from types import ListType, TupleType
|
||||||
|
|
||||||
|
def match(pattern, data, vars=None):
    """Match DATA against PATTERN, collecting variable bindings.

    pattern
        A one-element list such as ['name'] is a variable: it matches
        anything and stores the matched data under that name.  A tuple
        matches a sequence of the same length whose items match
        pairwise.  Anything else must compare equal to the data.

    data
        The structure (normally nested tuples) to examine.

    vars
        Dictionary to store bindings in; a new one is created when
        omitted.

    Returns a pair (matched, vars) where MATCHED is a true value only if
    the whole pattern matched.  Bindings made before a mismatch was
    detected are left in VARS.
    """
    if vars is None:
        vars = {}
    pattern_type = type(pattern)
    if pattern_type is type([]):        # 'variables' are ['varname']
        vars[pattern[0]] = data
        return 1, vars
    if pattern_type is not type(()):
        return (pattern == data), vars
    # A tuple pattern can only match a tuple or list; without this check
    # len() would raise TypeError for data such as an integer.
    if type(data) not in (type(()), type([])):
        return 0, vars
    if len(data) != len(pattern):
        return 0, vars
    # Two empty sequences match; initialising the flag here also keeps
    # the final return from referencing an unbound name in that case.
    same = 1
    # Index loop: the lengths are known to be equal at this point.
    for i in range(len(pattern)):
        same, vars = match(pattern[i], data[i], vars)
        if not same:
            break
    return same, vars
|
||||||
|
|
||||||
|
|
||||||
|
# This pattern will match a 'stmt' node which *might* represent a docstring;
|
||||||
|
# docstrings require that the statement which provides the docstring be the
|
||||||
|
# first statement in the class or function, which this pattern does not check.
|
||||||
|
#
|
||||||
|
# Pattern for match(): a 'stmt' node consisting of nothing but a single
# string literal followed by a NEWLINE token.  ['docstring'] is a match()
# variable, so a successful match stores the STRING token's text under the
# key 'docstring'.
DOCSTRING_STMT_PATTERN = (
    symbol.stmt,
    (symbol.simple_stmt,
     (symbol.small_stmt,
      (symbol.expr_stmt,
       (symbol.testlist,
        (symbol.test,
         (symbol.and_test,
          (symbol.not_test,
           (symbol.comparison,
            (symbol.expr,
             (symbol.xor_expr,
              (symbol.and_expr,
               (symbol.shift_expr,
                (symbol.arith_expr,
                 (symbol.term,
                  (symbol.factor,
                   (symbol.power,
                    (symbol.atom,
                     (token.STRING, ['docstring'])
                     )))))))))))))))),
     (token.NEWLINE, '')
     ))
|
||||||
|
|
||||||
|
#
|
||||||
|
# end of file
|
|
@ -0,0 +1,77 @@
|
||||||
|
\documentstyle[twoside,10pt,myformat]{report}
|
||||||
|
|
||||||
|
%% This manual does not supplement the chapter from the Python
|
||||||
|
%% Library Reference, but only allows formatting of the parser module
|
||||||
|
%% component of that document as a separate document, and was created
|
||||||
|
%% primarily to ease review of the formatted document during authoring.
|
||||||
|
|
||||||
|
\title{Python Parser Module Reference}
|
||||||
|
\author{
|
||||||
|
Fred L. Drake, Jr. \\
|
||||||
|
Corporation for National Research Initiatives (CNRI) \\
|
||||||
|
1895 Preston White Drive, Reston, Va 20191, USA \\
|
||||||
|
E-mail: {\tt fdrake@cnri.reston.va.us}, {\tt fdrake@intr.net}
|
||||||
|
}
|
||||||
|
|
||||||
|
\date{August 20th, 1996 \\ Release 1.4}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\pagenumbering{roman}
|
||||||
|
|
||||||
|
\maketitle
|
||||||
|
|
||||||
|
Copyright \copyright{} 1995-1996 by Fred L. Drake, Jr. and Virginia
|
||||||
|
Polytechnic Institute and State University, Blacksburg, Virginia, USA.
|
||||||
|
Portions of the software copyright 1991-1995 by Stichting Mathematisch
|
||||||
|
Centrum, Amsterdam, The Netherlands. Copying is permitted under the
|
||||||
|
terms associated with the main Python distribution, with the
|
||||||
|
additional restriction that this additional notice be included and
|
||||||
|
maintained on all distributed copies.
|
||||||
|
|
||||||
|
\begin{center}
|
||||||
|
All Rights Reserved
|
||||||
|
\end{center}
|
||||||
|
|
||||||
|
Permission to use, copy, modify, and distribute this software and its
|
||||||
|
documentation for any purpose and without fee is hereby granted,
|
||||||
|
provided that the above copyright notice appear in all copies and that
|
||||||
|
both that copyright notice and this permission notice appear in
|
||||||
|
supporting documentation, and that the names of Fred L. Drake, Jr. and
|
||||||
|
Virginia Polytechnic Institute and State University not be used in
|
||||||
|
advertising or publicity pertaining to distribution of the software
|
||||||
|
without specific, written prior permission.
|
||||||
|
|
||||||
|
FRED L. DRAKE, JR. AND VIRGINIA POLYTECHNIC INSTITUTE AND STATE
|
||||||
|
UNIVERSITY DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||||
|
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
|
||||||
|
EVENT SHALL FRED L. DRAKE, JR. OR VIRGINIA POLYTECHNIC INSTITUTE AND
|
||||||
|
STATE UNIVERSITY BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
|
||||||
|
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
|
||||||
|
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
||||||
|
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||||
|
PERFORMANCE OF THIS SOFTWARE.
|
||||||
|
|
||||||
|
\begin{abstract}
|
||||||
|
|
||||||
|
\noindent
|
||||||
|
The \emph{Python Parser Module Reference} describes the interfaces
|
||||||
|
published by the optional \code{parser} module and gives examples of
|
||||||
|
how they may be used. It contains the same text as the chapter on the
|
||||||
|
\code{parser} module in the \emph{Python Library Reference}, but is
|
||||||
|
presented as a separate document.
|
||||||
|
|
||||||
|
This manual assumes basic knowledge about the Python language. For an
|
||||||
|
informal introduction to Python, see the {\em Python Tutorial}; the
|
||||||
|
Python Reference Manual remains the highest authority on syntactic and
|
||||||
|
semantic questions.
|
||||||
|
|
||||||
|
\end{abstract}
|
||||||
|
|
||||||
|
\pagebreak
|
||||||
|
\pagenumbering{arabic}
|
||||||
|
|
||||||
|
\chapter{Parser Module Reference}
|
||||||
|
\input{libparser}
|
||||||
|
|
||||||
|
\end{document}
|
|
@ -0,0 +1,143 @@
|
||||||
|
# pprint.py
|
||||||
|
#
|
||||||
|
# Author: Fred L. Drake, Jr.
|
||||||
|
# fdrake@vt.edu
|
||||||
|
#
|
||||||
|
# This is a simple little module I wrote to make life easier. I didn't
|
||||||
|
# see anything quite like it in the library, though I may have overlooked
|
||||||
|
# something. I wrote this when I was trying to read some heavily nested
|
||||||
|
# tuples with fairly non-descriptive content. This is modelled very much
|
||||||
|
# after Lisp/Scheme - style pretty-printing of lists. If you find it
|
||||||
|
# useful, thank small children who sleep at night.
|
||||||
|
#
|
||||||
|
|
||||||
|
"""Support to pretty-print lists, tuples, & dictionaries recursively.
|
||||||
|
Very simple, but at least somewhat useful, especially in debugging
|
||||||
|
data structures.
|
||||||
|
|
||||||
|
INDENT_PER_LEVEL -- Amount of indentation to use for each new
|
||||||
|
recursive level. The default is 1. This
|
||||||
|
must be a non-negative integer, and may be
|
||||||
|
set by the caller before calling pprint().
|
||||||
|
|
||||||
|
MAX_WIDTH -- Maximum width of the display. This is only
|
||||||
|
used if the representation *can* be kept
|
||||||
|
less than MAX_WIDTH characters wide. May
|
||||||
|
be set by the user before calling pprint().
|
||||||
|
|
||||||
|
TAB_WIDTH -- The width represented by a single tab. This
|
||||||
|
value is typically 8, but 4 is the default
|
||||||
|
under MacOS. Can be changed by the user if
|
||||||
|
desired, but is probably not a good idea.
|
||||||
|
|
||||||
|
pprint(seq [, stream]) -- The pretty-printer. This takes a Python
|
||||||
|
object (presumably a sequence, but that
|
||||||
|
doesn't matter) and an optional output
|
||||||
|
stream. See the function documentation
|
||||||
|
for details.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
# Columns of indentation added for each level of nesting.
INDENT_PER_LEVEL = 1

# Width pprint() tries to keep each output line within.
MAX_WIDTH = 80

# 4 when os.name is 'mac', otherwise 8.  'os' is needed only for this
# test and is removed from the module namespace again afterwards.
import os
TAB_WIDTH = (os.name == 'mac' and 4) or 8
del os
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _indentation(cols):
    "Create tabbed indentation string COLS columns wide."

    # Tabs are used wherever a full TAB_WIDTH fits so that the output
    # takes as few bytes as possible; this keeps files written with this
    # module small, which helps with storage quotas and transmission.
    tabs, spaces = divmod(cols, TAB_WIDTH)
    return ('\t' * tabs) + (' ' * spaces)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def pprint(seq, stream = None, indent = 0, allowance = 0):
    """Pretty-print a list, tuple, or dictionary.

    pprint(seq [, stream]) ==> None

    If STREAM is provided, output is written to that stream, otherwise
    sys.stdout is used.  Indentation is done according to
    INDENT_PER_LEVEL, which may be set to any non-negative integer
    before calling this function.  The output written on the stream is
    a perfectly valid representation of the Python object passed in,
    with indentation to suit human-readable interpretation.  The
    output can be used as input without error, given readable
    representations of all sequence elements are available via repr().
    Output is restricted to MAX_WIDTH columns where possible.  The
    STREAM parameter must support the write() method with a single
    parameter, which will always be a string.  The output stream may be
    a StringIO.StringIO object if the result is needed as a string.

    INDENT is the column the representation starts in and ALLOWANCE the
    number of columns to reserve for closing brackets; a terminating
    newline is written only when INDENT is zero.
    """

    if stream is None:
        import sys
        stream = sys.stdout

    _pprint_item(seq, stream, indent, allowance)

    # Terminate the 'print' if we're not a nested invocation.  The test
    # lives here, outside the recursion, so that INDENT_PER_LEVEL == 0
    # does not produce a newline after every nested element.
    if not indent:
        stream.write('\n')


def _pprint_item(seq, stream, indent, allowance):
    "Write the representation of SEQ to STREAM without a final newline."

    list_type = type([])
    tuple_type = type(())
    dict_type = type({})

    rep = repr(seq)
    typ = type(seq)
    # Only break the object over several lines when its one-line form
    # does not fit in the remaining width.
    sepLines = len(rep) > (MAX_WIDTH - 1 - indent - allowance)

    if sepLines and (typ is list_type or typ is tuple_type):
        # Pretty-print the sequence, one element per line.
        is_list = typ is list_type
        stream.write((is_list and '[') or '(')

        if len(seq):
            inner = indent + INDENT_PER_LEVEL
            _pprint_item(seq[0], stream, inner, allowance + 1)
            for ent in seq[1:]:
                stream.write(',\n' + _indentation(inner))
                _pprint_item(ent, stream, inner, allowance + 1)
            if len(seq) == 1 and not is_list:
                # A one-element tuple needs the trailing comma, or the
                # output would read back as the bare element.
                stream.write(',')

        stream.write((is_list and ']') or ')')

    elif typ is dict_type and sepLines:
        stream.write('{')

        if len(seq):
            inner = indent + INDENT_PER_LEVEL
            # Sorted so that the output order is predictable.
            items = seq.items()
            items.sort()
            first = 1
            for key, ent in items:
                rep = repr(key) + ': '
                if first:
                    stream.write(rep)
                    first = 0
                else:
                    stream.write(',\n' + _indentation(inner) + rep)
                # The value starts after the 'key: ' prefix.
                _pprint_item(ent, stream, inner + len(rep), allowance + 1)

        stream.write('}')

    else:
        stream.write(rep)
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# end of pprint.py
|
|
@ -0,0 +1,27 @@
|
||||||
|
"""Example file to be parsed for the parsermodule example.
|
||||||
|
|
||||||
|
The classes and functions in this module exist only to exhibit the ability
|
||||||
|
to handle information extraction from nested definitions using parse
|
||||||
|
trees. They shouldn't interest you otherwise!
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Sample input only: a class with a method, a nested class, and a function
# nested inside a method of that nested class.
class Simple:
    "This class does very little."

    def method(self):
        "This method does almost nothing."
        return 1

    class Nested:
        "This is a nested class."

        def nested_method(self):
            "Method of Nested class."
            def nested_function():
                "Function in method of Nested class."
                pass
            return nested_function
|
||||||
|
|
||||||
|
# Sample input only: a module-level function with a docstring.
def function():
    "This function lives at the module level."
    return 0
|
|
@ -0,0 +1,50 @@
|
||||||
|
#! /projects/python/Python-1.4b2/python
|
||||||
|
# (Force the script to use the latest build.)
|
||||||
|
#
|
||||||
|
# test_parser.py
|
||||||
|
|
||||||
|
import parser, traceback
|
||||||
|
|
||||||
|
_numFailed = 0
|
||||||
|
|
||||||
|
def testChunk(t, fileName):
    # Round-trip the source text T through the parser module: parse it,
    # convert the result to a tuple, rebuild an AST from that tuple and
    # check that converting the rebuilt AST gives an equal tuple.
    # FILENAME is used only in the messages.  Each failure increments the
    # module-level counter _numFailed.
    global _numFailed
    # The trailing comma keeps the result on the same output line.
    print '----', fileName,
    try:
        ast = parser.suite(t)
        tup = parser.ast2tuple(ast)
        # this discards the first AST; a huge memory savings when running
        # against a large source file like Tkinter.py.
        ast = None
        new = parser.tuple2ast(tup)
    except parser.ParserError, err:
        # Finish the '----' line before reporting the problem.
        print
        print 'parser module raised exception on input file', fileName + ':'
        traceback.print_exc()
        _numFailed = _numFailed + 1
    else:
        if tup != parser.ast2tuple(new):
            print
            print 'parser module failed on input file', fileName
            _numFailed = _numFailed + 1
        else:
            print 'o.k.'
|
||||||
|
|
||||||
|
def testFile(fileName):
    "Read the file named FILENAME and run testChunk() on its contents."
    # Close the file explicitly; try/finally guarantees the close even
    # if read() fails.
    fp = open(fileName)
    try:
        t = fp.read()
    finally:
        fp.close()
    testChunk(t, fileName)
|
||||||
|
|
||||||
|
def test():
    """Run testFile() on each file named on the command line.

    When no arguments are given, every '*.py' file in the current
    directory is tested.  Exits the process with a true status if any
    file failed and a false (zero) status otherwise.
    """
    import sys
    args = sys.argv[1:]
    if not args:
        import glob
        args = glob.glob("*.py")
    # An explicit loop: the calls are made for their side effects only,
    # so there is no reason to build a list of results with map().
    for fileName in args:
        testFile(fileName)
    sys.exit(_numFailed != 0)
|
||||||
|
|
||||||
|
# Run the tests only when executed as a script, not when imported.
if __name__ == '__main__':
    test()
|
||||||
|
|
||||||
|
#
|
||||||
|
# end of file
|
Loading…
Reference in New Issue