The compiler package is now part of the standard library.

Remove all these files.  All except astgen.py are moved to Lib/compiler.
This commit is contained in:
Jeremy Hylton 2001-09-20 01:27:40 +00:00
parent f6cc07cffe
commit b3c569ce82
12 changed files with 0 additions and 5811 deletions

View File

@ -1,27 +0,0 @@
"""Package for parsing and compiling Python source code
There are several functions defined at the top level that are imported
from modules contained in the package.
parse(buf, mode="exec") -> AST
Converts a string containing Python source code to an abstract
syntax tree (AST). The AST is defined in compiler.ast.
parseFile(path) -> AST
The same as parse(open(path))
walk(ast, visitor, verbose=None)
Does a pre-order walk over the ast using the visitor instance.
See compiler.visitor for details.
compile(source, filename, mode, flags=None, dont_inherit=None)
Returns a code object. A replacement for the builtin compile() function.
compileFile(filename)
Generates a .pyc file by compilining filename.
"""
from transformer import parse, parseFile
from visitor import walk
from pycodegen import compile, compileFile

File diff suppressed because it is too large Load Diff

View File

@ -1,280 +0,0 @@
"""Generate ast module from specification
This script generates the ast module from a simple specification,
which makes it easy to accomodate changes in the grammar. This
approach would be quite reasonable if the grammar changed often.
Instead, it is rather complex to generate the appropriate code. And
the Node interface has changed more often than the grammar.
"""
import fileinput
import getopt
import re
import sys
from StringIO import StringIO
SPEC = "ast.txt"
COMMA = ", "
def load_boilerplate(file):
f = open(file)
buf = f.read()
f.close()
i = buf.find('### ''PROLOGUE')
j = buf.find('### ''EPILOGUE')
pro = buf[i+12:j].strip()
epi = buf[j+12:].strip()
return pro, epi
def strip_default(arg):
"""Return the argname from an 'arg = default' string"""
i = arg.find('=')
if i == -1:
return arg
t = arg[:i].strip()
return t
P_NODE = 1
P_OTHER = 2
P_NESTED = 3
P_NONE = 4
class NodeInfo:
"""Each instance describes a specific AST node"""
def __init__(self, name, args):
self.name = name
self.args = args.strip()
self.argnames = self.get_argnames()
self.argprops = self.get_argprops()
self.nargs = len(self.argnames)
self.init = []
def get_argnames(self):
if '(' in self.args:
i = self.args.find('(')
j = self.args.rfind(')')
args = self.args[i+1:j]
else:
args = self.args
return [strip_default(arg.strip())
for arg in args.split(',') if arg]
def get_argprops(self):
"""Each argument can have a property like '*' or '!'
XXX This method modifies the argnames in place!
"""
d = {}
hardest_arg = P_NODE
for i in range(len(self.argnames)):
arg = self.argnames[i]
if arg.endswith('*'):
arg = self.argnames[i] = arg[:-1]
d[arg] = P_OTHER
hardest_arg = max(hardest_arg, P_OTHER)
elif arg.endswith('!'):
arg = self.argnames[i] = arg[:-1]
d[arg] = P_NESTED
hardest_arg = max(hardest_arg, P_NESTED)
elif arg.endswith('&'):
arg = self.argnames[i] = arg[:-1]
d[arg] = P_NONE
hardest_arg = max(hardest_arg, P_NONE)
else:
d[arg] = P_NODE
self.hardest_arg = hardest_arg
if hardest_arg > P_NODE:
self.args = self.args.replace('*', '')
self.args = self.args.replace('!', '')
self.args = self.args.replace('&', '')
return d
def gen_source(self):
buf = StringIO()
print >> buf, "class %s(Node):" % self.name
print >> buf, ' nodes["%s"] = "%s"' % (self.name.lower(), self.name)
self._gen_init(buf)
print >> buf
self._gen_getChildren(buf)
print >> buf
self._gen_getChildNodes(buf)
print >> buf
self._gen_repr(buf)
buf.seek(0, 0)
return buf.read()
def _gen_init(self, buf):
print >> buf, " def __init__(self, %s):" % self.args
if self.argnames:
for name in self.argnames:
print >> buf, " self.%s = %s" % (name, name)
else:
print >> buf, " pass"
if self.init:
print >> buf, "".join([" " + line for line in self.init])
def _gen_getChildren(self, buf):
print >> buf, " def getChildren(self):"
if len(self.argnames) == 0:
print >> buf, " return ()"
else:
if self.hardest_arg < P_NESTED:
clist = COMMA.join(["self.%s" % c
for c in self.argnames])
if self.nargs == 1:
print >> buf, " return %s," % clist
else:
print >> buf, " return %s" % clist
else:
print >> buf, " children = []"
template = " children.%s(%sself.%s%s)"
for name in self.argnames:
if self.argprops[name] == P_NESTED:
print >> buf, template % ("extend", "flatten(",
name, ")")
else:
print >> buf, template % ("append", "", name, "")
print >> buf, " return tuple(children)"
def _gen_getChildNodes(self, buf):
print >> buf, " def getChildNodes(self):"
if len(self.argnames) == 0:
print >> buf, " return ()"
else:
if self.hardest_arg < P_NESTED:
clist = ["self.%s" % c
for c in self.argnames
if self.argprops[c] == P_NODE]
if len(clist) == 0:
print >> buf, " return ()"
elif len(clist) == 1:
print >> buf, " return %s," % clist[0]
else:
print >> buf, " return %s" % COMMA.join(clist)
else:
print >> buf, " nodes = []"
template = " nodes.%s(%sself.%s%s)"
for name in self.argnames:
if self.argprops[name] == P_NONE:
tmp = (" if self.%s is not None:"
" nodes.append(self.%s)")
print >> buf, tmp % (name, name)
elif self.argprops[name] == P_NESTED:
print >> buf, template % ("extend", "flatten_nodes(",
name, ")")
elif self.argprops[name] == P_NODE:
print >> buf, template % ("append", "", name, "")
print >> buf, " return tuple(nodes)"
def _gen_repr(self, buf):
print >> buf, " def __repr__(self):"
if self.argnames:
fmt = COMMA.join(["%s"] * self.nargs)
if '(' in self.args:
fmt = '(%s)' % fmt
vals = ["repr(self.%s)" % name for name in self.argnames]
vals = COMMA.join(vals)
if self.nargs == 1:
vals = vals + ","
print >> buf, ' return "%s(%s)" %% (%s)' % \
(self.name, fmt, vals)
else:
print >> buf, ' return "%s()"' % self.name
rx_init = re.compile('init\((.*)\):')
def parse_spec(file):
classes = {}
cur = None
for line in fileinput.input(file):
if line.strip().startswith('#'):
continue
mo = rx_init.search(line)
if mo is None:
if cur is None:
# a normal entry
try:
name, args = line.split(':')
except ValueError:
continue
classes[name] = NodeInfo(name, args)
cur = None
else:
# some code for the __init__ method
cur.init.append(line)
else:
# some extra code for a Node's __init__ method
name = mo.group(1)
cur = classes[name]
return classes.values()
def main():
prologue, epilogue = load_boilerplate(sys.argv[-1])
print prologue
print
classes = parse_spec(SPEC)
for info in classes:
print info.gen_source()
print epilogue
if __name__ == "__main__":
main()
sys.exit(0)
### PROLOGUE
"""Python abstract syntax node definitions
This file is automatically generated.
"""
from types import TupleType, ListType
from consts import CO_VARARGS, CO_VARKEYWORDS
def flatten(list):
l = []
for elt in list:
t = type(elt)
if t is TupleType or t is ListType:
for elt2 in flatten(elt):
l.append(elt2)
else:
l.append(elt)
return l
def flatten_nodes(list):
return [n for n in flatten(list) if isinstance(n, Node)]
def asList(nodes):
l = []
for item in nodes:
if hasattr(item, "asList"):
l.append(item.asList())
else:
t = type(item)
if t is TupleType or t is ListType:
l.append(tuple(asList(item)))
else:
l.append(item)
return l
nodes = {}
class Node: # an abstract base class
lineno = None # provide a lineno for nodes that don't have one
def getType(self):
pass # implemented by subclass
def getChildren(self):
pass # implemented by subclasses
def asList(self):
return tuple(asList(self.getChildren()))
def getChildNodes(self):
pass # implemented by subclasses
class EmptyNode(Node):
pass
### EPILOGUE
klasses = globals()
for k in nodes.keys():
nodes[k] = klasses[nodes[k]]

View File

@ -1,19 +0,0 @@
# operation flags
OP_ASSIGN = 'OP_ASSIGN'
OP_DELETE = 'OP_DELETE'
OP_APPLY = 'OP_APPLY'
SC_LOCAL = 1
SC_GLOBAL = 2
SC_FREE = 3
SC_CELL = 4
SC_UNKNOWN = 5
CO_OPTIMIZED = 0x0001
CO_NEWLOCALS = 0x0002
CO_VARARGS = 0x0004
CO_VARKEYWORDS = 0x0008
CO_NESTED = 0x0010
CO_GENERATOR = 0x0020
CO_GENERATOR_ALLOWED = 0x1000
CO_FUTURE_DIVISION = 0x2000

View File

@ -1,73 +0,0 @@
"""Parser for future statements
"""
from compiler import ast, walk
def is_future(stmt):
"""Return true if statement is a well-formed future statement"""
if not isinstance(stmt, ast.From):
return 0
if stmt.modname == "__future__":
return 1
else:
return 0
class FutureParser:
features = ("nested_scopes", "generators", "division")
def __init__(self):
self.found = {} # set
def visitModule(self, node):
stmt = node.node
for s in stmt.nodes:
if not self.check_stmt(s):
break
def check_stmt(self, stmt):
if is_future(stmt):
for name, asname in stmt.names:
if name in self.features:
self.found[name] = 1
else:
raise SyntaxError, \
"future feature %s is not defined" % name
stmt.valid_future = 1
return 1
return 0
def get_features(self):
"""Return list of features enabled by future statements"""
return self.found.keys()
class BadFutureParser:
"""Check for invalid future statements"""
def visitFrom(self, node):
if hasattr(node, 'valid_future'):
return
if node.modname != "__future__":
return
raise SyntaxError, "invalid future statement"
def find_futures(node):
p1 = FutureParser()
p2 = BadFutureParser()
walk(node, p1)
walk(node, p2)
return p1.get_features()
if __name__ == "__main__":
import sys
from compiler import parseFile, walk
for file in sys.argv[1:]:
print file
tree = parseFile(file)
v = FutureParser()
walk(tree, v)
print v.found
print

View File

@ -1,75 +0,0 @@
import types
def flatten(tup):
elts = []
for elt in tup:
if type(elt) == types.TupleType:
elts = elts + flatten(elt)
else:
elts.append(elt)
return elts
class Set:
def __init__(self):
self.elts = {}
def __len__(self):
return len(self.elts)
def __contains__(self, elt):
return self.elts.has_key(elt)
def add(self, elt):
self.elts[elt] = elt
def elements(self):
return self.elts.keys()
def has_elt(self, elt):
return self.elts.has_key(elt)
def remove(self, elt):
del self.elts[elt]
def copy(self):
c = Set()
c.elts.update(self.elts)
return c
class Stack:
def __init__(self):
self.stack = []
self.pop = self.stack.pop
def __len__(self):
return len(self.stack)
def push(self, elt):
self.stack.append(elt)
def top(self):
return self.stack[-1]
def __getitem__(self, index): # needed by visitContinue()
return self.stack[index]
MANGLE_LEN = 256 # magic constant from compile.c
def mangle(name, klass):
if not name.startswith('__'):
return name
if len(name) + 2 >= MANGLE_LEN:
return name
if name.endswith('__'):
return name
try:
i = 0
while klass[i] == '_':
i = i + 1
except IndexError:
return name
klass = klass[i:]
tlen = len(klass) + len(name)
if tlen > MANGLE_LEN:
klass = klass[:MANGLE_LEN-tlen]
return "_%s%s" % (klass, name)
def set_filename(filename, tree):
"""Set the filename attribute to filename on every node in tree"""
worklist = [tree]
while worklist:
node = worklist.pop(0)
node.filename = filename
worklist.extend(node.getChildNodes())

View File

@ -1,791 +0,0 @@
"""A flow graph representation for Python bytecode"""
import dis
import new
import string
import sys
import types
from compiler import misc
from compiler.consts import CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, \
CO_VARKEYWORDS
def xxx_sort(l):
l = l[:]
def sorter(a, b):
return cmp(a.bid, b.bid)
l.sort(sorter)
return l
class FlowGraph:
def __init__(self):
self.current = self.entry = Block()
self.exit = Block("exit")
self.blocks = misc.Set()
self.blocks.add(self.entry)
self.blocks.add(self.exit)
def startBlock(self, block):
if self._debug:
if self.current:
print "end", repr(self.current)
print " next", self.current.next
print " ", self.current.get_children()
print repr(block)
self.current = block
def nextBlock(self, block=None):
# XXX think we need to specify when there is implicit transfer
# from one block to the next. might be better to represent this
# with explicit JUMP_ABSOLUTE instructions that are optimized
# out when they are unnecessary.
#
# I think this strategy works: each block has a child
# designated as "next" which is returned as the last of the
# children. because the nodes in a graph are emitted in
# reverse post order, the "next" block will always be emitted
# immediately after its parent.
# Worry: maintaining this invariant could be tricky
if block is None:
block = self.newBlock()
# Note: If the current block ends with an unconditional
# control transfer, then it is incorrect to add an implicit
# transfer to the block graph. The current code requires
# these edges to get the blocks emitted in the right order,
# however. :-( If a client needs to remove these edges, call
# pruneEdges().
self.current.addNext(block)
self.startBlock(block)
def newBlock(self):
b = Block()
self.blocks.add(b)
return b
def startExitBlock(self):
self.startBlock(self.exit)
_debug = 0
def _enable_debug(self):
self._debug = 1
def _disable_debug(self):
self._debug = 0
def emit(self, *inst):
if self._debug:
print "\t", inst
if inst[0] == 'RETURN_VALUE':
self.current.addOutEdge(self.exit)
if len(inst) == 2 and isinstance(inst[1], Block):
self.current.addOutEdge(inst[1])
self.current.emit(inst)
def getBlocksInOrder(self):
"""Return the blocks in reverse postorder
i.e. each node appears before all of its successors
"""
# XXX make sure every node that doesn't have an explicit next
# is set so that next points to exit
for b in self.blocks.elements():
if b is self.exit:
continue
if not b.next:
b.addNext(self.exit)
order = dfs_postorder(self.entry, {})
order.reverse()
self.fixupOrder(order, self.exit)
# hack alert
if not self.exit in order:
order.append(self.exit)
return order
def fixupOrder(self, blocks, default_next):
"""Fixup bad order introduced by DFS."""
# XXX This is a total mess. There must be a better way to get
# the code blocks in the right order.
self.fixupOrderHonorNext(blocks, default_next)
self.fixupOrderForward(blocks, default_next)
def fixupOrderHonorNext(self, blocks, default_next):
"""Fix one problem with DFS.
The DFS uses child block, but doesn't know about the special
"next" block. As a result, the DFS can order blocks so that a
block isn't next to the right block for implicit control
transfers.
"""
index = {}
for i in range(len(blocks)):
index[blocks[i]] = i
for i in range(0, len(blocks) - 1):
b = blocks[i]
n = blocks[i + 1]
if not b.next or b.next[0] == default_next or b.next[0] == n:
continue
# The blocks are in the wrong order. Find the chain of
# blocks to insert where they belong.
cur = b
chain = []
elt = cur
while elt.next and elt.next[0] != default_next:
chain.append(elt.next[0])
elt = elt.next[0]
# Now remove the blocks in the chain from the current
# block list, so that they can be re-inserted.
l = []
for b in chain:
assert index[b] > i
l.append((index[b], b))
l.sort()
l.reverse()
for j, b in l:
del blocks[index[b]]
# Insert the chain in the proper location
blocks[i:i + 1] = [cur] + chain
# Finally, re-compute the block indexes
for i in range(len(blocks)):
index[blocks[i]] = i
def fixupOrderForward(self, blocks, default_next):
"""Make sure all JUMP_FORWARDs jump forward"""
index = {}
chains = []
cur = []
for b in blocks:
index[b] = len(chains)
cur.append(b)
if b.next and b.next[0] == default_next:
chains.append(cur)
cur = []
chains.append(cur)
while 1:
constraints = []
for i in range(len(chains)):
l = chains[i]
for b in l:
for c in b.get_children():
if index[c] < i:
forward_p = 0
for inst in b.insts:
if inst[0] == 'JUMP_FORWARD':
if inst[1] == c:
forward_p = 1
if not forward_p:
continue
constraints.append((index[c], i))
if not constraints:
break
# XXX just do one for now
# do swaps to get things in the right order
goes_before, a_chain = constraints[0]
assert a_chain > goes_before
c = chains[a_chain]
chains.remove(c)
chains.insert(goes_before, c)
del blocks[:]
for c in chains:
for b in c:
blocks.append(b)
def getBlocks(self):
return self.blocks.elements()
def getRoot(self):
"""Return nodes appropriate for use with dominator"""
return self.entry
def getContainedGraphs(self):
l = []
for b in self.getBlocks():
l.extend(b.getContainedGraphs())
return l
def dfs_postorder(b, seen):
"""Depth-first search of tree rooted at b, return in postorder"""
order = []
seen[b] = b
for c in b.get_children():
if seen.has_key(c):
continue
order = order + dfs_postorder(c, seen)
order.append(b)
return order
class Block:
_count = 0
def __init__(self, label=''):
self.insts = []
self.inEdges = misc.Set()
self.outEdges = misc.Set()
self.label = label
self.bid = Block._count
self.next = []
Block._count = Block._count + 1
def __repr__(self):
if self.label:
return "<block %s id=%d>" % (self.label, self.bid)
else:
return "<block id=%d>" % (self.bid)
def __str__(self):
insts = map(str, self.insts)
return "<block %s %d:\n%s>" % (self.label, self.bid,
string.join(insts, '\n'))
def emit(self, inst):
op = inst[0]
if op[:4] == 'JUMP':
self.outEdges.add(inst[1])
self.insts.append(inst)
def getInstructions(self):
return self.insts
def addInEdge(self, block):
self.inEdges.add(block)
def addOutEdge(self, block):
self.outEdges.add(block)
def addNext(self, block):
self.next.append(block)
assert len(self.next) == 1, map(str, self.next)
_uncond_transfer = ('RETURN_VALUE', 'RAISE_VARARGS',
'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'CONTINUE_LOOP')
def pruneNext(self):
"""Remove bogus edge for unconditional transfers
Each block has a next edge that accounts for implicit control
transfers, e.g. from a JUMP_IF_FALSE to the block that will be
executed if the test is true.
These edges must remain for the current assembler code to
work. If they are removed, the dfs_postorder gets things in
weird orders. However, they shouldn't be there for other
purposes, e.g. conversion to SSA form. This method will
remove the next edge when it follows an unconditional control
transfer.
"""
try:
op, arg = self.insts[-1]
except (IndexError, ValueError):
return
if op in self._uncond_transfer:
self.next = []
def get_children(self):
if self.next and self.next[0] in self.outEdges:
self.outEdges.remove(self.next[0])
return self.outEdges.elements() + self.next
def getContainedGraphs(self):
"""Return all graphs contained within this block.
For example, a MAKE_FUNCTION block will contain a reference to
the graph for the function body.
"""
contained = []
for inst in self.insts:
if len(inst) == 1:
continue
op = inst[1]
if hasattr(op, 'graph'):
contained.append(op.graph)
return contained
# flags for code objects
# the FlowGraph is transformed in place; it exists in one of these states
RAW = "RAW"
FLAT = "FLAT"
CONV = "CONV"
DONE = "DONE"
class PyFlowGraph(FlowGraph):
super_init = FlowGraph.__init__
def __init__(self, name, filename, args=(), optimized=0, klass=None):
self.super_init()
self.name = name
self.filename = filename
self.docstring = None
self.args = args # XXX
self.argcount = getArgCount(args)
self.klass = klass
if optimized:
self.flags = CO_OPTIMIZED | CO_NEWLOCALS
else:
self.flags = 0
self.consts = []
self.names = []
# Free variables found by the symbol table scan, including
# variables used only in nested scopes, are included here.
self.freevars = []
self.cellvars = []
# The closure list is used to track the order of cell
# variables and free variables in the resulting code object.
# The offsets used by LOAD_CLOSURE/LOAD_DEREF refer to both
# kinds of variables.
self.closure = []
self.varnames = list(args) or []
for i in range(len(self.varnames)):
var = self.varnames[i]
if isinstance(var, TupleArg):
self.varnames[i] = var.getName()
self.stage = RAW
def setDocstring(self, doc):
self.docstring = doc
def setFlag(self, flag):
self.flags = self.flags | flag
if flag == CO_VARARGS:
self.argcount = self.argcount - 1
def checkFlag(self, flag):
if self.flags & flag:
return 1
def setFreeVars(self, names):
self.freevars = list(names)
def setCellVars(self, names):
self.cellvars = names
def getCode(self):
"""Get a Python code object"""
if self.stage == RAW:
self.flattenGraph()
if self.stage == FLAT:
self.convertArgs()
if self.stage == CONV:
self.makeByteCode()
if self.stage == DONE:
return self.newCodeObject()
raise RuntimeError, "inconsistent PyFlowGraph state"
def dump(self, io=None):
if io:
save = sys.stdout
sys.stdout = io
pc = 0
for t in self.insts:
opname = t[0]
if opname == "SET_LINENO":
print
if len(t) == 1:
print "\t", "%3d" % pc, opname
pc = pc + 1
else:
print "\t", "%3d" % pc, opname, t[1]
pc = pc + 3
if io:
sys.stdout = save
def flattenGraph(self):
"""Arrange the blocks in order and resolve jumps"""
assert self.stage == RAW
self.insts = insts = []
pc = 0
begin = {}
end = {}
for b in self.getBlocksInOrder():
begin[b] = pc
for inst in b.getInstructions():
insts.append(inst)
if len(inst) == 1:
pc = pc + 1
else:
# arg takes 2 bytes
pc = pc + 3
end[b] = pc
pc = 0
for i in range(len(insts)):
inst = insts[i]
if len(inst) == 1:
pc = pc + 1
else:
pc = pc + 3
opname = inst[0]
if self.hasjrel.has_elt(opname):
oparg = inst[1]
offset = begin[oparg] - pc
insts[i] = opname, offset
elif self.hasjabs.has_elt(opname):
insts[i] = opname, begin[inst[1]]
self.stacksize = findDepth(self.insts)
self.stage = FLAT
hasjrel = misc.Set()
for i in dis.hasjrel:
hasjrel.add(dis.opname[i])
hasjabs = misc.Set()
for i in dis.hasjabs:
hasjabs.add(dis.opname[i])
def convertArgs(self):
"""Convert arguments from symbolic to concrete form"""
assert self.stage == FLAT
self.consts.insert(0, self.docstring)
self.sort_cellvars()
for i in range(len(self.insts)):
t = self.insts[i]
if len(t) == 2:
opname, oparg = t
conv = self._converters.get(opname, None)
if conv:
self.insts[i] = opname, conv(self, oparg)
self.stage = CONV
def sort_cellvars(self):
"""Sort cellvars in the order of varnames and prune from freevars.
"""
cells = {}
for name in self.cellvars:
cells[name] = 1
self.cellvars = [name for name in self.varnames
if cells.has_key(name)]
for name in self.cellvars:
del cells[name]
self.cellvars = self.cellvars + cells.keys()
self.closure = self.cellvars + self.freevars
def _lookupName(self, name, list):
"""Return index of name in list, appending if necessary
This routine uses a list instead of a dictionary, because a
dictionary can't store two different keys if the keys have the
same value but different types, e.g. 2 and 2L. The compiler
must treat these two separately, so it does an explicit type
comparison before comparing the values.
"""
t = type(name)
for i in range(len(list)):
if t == type(list[i]) and list[i] == name:
return i
end = len(list)
list.append(name)
return end
_converters = {}
def _convert_LOAD_CONST(self, arg):
if hasattr(arg, 'getCode'):
arg = arg.getCode()
return self._lookupName(arg, self.consts)
def _convert_LOAD_FAST(self, arg):
self._lookupName(arg, self.names)
return self._lookupName(arg, self.varnames)
_convert_STORE_FAST = _convert_LOAD_FAST
_convert_DELETE_FAST = _convert_LOAD_FAST
def _convert_LOAD_NAME(self, arg):
if self.klass is None:
self._lookupName(arg, self.varnames)
return self._lookupName(arg, self.names)
def _convert_NAME(self, arg):
if self.klass is None:
self._lookupName(arg, self.varnames)
return self._lookupName(arg, self.names)
_convert_STORE_NAME = _convert_NAME
_convert_DELETE_NAME = _convert_NAME
_convert_IMPORT_NAME = _convert_NAME
_convert_IMPORT_FROM = _convert_NAME
_convert_STORE_ATTR = _convert_NAME
_convert_LOAD_ATTR = _convert_NAME
_convert_DELETE_ATTR = _convert_NAME
_convert_LOAD_GLOBAL = _convert_NAME
_convert_STORE_GLOBAL = _convert_NAME
_convert_DELETE_GLOBAL = _convert_NAME
def _convert_DEREF(self, arg):
self._lookupName(arg, self.names)
self._lookupName(arg, self.varnames)
return self._lookupName(arg, self.closure)
_convert_LOAD_DEREF = _convert_DEREF
_convert_STORE_DEREF = _convert_DEREF
def _convert_LOAD_CLOSURE(self, arg):
self._lookupName(arg, self.varnames)
return self._lookupName(arg, self.closure)
_cmp = list(dis.cmp_op)
def _convert_COMPARE_OP(self, arg):
return self._cmp.index(arg)
# similarly for other opcodes...
for name, obj in locals().items():
if name[:9] == "_convert_":
opname = name[9:]
_converters[opname] = obj
del name, obj, opname
def makeByteCode(self):
assert self.stage == CONV
self.lnotab = lnotab = LineAddrTable()
for t in self.insts:
opname = t[0]
if len(t) == 1:
lnotab.addCode(self.opnum[opname])
else:
oparg = t[1]
if opname == "SET_LINENO":
lnotab.nextLine(oparg)
hi, lo = twobyte(oparg)
try:
lnotab.addCode(self.opnum[opname], lo, hi)
except ValueError:
print opname, oparg
print self.opnum[opname], lo, hi
raise
self.stage = DONE
opnum = {}
for num in range(len(dis.opname)):
opnum[dis.opname[num]] = num
del num
def newCodeObject(self):
assert self.stage == DONE
if (self.flags & CO_NEWLOCALS) == 0:
nlocals = 0
else:
nlocals = len(self.varnames)
argcount = self.argcount
if self.flags & CO_VARKEYWORDS:
argcount = argcount - 1
return new.code(argcount, nlocals, self.stacksize, self.flags,
self.lnotab.getCode(), self.getConsts(),
tuple(self.names), tuple(self.varnames),
self.filename, self.name, self.lnotab.firstline,
self.lnotab.getTable(), tuple(self.freevars),
tuple(self.cellvars))
def getConsts(self):
"""Return a tuple for the const slot of the code object
Must convert references to code (MAKE_FUNCTION) to code
objects recursively.
"""
l = []
for elt in self.consts:
if isinstance(elt, PyFlowGraph):
elt = elt.getCode()
l.append(elt)
return tuple(l)
def isJump(opname):
if opname[:4] == 'JUMP':
return 1
class TupleArg:
"""Helper for marking func defs with nested tuples in arglist"""
def __init__(self, count, names):
self.count = count
self.names = names
def __repr__(self):
return "TupleArg(%s, %s)" % (self.count, self.names)
def getName(self):
return ".%d" % self.count
def getArgCount(args):
argcount = len(args)
if args:
for arg in args:
if isinstance(arg, TupleArg):
numNames = len(misc.flatten(arg.names))
argcount = argcount - numNames
return argcount
def twobyte(val):
"""Convert an int argument into high and low bytes"""
assert type(val) == types.IntType
return divmod(val, 256)
class LineAddrTable:
"""lnotab
This class builds the lnotab, which is documented in compile.c.
Here's a brief recap:
For each SET_LINENO instruction after the first one, two bytes are
added to lnotab. (In some cases, multiple two-byte entries are
added.) The first byte is the distance in bytes between the
instruction for the last SET_LINENO and the current SET_LINENO.
The second byte is offset in line numbers. If either offset is
greater than 255, multiple two-byte entries are added -- see
compile.c for the delicate details.
"""
def __init__(self):
self.code = []
self.codeOffset = 0
self.firstline = 0
self.lastline = 0
self.lastoff = 0
self.lnotab = []
def addCode(self, *args):
for arg in args:
self.code.append(chr(arg))
self.codeOffset = self.codeOffset + len(args)
def nextLine(self, lineno):
if self.firstline == 0:
self.firstline = lineno
self.lastline = lineno
else:
# compute deltas
addr = self.codeOffset - self.lastoff
line = lineno - self.lastline
# Python assumes that lineno always increases with
# increasing bytecode address (lnotab is unsigned char).
# Depending on when SET_LINENO instructions are emitted
# this is not always true. Consider the code:
# a = (1,
# b)
# In the bytecode stream, the assignment to "a" occurs
# after the loading of "b". This works with the C Python
# compiler because it only generates a SET_LINENO instruction
# for the assignment.
if line > 0:
push = self.lnotab.append
while addr > 255:
push(255); push(0)
addr -= 255
while line > 255:
push(addr); push(255)
line -= 255
addr = 0
if addr > 0 or line > 0:
push(addr); push(line)
self.lastline = lineno
self.lastoff = self.codeOffset
def getCode(self):
return string.join(self.code, '')
def getTable(self):
return string.join(map(chr, self.lnotab), '')
class StackDepthTracker:
# XXX 1. need to keep track of stack depth on jumps
# XXX 2. at least partly as a result, this code is broken
def findDepth(self, insts):
depth = 0
maxDepth = 0
for i in insts:
opname = i[0]
delta = self.effect.get(opname, 0)
if delta > 1:
depth = depth + delta
elif delta < 0:
if depth > maxDepth:
maxDepth = depth
depth = depth + delta
else:
if depth > maxDepth:
maxDepth = depth
# now check patterns
for pat, pat_delta in self.patterns:
if opname[:len(pat)] == pat:
delta = pat_delta
depth = depth + delta
break
# if we still haven't found a match
if delta == 0:
meth = getattr(self, opname, None)
if meth is not None:
depth = depth + meth(i[1])
if depth < 0:
depth = 0
return maxDepth
effect = {
'POP_TOP': -1,
'DUP_TOP': 1,
'SLICE+1': -1,
'SLICE+2': -1,
'SLICE+3': -2,
'STORE_SLICE+0': -1,
'STORE_SLICE+1': -2,
'STORE_SLICE+2': -2,
'STORE_SLICE+3': -3,
'DELETE_SLICE+0': -1,
'DELETE_SLICE+1': -2,
'DELETE_SLICE+2': -2,
'DELETE_SLICE+3': -3,
'STORE_SUBSCR': -3,
'DELETE_SUBSCR': -2,
# PRINT_EXPR?
'PRINT_ITEM': -1,
'RETURN_VALUE': -1,
'EXEC_STMT': -3,
'BUILD_CLASS': -2,
'STORE_NAME': -1,
'STORE_ATTR': -2,
'DELETE_ATTR': -1,
'STORE_GLOBAL': -1,
'BUILD_MAP': 1,
'COMPARE_OP': -1,
'STORE_FAST': -1,
'IMPORT_STAR': -1,
'IMPORT_NAME': 0,
'IMPORT_FROM': 1,
# close enough...
'SETUP_EXCEPT': 3,
'SETUP_FINALLY': 3,
'FOR_ITER': 1,
}
# use pattern match
patterns = [
('BINARY_', -1),
('LOAD_', 1),
]
def UNPACK_SEQUENCE(self, count):
return count-1
def BUILD_TUPLE(self, count):
return -count+1
def BUILD_LIST(self, count):
return -count+1
def CALL_FUNCTION(self, argc):
hi, lo = divmod(argc, 256)
return lo + hi * 2
def CALL_FUNCTION_VAR(self, argc):
return self.CALL_FUNCTION(argc)+1
def CALL_FUNCTION_KW(self, argc):
return self.CALL_FUNCTION(argc)+1
def CALL_FUNCTION_VAR_KW(self, argc):
return self.CALL_FUNCTION(argc)+2
def MAKE_FUNCTION(self, argc):
return -argc
def BUILD_SLICE(self, argc):
if argc == 2:
return -1
elif argc == 3:
return -2
findDepth = StackDepthTracker().findDepth

File diff suppressed because it is too large Load Diff

View File

@ -1,415 +0,0 @@
"""Module symbol-table generator"""
from compiler import ast
from compiler.consts import SC_LOCAL, SC_GLOBAL, SC_FREE, SC_CELL, SC_UNKNOWN
from compiler.misc import mangle
import types
import sys
MANGLE_LEN = 256
class Scope:
# XXX how much information do I need about each name?
def __init__(self, name, module, klass=None):
self.name = name
self.module = module
self.defs = {}
self.uses = {}
self.globals = {}
self.params = {}
self.frees = {}
self.cells = {}
self.children = []
# nested is true if the class could contain free variables,
# i.e. if it is nested within another function.
self.nested = None
self.generator = None
self.klass = None
if klass is not None:
for i in range(len(klass)):
if klass[i] != '_':
self.klass = klass[i:]
break
def __repr__(self):
return "<%s: %s>" % (self.__class__.__name__, self.name)
def mangle(self, name):
if self.klass is None:
return name
return mangle(name, self.klass)
def add_def(self, name):
self.defs[self.mangle(name)] = 1
def add_use(self, name):
self.uses[self.mangle(name)] = 1
def add_global(self, name):
name = self.mangle(name)
if self.uses.has_key(name) or self.defs.has_key(name):
pass # XXX warn about global following def/use
if self.params.has_key(name):
raise SyntaxError, "%s in %s is global and parameter" % \
(name, self.name)
self.globals[name] = 1
self.module.add_def(name)
def add_param(self, name):
name = self.mangle(name)
self.defs[name] = 1
self.params[name] = 1
def get_names(self):
d = {}
d.update(self.defs)
d.update(self.uses)
d.update(self.globals)
return d.keys()
def add_child(self, child):
self.children.append(child)
def get_children(self):
return self.children
def DEBUG(self):
print >> sys.stderr, self.name, self.nested and "nested" or ""
print >> sys.stderr, "\tglobals: ", self.globals
print >> sys.stderr, "\tcells: ", self.cells
print >> sys.stderr, "\tdefs: ", self.defs
print >> sys.stderr, "\tuses: ", self.uses
print >> sys.stderr, "\tfrees:", self.frees
def check_name(self, name):
"""Return scope of name.
The scope of a name could be LOCAL, GLOBAL, FREE, or CELL.
"""
if self.globals.has_key(name):
return SC_GLOBAL
if self.cells.has_key(name):
return SC_CELL
if self.defs.has_key(name):
return SC_LOCAL
if self.nested and (self.frees.has_key(name) or
self.uses.has_key(name)):
return SC_FREE
if self.nested:
return SC_UNKNOWN
else:
return SC_GLOBAL
def get_free_vars(self):
if not self.nested:
return ()
free = {}
free.update(self.frees)
for name in self.uses.keys():
if not (self.defs.has_key(name) or
self.globals.has_key(name)):
free[name] = 1
return free.keys()
def handle_children(self):
for child in self.children:
frees = child.get_free_vars()
globals = self.add_frees(frees)
for name in globals:
child.force_global(name)
def force_global(self, name):
"""Force name to be global in scope.
Some child of the current node had a free reference to name.
When the child was processed, it was labelled a free
variable. Now that all its enclosing scope have been
processed, the name is known to be a global or builtin. So
walk back down the child chain and set the name to be global
rather than free.
Be careful to stop if a child does not think the name is
free.
"""
self.globals[name] = 1
if self.frees.has_key(name):
del self.frees[name]
for child in self.children:
if child.check_name(name) == SC_FREE:
child.force_global(name)
def add_frees(self, names):
"""Process list of free vars from nested scope.
Returns a list of names that are either 1) declared global in the
parent or 2) undefined in a top-level parent. In either case,
the nested scope should treat them as globals.
"""
child_globals = []
for name in names:
sc = self.check_name(name)
if self.nested:
if sc == SC_UNKNOWN or sc == SC_FREE \
or isinstance(self, ClassScope):
self.frees[name] = 1
elif sc == SC_GLOBAL:
child_globals.append(name)
elif isinstance(self, FunctionScope) and sc == SC_LOCAL:
self.cells[name] = 1
elif sc != SC_CELL:
child_globals.append(name)
else:
if sc == SC_LOCAL:
self.cells[name] = 1
elif sc != SC_CELL:
child_globals.append(name)
return child_globals
def get_cell_vars(self):
return self.cells.keys()
class ModuleScope(Scope):
__super_init = Scope.__init__
def __init__(self):
self.__super_init("global", self)
class FunctionScope(Scope):
pass
class LambdaScope(FunctionScope):
__super_init = Scope.__init__
__counter = 1
def __init__(self, module, klass=None):
i = self.__counter
self.__counter += 1
self.__super_init("lambda.%d" % i, module, klass)
class ClassScope(Scope):
__super_init = Scope.__init__
def __init__(self, name, module):
self.__super_init(name, module, name)
class SymbolVisitor:
def __init__(self):
self.scopes = {}
self.klass = None
# node that define new scopes
def visitModule(self, node):
scope = self.module = self.scopes[node] = ModuleScope()
self.visit(node.node, scope)
def visitFunction(self, node, parent):
parent.add_def(node.name)
for n in node.defaults:
self.visit(n, parent)
scope = FunctionScope(node.name, self.module, self.klass)
if parent.nested or isinstance(parent, FunctionScope):
scope.nested = 1
self.scopes[node] = scope
self._do_args(scope, node.argnames)
self.visit(node.code, scope)
self.handle_free_vars(scope, parent)
def visitLambda(self, node, parent):
for n in node.defaults:
self.visit(n, parent)
scope = LambdaScope(self.module, self.klass)
if parent.nested or isinstance(parent, FunctionScope):
scope.nested = 1
self.scopes[node] = scope
self._do_args(scope, node.argnames)
self.visit(node.code, scope)
self.handle_free_vars(scope, parent)
def _do_args(self, scope, args):
for name in args:
if type(name) == types.TupleType:
self._do_args(scope, name)
else:
scope.add_param(name)
def handle_free_vars(self, scope, parent):
parent.add_child(scope)
scope.handle_children()
def visitClass(self, node, parent):
parent.add_def(node.name)
for n in node.bases:
self.visit(n, parent)
scope = ClassScope(node.name, self.module)
if parent.nested or isinstance(parent, FunctionScope):
scope.nested = 1
self.scopes[node] = scope
prev = self.klass
self.klass = node.name
self.visit(node.code, scope)
self.klass = prev
self.handle_free_vars(scope, parent)
# name can be a def or a use
# XXX a few calls and nodes expect a third "assign" arg that is
# true if the name is being used as an assignment. only
# expressions contained within statements may have the assign arg.
def visitName(self, node, scope, assign=0):
if assign:
scope.add_def(node.name)
else:
scope.add_use(node.name)
# operations that bind new names
def visitFor(self, node, scope):
self.visit(node.assign, scope, 1)
self.visit(node.list, scope)
self.visit(node.body, scope)
if node.else_:
self.visit(node.else_, scope)
def visitFrom(self, node, scope):
for name, asname in node.names:
if name == "*":
continue
scope.add_def(asname or name)
def visitImport(self, node, scope):
for name, asname in node.names:
i = name.find(".")
if i > -1:
name = name[:i]
scope.add_def(asname or name)
def visitGlobal(self, node, scope):
for name in node.names:
scope.add_global(name)
def visitAssign(self, node, scope):
"""Propagate assignment flag down to child nodes.
The Assign node doesn't itself contains the variables being
assigned to. Instead, the children in node.nodes are visited
with the assign flag set to true. When the names occur in
those nodes, they are marked as defs.
Some names that occur in an assignment target are not bound by
the assignment, e.g. a name occurring inside a slice. The
visitor handles these nodes specially; they do not propagate
the assign flag to their children.
"""
for n in node.nodes:
self.visit(n, scope, 1)
self.visit(node.expr, scope)
def visitAssName(self, node, scope, assign=1):
scope.add_def(node.name)
def visitAssAttr(self, node, scope, assign=0):
self.visit(node.expr, scope, 0)
def visitSubscript(self, node, scope, assign=0):
self.visit(node.expr, scope, 0)
for n in node.subs:
self.visit(n, scope, 0)
def visitSlice(self, node, scope, assign=0):
self.visit(node.expr, scope, 0)
if node.lower:
self.visit(node.lower, scope, 0)
if node.upper:
self.visit(node.upper, scope, 0)
def visitAugAssign(self, node, scope):
# If the LHS is a name, then this counts as assignment.
# Otherwise, it's just use.
self.visit(node.node, scope)
if isinstance(node.node, ast.Name):
self.visit(node.node, scope, 1) # XXX worry about this
self.visit(node.expr, scope)
# prune if statements if tests are false
_const_types = types.StringType, types.IntType, types.FloatType
def visitIf(self, node, scope):
for test, body in node.tests:
if isinstance(test, ast.Const):
if type(test.value) in self._const_types:
if not test.value:
continue
self.visit(test, scope)
self.visit(body, scope)
if node.else_:
self.visit(node.else_, scope)
# a yield statement signals a generator
def visitYield(self, node, scope):
scope.generator = 1
self.visit(node.value, scope)
def sort(l):
l = l[:]
l.sort()
return l
def list_eq(l1, l2):
return sort(l1) == sort(l2)
if __name__ == "__main__":
import sys
from compiler import parseFile, walk
import symtable
def get_names(syms):
return [s for s in [s.get_name() for s in syms.get_symbols()]
if not (s.startswith('_[') or s.startswith('.'))]
for file in sys.argv[1:]:
print file
f = open(file)
buf = f.read()
f.close()
syms = symtable.symtable(buf, file, "exec")
mod_names = get_names(syms)
tree = parseFile(file)
s = SymbolVisitor()
walk(tree, s)
# compare module-level symbols
names2 = s.scopes[tree].get_names()
if not list_eq(mod_names, names2):
print
print "oops", file
print sort(mod_names)
print sort(names2)
sys.exit(-1)
d = {}
d.update(s.scopes)
del d[tree]
scopes = d.values()
del d
for s in syms.get_symbols():
if s.is_namespace():
l = [sc for sc in scopes
if sc.name == s.get_name()]
if len(l) > 1:
print "skipping", s.get_name()
else:
if not list_eq(get_names(s.get_namespace()),
l[0].get_names()):
print s.get_name()
print sort(get_names(s.get_namespace()))
print sort(l[0].get_names())
sys.exit(-1)

View File

@ -1,46 +0,0 @@
"""Check for errs in the AST.
The Python parser does not catch all syntax errors. Others, like
assignments with invalid targets, are caught in the code generation
phase.
The compiler package catches some errors in the transformer module.
But it seems clearer to write checkers that use the AST to detect
errors.
"""
from compiler import ast, walk
def check(tree, multi=None):
v = SyntaxErrorChecker(multi)
walk(tree, v)
return v.errors
class SyntaxErrorChecker:
"""A visitor to find syntax errors in the AST."""
def __init__(self, multi=None):
"""Create new visitor object.
If optional argument multi is not None, then print messages
for each error rather than raising a SyntaxError for the
first.
"""
self.multi = multi
self.errors = 0
def error(self, node, msg):
self.errors = self.errors + 1
if self.multi is not None:
print "%s:%s: %s" % (node.filename, node.lineno, msg)
else:
raise SyntaxError, "%s (%s:%s)" % (msg, node.filename, node.lineno)
def visitAssign(self, node):
# the transformer module handles many of these
for target in node.nodes:
pass
## if isinstance(target, ast.AssList):
## if target.lineno is None:
## target.lineno = node.lineno
## self.error(target, "can't assign to list comprehension")

File diff suppressed because it is too large Load Diff

View File

@ -1,121 +0,0 @@
from compiler import ast
# XXX should probably rename ASTVisitor to ASTWalker
# XXX can it be made even more generic?
class ASTVisitor:
"""Performs a depth-first walk of the AST
The ASTVisitor will walk the AST, performing either a preorder or
postorder traversal depending on which method is called.
methods:
preorder(tree, visitor)
postorder(tree, visitor)
tree: an instance of ast.Node
visitor: an instance with visitXXX methods
The ASTVisitor is responsible for walking over the tree in the
correct order. For each node, it checks the visitor argument for
a method named 'visitNodeType' where NodeType is the name of the
node's class, e.g. Class. If the method exists, it is called
with the node as its sole argument.
The visitor method for a particular node type can control how
child nodes are visited during a preorder walk. (It can't control
the order during a postorder walk, because it is called _after_
the walk has occurred.) The ASTVisitor modifies the visitor
argument by adding a visit method to the visitor; this method can
be used to visit a particular child node. If the visitor method
returns a true value, the ASTVisitor will not traverse the child
nodes.
XXX The interface for controlling the preorder walk needs to be
re-considered. The current interface is convenient for visitors
that mostly let the ASTVisitor do everything. For something like
a code generator, where you want to walk to occur in a specific
order, it's a pain to add "return 1" to the end of each method.
"""
VERBOSE = 0
def __init__(self):
self.node = None
self._cache = {}
def default(self, node, *args):
for child in node.getChildNodes():
self.dispatch(child, *args)
def dispatch(self, node, *args):
self.node = node
klass = node.__class__
meth = self._cache.get(klass, None)
if meth is None:
className = klass.__name__
meth = getattr(self.visitor, 'visit' + className, self.default)
self._cache[klass] = meth
## if self.VERBOSE > 0:
## className = klass.__name__
## if self.VERBOSE == 1:
## if meth == 0:
## print "dispatch", className
## else:
## print "dispatch", className, (meth and meth.__name__ or '')
return meth(node, *args)
def preorder(self, tree, visitor, *args):
"""Do preorder walk of tree using visitor"""
self.visitor = visitor
visitor.visit = self.dispatch
self.dispatch(tree, *args) # XXX *args make sense?
class ExampleASTVisitor(ASTVisitor):
"""Prints examples of the nodes that aren't visited
This visitor-driver is only useful for development, when it's
helpful to develop a visitor incremently, and get feedback on what
you still have to do.
"""
examples = {}
def dispatch(self, node, *args):
self.node = node
meth = self._cache.get(node.__class__, None)
className = node.__class__.__name__
if meth is None:
meth = getattr(self.visitor, 'visit' + className, 0)
self._cache[node.__class__] = meth
if self.VERBOSE > 1:
print "dispatch", className, (meth and meth.__name__ or '')
if meth:
meth(node, *args)
elif self.VERBOSE > 0:
klass = node.__class__
if not self.examples.has_key(klass):
self.examples[klass] = klass
print
print self.visitor
print klass
for attr in dir(node):
if attr[0] != '_':
print "\t", "%-12.12s" % attr, getattr(node, attr)
print
return self.default(node, *args)
# XXX this is an API change
_walker = ASTVisitor
def walk(tree, visitor, walker=None, verbose=None):
if walker is None:
walker = _walker()
if verbose is not None:
walker.VERBOSE = verbose
walker.preorder(tree, visitor)
return walker.visitor
def dumpNode(node):
print node.__class__
for attr in dir(node):
if attr[0] != '_':
print "\t", "%-10.10s" % attr, getattr(node, attr)