From 364f9b9e2f798e4d28ed21122faffb030a6ccac5 Mon Sep 17 00:00:00 2001 From: Jeremy Hylton Date: Thu, 12 Apr 2001 06:40:42 +0000 Subject: [PATCH] Preliminary support for nested scopes XXX Still doesn't work right for classes XXX Still doesn't do sufficient error checking --- Lib/compiler/pyassem.py | 50 ++++- Lib/compiler/pycodegen.py | 305 ++++++++++++++++++++------- Lib/compiler/symbols.py | 130 +++++++++++- Tools/compiler/compiler/pyassem.py | 50 ++++- Tools/compiler/compiler/pycodegen.py | 305 ++++++++++++++++++++------- Tools/compiler/compiler/symbols.py | 130 +++++++++++- 6 files changed, 792 insertions(+), 178 deletions(-) diff --git a/Lib/compiler/pyassem.py b/Lib/compiler/pyassem.py index 43bf6f4e9b1..447a8e78e47 100644 --- a/Lib/compiler/pyassem.py +++ b/Lib/compiler/pyassem.py @@ -99,12 +99,6 @@ class FlowGraph: if not self.exit in order: order.append(self.exit) -## for b in order: -## print repr(b) -## print "\t", b.get_children() -## print b -## print - return order def getBlocks(self): @@ -222,6 +216,7 @@ CO_OPTIMIZED = 0x0001 CO_NEWLOCALS = 0x0002 CO_VARARGS = 0x0004 CO_VARKEYWORDS = 0x0008 +CO_NESTED = 0x0010 # the FlowGraph is transformed in place; it exists in one of these states RAW = "RAW" @@ -245,6 +240,15 @@ class PyFlowGraph(FlowGraph): self.flags = 0 self.consts = [] self.names = [] + # Free variables found by the symbol table scan, including + # variables used only in nested scopes, are included here. + self.freevars = [] + self.cellvars = [] + # The closure list is used to track the order of cell + # variables and free variables in the resulting code object. + # The offsets used by LOAD_CLOSURE/LOAD_DEREF refer to both + # kinds of variables. + self.closure = [] self.varnames = list(args) or [] for i in range(len(self.varnames)): var = self.varnames[i] @@ -260,6 +264,12 @@ class PyFlowGraph(FlowGraph): if flag == CO_VARARGS: self.argcount = self.argcount - 1 + def setFreeVars(self, names): + self.freevars = list(names) + + def setCellVars(self, names): + self.cellvars = names + def getCode(self): """Get a Python code object""" if self.stage == RAW: @@ -335,6 +345,7 @@ class PyFlowGraph(FlowGraph): """Convert arguments from symbolic to concrete form""" assert self.stage == FLAT self.consts.insert(0, self.docstring) + self.sort_cellvars() for i in range(len(self.insts)): t = self.insts[i] if len(t) == 2: @@ -345,6 +356,19 @@ class PyFlowGraph(FlowGraph): self.insts[i] = opname, conv(self, oparg) self.stage = CONV + def sort_cellvars(self): + """Sort cellvars in the order of varnames and prune from freevars. + """ + cells = {} + for name in self.cellvars: + cells[name] = 1 + self.cellvars = [name for name in self.varnames + if cells.has_key(name)] + for name in self.cellvars: + del cells[name] + self.cellvars = self.cellvars + cells.keys() + self.closure = self.cellvars + self.freevars + def _lookupName(self, name, list): """Return index of name in list, appending if necessary""" t = type(name) @@ -382,6 +406,17 @@ class PyFlowGraph(FlowGraph): _convert_STORE_GLOBAL = _convert_NAME _convert_DELETE_GLOBAL = _convert_NAME + def _convert_DEREF(self, arg): + self._lookupName(arg, self.names) + self._lookupName(arg, self.varnames) + return self._lookupName(arg, self.closure) + _convert_LOAD_DEREF = _convert_DEREF + _convert_STORE_DEREF = _convert_DEREF + + def _convert_LOAD_CLOSURE(self, arg): + self._lookupName(arg, self.varnames) + return self._lookupName(arg, self.closure) + _cmp = list(dis.cmp_op) def _convert_COMPARE_OP(self, arg): return self._cmp.index(arg) @@ -432,7 +467,8 @@ class PyFlowGraph(FlowGraph): self.lnotab.getCode(), self.getConsts(), tuple(self.names), tuple(self.varnames), self.filename, self.name, self.lnotab.firstline, - self.lnotab.getTable()) + self.lnotab.getTable(), tuple(self.freevars), + tuple(self.cellvars)) def getConsts(self): """Return a tuple for the const slot of the code object diff --git a/Lib/compiler/pycodegen.py b/Lib/compiler/pycodegen.py index 3ba31e47653..2d4564a6340 100644 --- a/Lib/compiler/pycodegen.py +++ b/Lib/compiler/pycodegen.py @@ -9,8 +9,10 @@ import types from cStringIO import StringIO from compiler import ast, parse, walk -from compiler import pyassem, misc, future -from compiler.pyassem import CO_VARARGS, CO_VARKEYWORDS, CO_NEWLOCALS, TupleArg +from compiler import pyassem, misc, future, symbols +from compiler.consts import SC_LOCAL, SC_GLOBAL, SC_FREE, SC_CELL +from compiler.pyassem import CO_VARARGS, CO_VARKEYWORDS, CO_NEWLOCALS,\ + CO_NESTED, TupleArg # Do we have Python 1.x or Python 2.x? try: @@ -46,7 +48,7 @@ class Module: tree = parse(self.source) root, filename = os.path.split(self.filename) if "nested_scopes" in future.find_futures(tree): - gen = NestedScopeCodeGenerator(filename) + gen = NestedScopeModuleCodeGenerator(filename) else: gen = ModuleCodeGenerator(filename) walk(tree, gen, 1) @@ -70,14 +72,71 @@ class Module: mtime = struct.pack('i', mtime) return self.MAGIC + mtime +class LocalNameFinder: + """Find local names in scope""" + def __init__(self, names=()): + self.names = misc.Set() + self.globals = misc.Set() + for name in names: + self.names.add(name) + + # XXX list comprehensions and for loops + + def getLocals(self): + for elt in self.globals.elements(): + if self.names.has_elt(elt): + self.names.remove(elt) + return self.names + + def visitDict(self, node): + pass + + def visitGlobal(self, node): + for name in node.names: + self.globals.add(name) + + def visitFunction(self, node): + self.names.add(node.name) + + def visitLambda(self, node): + pass + + def visitImport(self, node): + for name, alias in node.names: + self.names.add(alias or name) + + def visitFrom(self, node): + for name, alias in node.names: + self.names.add(alias or name) + + def visitClass(self, node): + self.names.add(node.name) + + def visitAssName(self, node): + self.names.add(node.name) + class CodeGenerator: + """Defines basic code generator for Python bytecode + + This class is an abstract base class. Concrete subclasses must + define an __init__() that defines self.graph and then calls the + __init__() defined in this class. + + The concrete class must also define the class attributes + NameFinder, FunctionGen, and ClassGen. These attributes can be + defined in the initClass() method, which is a hook for + initializing these methods after all the classes have been + defined. + """ optimized = 0 # is namespace access optimized? + __initialized = None def __init__(self, filename): -## Subclasses must define a constructor that intializes self.graph -## before calling this init function, e.g. -## self.graph = pyassem.PyFlowGraph() + if self.__initialized is None: + self.initClass() + self.__class__.__initialized = 1 + self.checkClass() self.filename = filename self.locals = misc.Stack() self.loops = misc.Stack() @@ -86,6 +145,20 @@ class CodeGenerator: self.last_lineno = None self._setupGraphDelegation() + def initClass(self): + """This method is called once for each class""" + + def checkClass(self): + """Verify that class is constructed correctly""" + try: + assert hasattr(self, 'graph') + assert getattr(self, 'NameFinder') + assert getattr(self, 'FunctionGen') + assert getattr(self, 'ClassGen') + except AssertionError, msg: + intro = "Bad class construction for %s" % self.__class__.__name__ + raise AssertionError, intro + def _setupGraphDelegation(self): self.emit = self.graph.emit self.newBlock = self.graph.newBlock @@ -139,10 +212,15 @@ class CodeGenerator: return 0 # The first few visitor methods handle nodes that generator new - # code objects + # code objects. They use class attributes to determine what + # specialized code generators to use. + + NameFinder = LocalNameFinder + FunctionGen = None + ClassGen = None def visitModule(self, node): - lnf = walk(node.node, LocalNameFinder(), 0) + lnf = walk(node.node, self.NameFinder(), 0) self.locals.push(lnf.getLocals()) if node.doc: self.fixDocstring(node.node) @@ -159,8 +237,8 @@ class CodeGenerator: def visitLambda(self, node): self._visitFuncOrLambda(node, isLambda=1) - def _visitFuncOrLambda(self, node, isLambda): - gen = FunctionCodeGenerator(node, self.filename, isLambda) + def _visitFuncOrLambda(self, node, isLambda=0): + gen = self.FunctionGen(node, self.filename, self.scopes, isLambda) walk(node.code, gen) gen.finish() self.set_lineno(node) @@ -170,7 +248,7 @@ class CodeGenerator: self.emit('MAKE_FUNCTION', len(node.defaults)) def visitClass(self, node): - gen = ClassCodeGenerator(node, self.filename) + gen = self.ClassGen(node, self.filename, self.scopes) if node.doc: self.fixDocstring(node.code) walk(node.code, gen) @@ -180,7 +258,7 @@ class CodeGenerator: for base in node.bases: self.visit(base) self.emit('BUILD_TUPLE', len(node.bases)) - self.emit('LOAD_CONST', gen.getCode()) + self.emit('LOAD_CONST', gen) self.emit('MAKE_FUNCTION', 0) self.emit('CALL_FUNCTION', 0) self.emit('BUILD_CLASS') @@ -883,34 +961,114 @@ class CodeGenerator: self.visit(k) self.emit('STORE_SUBSCR') -class ModuleCodeGenerator(CodeGenerator): - __super_init = CodeGenerator.__init__ +class NestedScopeCodeGenerator(CodeGenerator): __super_visitModule = CodeGenerator.visitModule - - def __init__(self, filename): - # XXX is ? in compile.c - self.graph = pyassem.PyFlowGraph("", filename) - self.__super_init(filename) - self.symbols = None + __super_visitClass = CodeGenerator.visitClass + __super__visitFuncOrLambda = CodeGenerator._visitFuncOrLambda + + def parseSymbols(self, tree): + s = symbols.SymbolVisitor() + walk(tree, s) + return s.scopes def visitModule(self, node): - self.symbols = self.parseSymbols(node) + self.scopes = self.parseSymbols(node) + self.scope = self.scopes[node] self.__super_visitModule(node) - def parseSymbols(self, node): - # XXX not implemented - return None + def _nameOp(self, prefix, name): + scope = self.scope.check_name(name) + if scope == SC_LOCAL: + if not self.optimized: + self.emit(prefix + '_NAME', name) + else: + self.emit(prefix + '_FAST', name) + elif scope == SC_GLOBAL: + self.emit(prefix + '_GLOBAL', name) + elif scope == SC_FREE or scope == SC_CELL: + self.emit(prefix + '_DEREF', name) + else: + raise RuntimeError, "unsupported scope for var %s: %d" % \ + (name, scope) -class NestedScopeCodeGenerator(ModuleCodeGenerator): - pass + def _visitFuncOrLambda(self, node, isLambda=0): + gen = self.FunctionGen(node, self.filename, self.scopes, isLambda) + walk(node.code, gen) + gen.finish() + self.set_lineno(node) + for default in node.defaults: + self.visit(default) + frees = gen.scope.get_free_vars() + if frees: + for name in frees: + self.emit('LOAD_CLOSURE', name) + self.emit('LOAD_CONST', gen) + self.emit('MAKE_CLOSURE', len(node.defaults)) + else: + self.emit('LOAD_CONST', gen) + self.emit('MAKE_FUNCTION', len(node.defaults)) -class FunctionCodeGenerator(CodeGenerator): - super_init = CodeGenerator.__init__ + def visitClass(self, node): + gen = self.ClassGen(node, self.filename, self.scopes) + if node.doc: + self.fixDocstring(node.code) + walk(node.code, gen) + gen.finish() + self.set_lineno(node) + self.emit('LOAD_CONST', node.name) + for base in node.bases: + self.visit(base) + self.emit('BUILD_TUPLE', len(node.bases)) + frees = gen.scope.get_free_vars() + for name in frees: + self.emit('LOAD_CLOSURE', name) + self.emit('LOAD_CONST', gen) + if frees: + self.emit('MAKE_CLOSURE', 0) + else: + self.emit('MAKE_FUNCTION', 0) + self.emit('CALL_FUNCTION', 0) + self.emit('BUILD_CLASS') + self.storeName(node.name) + +class LGBScopeMixin: + """Defines initClass() for Python 2.1-compatible scoping""" + def initClass(self): + self.__class__.NameFinder = LocalNameFinder + self.__class__.FunctionGen = FunctionCodeGenerator + self.__class__.ClassGen = ClassCodeGenerator + +class NestedScopeMixin: + """Defines initClass() for nested scoping (Python 2.2-compatible)""" + def initClass(self): + self.__class__.NameFinder = LocalNameFinder + self.__class__.FunctionGen = NestedFunctionCodeGenerator + self.__class__.ClassGen = NestedClassCodeGenerator + +class ModuleCodeGenerator(LGBScopeMixin, CodeGenerator): + __super_init = CodeGenerator.__init__ + + scopes = None + + def __init__(self, filename): + self.graph = pyassem.PyFlowGraph("", filename) + self.__super_init(filename) + +class NestedScopeModuleCodeGenerator(NestedScopeMixin, + NestedScopeCodeGenerator): + __super_init = CodeGenerator.__init__ + + def __init__(self, filename): + self.graph = pyassem.PyFlowGraph("", filename) + self.__super_init(filename) + self.graph.setFlag(CO_NESTED) + +class AbstractFunctionCode: optimized = 1 lambdaCount = 0 - def __init__(self, func, filename, isLambda=0): + def __init__(self, func, filename, scopes, isLambda): if isLambda: klass = FunctionCodeGenerator name = "" % klass.lambdaCount @@ -926,7 +1084,7 @@ class FunctionCodeGenerator(CodeGenerator): if not isLambda and func.doc: self.setDocstring(func.doc) - lnf = walk(func.code, LocalNameFinder(args), 0) + lnf = walk(func.code, self.NameFinder(args), 0) self.locals.push(lnf.getLocals()) if func.varargs: self.graph.setFlag(CO_VARARGS) @@ -963,14 +1121,32 @@ class FunctionCodeGenerator(CodeGenerator): unpackTuple = unpackSequence -class ClassCodeGenerator(CodeGenerator): - super_init = CodeGenerator.__init__ +class FunctionCodeGenerator(LGBScopeMixin, AbstractFunctionCode, + CodeGenerator): + super_init = CodeGenerator.__init__ # call be other init + scopes = None - def __init__(self, klass, filename): +class NestedFunctionCodeGenerator(AbstractFunctionCode, + NestedScopeMixin, + NestedScopeCodeGenerator): + super_init = NestedScopeCodeGenerator.__init__ # call be other init + __super_init = AbstractFunctionCode.__init__ + + def __init__(self, func, filename, scopes, isLambda): + self.scopes = scopes + self.scope = scopes[func] + self.__super_init(func, filename, scopes, isLambda) + self.graph.setFreeVars(self.scope.get_free_vars()) + self.graph.setCellVars(self.scope.get_cell_vars()) + self.graph.setFlag(CO_NESTED) + +class AbstractClassCode: + + def __init__(self, klass, filename, scopes): self.graph = pyassem.PyFlowGraph(klass.name, filename, optimized=0) self.super_init(filename) - lnf = walk(klass.code, LocalNameFinder(), 0) + lnf = walk(klass.code, self.NameFinder(), 0) self.locals.push(lnf.getLocals()) self.graph.setFlag(CO_NEWLOCALS) if klass.doc: @@ -981,6 +1157,24 @@ class ClassCodeGenerator(CodeGenerator): self.emit('LOAD_LOCALS') self.emit('RETURN_VALUE') +class ClassCodeGenerator(LGBScopeMixin, AbstractClassCode, CodeGenerator): + super_init = CodeGenerator.__init__ + scopes = None + +class NestedClassCodeGenerator(AbstractClassCode, + NestedScopeMixin, + NestedScopeCodeGenerator): + super_init = NestedScopeCodeGenerator.__init__ # call be other init + __super_init = AbstractClassCode.__init__ + + def __init__(self, klass, filename, scopes): + self.scopes = scopes + self.scope = scopes[klass] + self.__super_init(klass, filename, scopes) + self.graph.setFreeVars(self.scope.get_free_vars()) + self.graph.setCellVars(self.scope.get_cell_vars()) + self.graph.setFlag(CO_NESTED) + def generateArgList(arglist): """Generate an arg list marking TupleArgs""" args = [] @@ -997,49 +1191,6 @@ def generateArgList(arglist): raise ValueError, "unexpect argument type:", elt return args + extra, count -class LocalNameFinder: - """Find local names in scope""" - def __init__(self, names=()): - self.names = misc.Set() - self.globals = misc.Set() - for name in names: - self.names.add(name) - - # XXX list comprehensions and for loops - - def getLocals(self): - for elt in self.globals.elements(): - if self.names.has_elt(elt): - self.names.remove(elt) - return self.names - - def visitDict(self, node): - pass - - def visitGlobal(self, node): - for name in node.names: - self.globals.add(name) - - def visitFunction(self, node): - self.names.add(node.name) - - def visitLambda(self, node): - pass - - def visitImport(self, node): - for name, alias in node.names: - self.names.add(alias or name) - - def visitFrom(self, node): - for name, alias in node.names: - self.names.add(alias or name) - - def visitClass(self, node): - self.names.add(node.name) - - def visitAssName(self, node): - self.names.add(node.name) - def findOp(node): """Find the op (DELETE, LOAD, STORE) in an AssTuple tree""" v = OpFinder() diff --git a/Lib/compiler/symbols.py b/Lib/compiler/symbols.py index 3ab72f31298..cde937b7535 100644 --- a/Lib/compiler/symbols.py +++ b/Lib/compiler/symbols.py @@ -1,8 +1,11 @@ """Module symbol-table generator""" from compiler import ast +from compiler.consts import SC_LOCAL, SC_GLOBAL, SC_FREE, SC_CELL, SC_UNKNOWN import types +import sys + MANGLE_LEN = 256 class Scope: @@ -14,7 +17,12 @@ class Scope: self.uses = {} self.globals = {} self.params = {} + self.frees = {} + self.cells = {} self.children = [] + # nested is true if the class could contain free variables, + # i.e. if it is nested within another function. + self.nested = None self.klass = None if klass is not None: for i in range(len(klass)): @@ -70,13 +78,112 @@ class Scope: def get_children(self): return self.children + def DEBUG(self): + return + print >> sys.stderr, self.name, self.nested and "nested" or "" + print >> sys.stderr, "\tglobals: ", self.globals + print >> sys.stderr, "\tcells: ", self.cells + print >> sys.stderr, "\tdefs: ", self.defs + print >> sys.stderr, "\tuses: ", self.uses + print >> sys.stderr, "\tfrees:", self.frees + + def check_name(self, name): + """Return scope of name. + + The scope of a name could be LOCAL, GLOBAL, FREE, or CELL. + """ + if self.globals.has_key(name): + return SC_GLOBAL + if self.cells.has_key(name): + return SC_CELL + if self.defs.has_key(name): + return SC_LOCAL + if self.nested and (self.frees.has_key(name) or + self.uses.has_key(name)): + return SC_FREE + if self.nested: + return SC_UNKNOWN + else: + return SC_GLOBAL + + def get_free_vars(self): + if not self.nested: + return () + free = {} + free.update(self.frees) + for name in self.uses.keys(): + if not (self.defs.has_key(name) or + self.globals.has_key(name)): + free[name] = 1 + return free.keys() + + def handle_children(self): + for child in self.children: + frees = child.get_free_vars() + globals = self.add_frees(frees) + for name in globals: + child.force_global(name) + + def force_global(self, name): + """Force name to be global in scope. + + Some child of the current node had a free reference to name. + When the child was processed, it was labelled a free + variable. Now that all its enclosing scope have been + processed, the name is known to be a global or builtin. So + walk back down the child chain and set the name to be global + rather than free. + + Be careful to stop if a child does not think the name is + free. + """ + self.globals[name] = 1 + if self.frees.has_key(name): + del self.frees[name] + for child in self.children: + if child.check_name(name) == SC_FREE: + child.force_global(name) + + def add_frees(self, names): + """Process list of free vars from nested scope. + + Returns a list of names that are either 1) declared global in the + parent or 2) undefined in a top-level parent. In either case, + the nested scope should treat them as globals. + """ + child_globals = [] + for name in names: + sc = self.check_name(name) + if self.nested: + if sc == SC_UNKNOWN or sc == SC_FREE \ + or isinstance(self, ClassScope): + self.frees[name] = 1 + elif sc == SC_GLOBAL: + child_globals.append(name) + elif isinstance(self, FunctionScope) and sc == SC_LOCAL: + self.cells[name] = 1 + else: + child_globals.append(name) + else: + if sc == SC_LOCAL: + self.cells[name] = 1 + else: + child_globals.append(name) + return child_globals + + def get_cell_vars(self): + return self.cells.keys() + class ModuleScope(Scope): __super_init = Scope.__init__ def __init__(self): self.__super_init("global", self) -class LambdaScope(Scope): +class FunctionScope(Scope): + pass + +class LambdaScope(FunctionScope): __super_init = Scope.__init__ __counter = 1 @@ -86,9 +193,6 @@ class LambdaScope(Scope): self.__counter += 1 self.__super_init("lambda.%d" % i, module, klass) -class FunctionScope(Scope): - pass - class ClassScope(Scope): __super_init = Scope.__init__ @@ -111,17 +215,24 @@ class SymbolVisitor: for n in node.defaults: self.visit(n, parent) scope = FunctionScope(node.name, self.module, self.klass) + if parent.nested or isinstance(parent, FunctionScope): + scope.nested = 1 self.scopes[node] = scope self._do_args(scope, node.argnames) self.visit(node.code, scope) - + self.handle_free_vars(scope, parent) + scope.DEBUG() + def visitLambda(self, node, parent): for n in node.defaults: self.visit(n, parent) scope = LambdaScope(self.module, self.klass) + if parent.nested or isinstance(parent, FunctionScope): + scope.nested = 1 self.scopes[node] = scope self._do_args(scope, node.argnames) self.visit(node.code, scope) + self.handle_free_vars(scope, parent) def _do_args(self, scope, args): for name in args: @@ -130,16 +241,25 @@ class SymbolVisitor: else: scope.add_param(name) + def handle_free_vars(self, scope, parent): + parent.add_child(scope) + if scope.children: + scope.DEBUG() + scope.handle_children() + def visitClass(self, node, parent): parent.add_def(node.name) for n in node.bases: self.visit(n, parent) scope = ClassScope(node.name, self.module) + if parent.nested or isinstance(parent, FunctionScope): + scope.nested = 1 self.scopes[node] = scope prev = self.klass self.klass = node.name self.visit(node.code, scope) self.klass = prev + self.handle_free_vars(scope, parent) # name can be a def or a use diff --git a/Tools/compiler/compiler/pyassem.py b/Tools/compiler/compiler/pyassem.py index 43bf6f4e9b1..447a8e78e47 100644 --- a/Tools/compiler/compiler/pyassem.py +++ b/Tools/compiler/compiler/pyassem.py @@ -99,12 +99,6 @@ class FlowGraph: if not self.exit in order: order.append(self.exit) -## for b in order: -## print repr(b) -## print "\t", b.get_children() -## print b -## print - return order def getBlocks(self): @@ -222,6 +216,7 @@ CO_OPTIMIZED = 0x0001 CO_NEWLOCALS = 0x0002 CO_VARARGS = 0x0004 CO_VARKEYWORDS = 0x0008 +CO_NESTED = 0x0010 # the FlowGraph is transformed in place; it exists in one of these states RAW = "RAW" @@ -245,6 +240,15 @@ class PyFlowGraph(FlowGraph): self.flags = 0 self.consts = [] self.names = [] + # Free variables found by the symbol table scan, including + # variables used only in nested scopes, are included here. + self.freevars = [] + self.cellvars = [] + # The closure list is used to track the order of cell + # variables and free variables in the resulting code object. + # The offsets used by LOAD_CLOSURE/LOAD_DEREF refer to both + # kinds of variables. + self.closure = [] self.varnames = list(args) or [] for i in range(len(self.varnames)): var = self.varnames[i] @@ -260,6 +264,12 @@ class PyFlowGraph(FlowGraph): if flag == CO_VARARGS: self.argcount = self.argcount - 1 + def setFreeVars(self, names): + self.freevars = list(names) + + def setCellVars(self, names): + self.cellvars = names + def getCode(self): """Get a Python code object""" if self.stage == RAW: @@ -335,6 +345,7 @@ class PyFlowGraph(FlowGraph): """Convert arguments from symbolic to concrete form""" assert self.stage == FLAT self.consts.insert(0, self.docstring) + self.sort_cellvars() for i in range(len(self.insts)): t = self.insts[i] if len(t) == 2: @@ -345,6 +356,19 @@ class PyFlowGraph(FlowGraph): self.insts[i] = opname, conv(self, oparg) self.stage = CONV + def sort_cellvars(self): + """Sort cellvars in the order of varnames and prune from freevars. + """ + cells = {} + for name in self.cellvars: + cells[name] = 1 + self.cellvars = [name for name in self.varnames + if cells.has_key(name)] + for name in self.cellvars: + del cells[name] + self.cellvars = self.cellvars + cells.keys() + self.closure = self.cellvars + self.freevars + def _lookupName(self, name, list): """Return index of name in list, appending if necessary""" t = type(name) @@ -382,6 +406,17 @@ class PyFlowGraph(FlowGraph): _convert_STORE_GLOBAL = _convert_NAME _convert_DELETE_GLOBAL = _convert_NAME + def _convert_DEREF(self, arg): + self._lookupName(arg, self.names) + self._lookupName(arg, self.varnames) + return self._lookupName(arg, self.closure) + _convert_LOAD_DEREF = _convert_DEREF + _convert_STORE_DEREF = _convert_DEREF + + def _convert_LOAD_CLOSURE(self, arg): + self._lookupName(arg, self.varnames) + return self._lookupName(arg, self.closure) + _cmp = list(dis.cmp_op) def _convert_COMPARE_OP(self, arg): return self._cmp.index(arg) @@ -432,7 +467,8 @@ class PyFlowGraph(FlowGraph): self.lnotab.getCode(), self.getConsts(), tuple(self.names), tuple(self.varnames), self.filename, self.name, self.lnotab.firstline, - self.lnotab.getTable()) + self.lnotab.getTable(), tuple(self.freevars), + tuple(self.cellvars)) def getConsts(self): """Return a tuple for the const slot of the code object diff --git a/Tools/compiler/compiler/pycodegen.py b/Tools/compiler/compiler/pycodegen.py index 3ba31e47653..2d4564a6340 100644 --- a/Tools/compiler/compiler/pycodegen.py +++ b/Tools/compiler/compiler/pycodegen.py @@ -9,8 +9,10 @@ import types from cStringIO import StringIO from compiler import ast, parse, walk -from compiler import pyassem, misc, future -from compiler.pyassem import CO_VARARGS, CO_VARKEYWORDS, CO_NEWLOCALS, TupleArg +from compiler import pyassem, misc, future, symbols +from compiler.consts import SC_LOCAL, SC_GLOBAL, SC_FREE, SC_CELL +from compiler.pyassem import CO_VARARGS, CO_VARKEYWORDS, CO_NEWLOCALS,\ + CO_NESTED, TupleArg # Do we have Python 1.x or Python 2.x? try: @@ -46,7 +48,7 @@ class Module: tree = parse(self.source) root, filename = os.path.split(self.filename) if "nested_scopes" in future.find_futures(tree): - gen = NestedScopeCodeGenerator(filename) + gen = NestedScopeModuleCodeGenerator(filename) else: gen = ModuleCodeGenerator(filename) walk(tree, gen, 1) @@ -70,14 +72,71 @@ class Module: mtime = struct.pack('i', mtime) return self.MAGIC + mtime +class LocalNameFinder: + """Find local names in scope""" + def __init__(self, names=()): + self.names = misc.Set() + self.globals = misc.Set() + for name in names: + self.names.add(name) + + # XXX list comprehensions and for loops + + def getLocals(self): + for elt in self.globals.elements(): + if self.names.has_elt(elt): + self.names.remove(elt) + return self.names + + def visitDict(self, node): + pass + + def visitGlobal(self, node): + for name in node.names: + self.globals.add(name) + + def visitFunction(self, node): + self.names.add(node.name) + + def visitLambda(self, node): + pass + + def visitImport(self, node): + for name, alias in node.names: + self.names.add(alias or name) + + def visitFrom(self, node): + for name, alias in node.names: + self.names.add(alias or name) + + def visitClass(self, node): + self.names.add(node.name) + + def visitAssName(self, node): + self.names.add(node.name) + class CodeGenerator: + """Defines basic code generator for Python bytecode + + This class is an abstract base class. Concrete subclasses must + define an __init__() that defines self.graph and then calls the + __init__() defined in this class. + + The concrete class must also define the class attributes + NameFinder, FunctionGen, and ClassGen. These attributes can be + defined in the initClass() method, which is a hook for + initializing these methods after all the classes have been + defined. + """ optimized = 0 # is namespace access optimized? + __initialized = None def __init__(self, filename): -## Subclasses must define a constructor that intializes self.graph -## before calling this init function, e.g. -## self.graph = pyassem.PyFlowGraph() + if self.__initialized is None: + self.initClass() + self.__class__.__initialized = 1 + self.checkClass() self.filename = filename self.locals = misc.Stack() self.loops = misc.Stack() @@ -86,6 +145,20 @@ class CodeGenerator: self.last_lineno = None self._setupGraphDelegation() + def initClass(self): + """This method is called once for each class""" + + def checkClass(self): + """Verify that class is constructed correctly""" + try: + assert hasattr(self, 'graph') + assert getattr(self, 'NameFinder') + assert getattr(self, 'FunctionGen') + assert getattr(self, 'ClassGen') + except AssertionError, msg: + intro = "Bad class construction for %s" % self.__class__.__name__ + raise AssertionError, intro + def _setupGraphDelegation(self): self.emit = self.graph.emit self.newBlock = self.graph.newBlock @@ -139,10 +212,15 @@ class CodeGenerator: return 0 # The first few visitor methods handle nodes that generator new - # code objects + # code objects. They use class attributes to determine what + # specialized code generators to use. + + NameFinder = LocalNameFinder + FunctionGen = None + ClassGen = None def visitModule(self, node): - lnf = walk(node.node, LocalNameFinder(), 0) + lnf = walk(node.node, self.NameFinder(), 0) self.locals.push(lnf.getLocals()) if node.doc: self.fixDocstring(node.node) @@ -159,8 +237,8 @@ class CodeGenerator: def visitLambda(self, node): self._visitFuncOrLambda(node, isLambda=1) - def _visitFuncOrLambda(self, node, isLambda): - gen = FunctionCodeGenerator(node, self.filename, isLambda) + def _visitFuncOrLambda(self, node, isLambda=0): + gen = self.FunctionGen(node, self.filename, self.scopes, isLambda) walk(node.code, gen) gen.finish() self.set_lineno(node) @@ -170,7 +248,7 @@ class CodeGenerator: self.emit('MAKE_FUNCTION', len(node.defaults)) def visitClass(self, node): - gen = ClassCodeGenerator(node, self.filename) + gen = self.ClassGen(node, self.filename, self.scopes) if node.doc: self.fixDocstring(node.code) walk(node.code, gen) @@ -180,7 +258,7 @@ class CodeGenerator: for base in node.bases: self.visit(base) self.emit('BUILD_TUPLE', len(node.bases)) - self.emit('LOAD_CONST', gen.getCode()) + self.emit('LOAD_CONST', gen) self.emit('MAKE_FUNCTION', 0) self.emit('CALL_FUNCTION', 0) self.emit('BUILD_CLASS') @@ -883,34 +961,114 @@ class CodeGenerator: self.visit(k) self.emit('STORE_SUBSCR') -class ModuleCodeGenerator(CodeGenerator): - __super_init = CodeGenerator.__init__ +class NestedScopeCodeGenerator(CodeGenerator): __super_visitModule = CodeGenerator.visitModule - - def __init__(self, filename): - # XXX is ? in compile.c - self.graph = pyassem.PyFlowGraph("", filename) - self.__super_init(filename) - self.symbols = None + __super_visitClass = CodeGenerator.visitClass + __super__visitFuncOrLambda = CodeGenerator._visitFuncOrLambda + + def parseSymbols(self, tree): + s = symbols.SymbolVisitor() + walk(tree, s) + return s.scopes def visitModule(self, node): - self.symbols = self.parseSymbols(node) + self.scopes = self.parseSymbols(node) + self.scope = self.scopes[node] self.__super_visitModule(node) - def parseSymbols(self, node): - # XXX not implemented - return None + def _nameOp(self, prefix, name): + scope = self.scope.check_name(name) + if scope == SC_LOCAL: + if not self.optimized: + self.emit(prefix + '_NAME', name) + else: + self.emit(prefix + '_FAST', name) + elif scope == SC_GLOBAL: + self.emit(prefix + '_GLOBAL', name) + elif scope == SC_FREE or scope == SC_CELL: + self.emit(prefix + '_DEREF', name) + else: + raise RuntimeError, "unsupported scope for var %s: %d" % \ + (name, scope) -class NestedScopeCodeGenerator(ModuleCodeGenerator): - pass + def _visitFuncOrLambda(self, node, isLambda=0): + gen = self.FunctionGen(node, self.filename, self.scopes, isLambda) + walk(node.code, gen) + gen.finish() + self.set_lineno(node) + for default in node.defaults: + self.visit(default) + frees = gen.scope.get_free_vars() + if frees: + for name in frees: + self.emit('LOAD_CLOSURE', name) + self.emit('LOAD_CONST', gen) + self.emit('MAKE_CLOSURE', len(node.defaults)) + else: + self.emit('LOAD_CONST', gen) + self.emit('MAKE_FUNCTION', len(node.defaults)) -class FunctionCodeGenerator(CodeGenerator): - super_init = CodeGenerator.__init__ + def visitClass(self, node): + gen = self.ClassGen(node, self.filename, self.scopes) + if node.doc: + self.fixDocstring(node.code) + walk(node.code, gen) + gen.finish() + self.set_lineno(node) + self.emit('LOAD_CONST', node.name) + for base in node.bases: + self.visit(base) + self.emit('BUILD_TUPLE', len(node.bases)) + frees = gen.scope.get_free_vars() + for name in frees: + self.emit('LOAD_CLOSURE', name) + self.emit('LOAD_CONST', gen) + if frees: + self.emit('MAKE_CLOSURE', 0) + else: + self.emit('MAKE_FUNCTION', 0) + self.emit('CALL_FUNCTION', 0) + self.emit('BUILD_CLASS') + self.storeName(node.name) + +class LGBScopeMixin: + """Defines initClass() for Python 2.1-compatible scoping""" + def initClass(self): + self.__class__.NameFinder = LocalNameFinder + self.__class__.FunctionGen = FunctionCodeGenerator + self.__class__.ClassGen = ClassCodeGenerator + +class NestedScopeMixin: + """Defines initClass() for nested scoping (Python 2.2-compatible)""" + def initClass(self): + self.__class__.NameFinder = LocalNameFinder + self.__class__.FunctionGen = NestedFunctionCodeGenerator + self.__class__.ClassGen = NestedClassCodeGenerator + +class ModuleCodeGenerator(LGBScopeMixin, CodeGenerator): + __super_init = CodeGenerator.__init__ + + scopes = None + + def __init__(self, filename): + self.graph = pyassem.PyFlowGraph("", filename) + self.__super_init(filename) + +class NestedScopeModuleCodeGenerator(NestedScopeMixin, + NestedScopeCodeGenerator): + __super_init = CodeGenerator.__init__ + + def __init__(self, filename): + self.graph = pyassem.PyFlowGraph("", filename) + self.__super_init(filename) + self.graph.setFlag(CO_NESTED) + +class AbstractFunctionCode: optimized = 1 lambdaCount = 0 - def __init__(self, func, filename, isLambda=0): + def __init__(self, func, filename, scopes, isLambda): if isLambda: klass = FunctionCodeGenerator name = "" % klass.lambdaCount @@ -926,7 +1084,7 @@ class FunctionCodeGenerator(CodeGenerator): if not isLambda and func.doc: self.setDocstring(func.doc) - lnf = walk(func.code, LocalNameFinder(args), 0) + lnf = walk(func.code, self.NameFinder(args), 0) self.locals.push(lnf.getLocals()) if func.varargs: self.graph.setFlag(CO_VARARGS) @@ -963,14 +1121,32 @@ class FunctionCodeGenerator(CodeGenerator): unpackTuple = unpackSequence -class ClassCodeGenerator(CodeGenerator): - super_init = CodeGenerator.__init__ +class FunctionCodeGenerator(LGBScopeMixin, AbstractFunctionCode, + CodeGenerator): + super_init = CodeGenerator.__init__ # call be other init + scopes = None - def __init__(self, klass, filename): +class NestedFunctionCodeGenerator(AbstractFunctionCode, + NestedScopeMixin, + NestedScopeCodeGenerator): + super_init = NestedScopeCodeGenerator.__init__ # call be other init + __super_init = AbstractFunctionCode.__init__ + + def __init__(self, func, filename, scopes, isLambda): + self.scopes = scopes + self.scope = scopes[func] + self.__super_init(func, filename, scopes, isLambda) + self.graph.setFreeVars(self.scope.get_free_vars()) + self.graph.setCellVars(self.scope.get_cell_vars()) + self.graph.setFlag(CO_NESTED) + +class AbstractClassCode: + + def __init__(self, klass, filename, scopes): self.graph = pyassem.PyFlowGraph(klass.name, filename, optimized=0) self.super_init(filename) - lnf = walk(klass.code, LocalNameFinder(), 0) + lnf = walk(klass.code, self.NameFinder(), 0) self.locals.push(lnf.getLocals()) self.graph.setFlag(CO_NEWLOCALS) if klass.doc: @@ -981,6 +1157,24 @@ class ClassCodeGenerator(CodeGenerator): self.emit('LOAD_LOCALS') self.emit('RETURN_VALUE') +class ClassCodeGenerator(LGBScopeMixin, AbstractClassCode, CodeGenerator): + super_init = CodeGenerator.__init__ + scopes = None + +class NestedClassCodeGenerator(AbstractClassCode, + NestedScopeMixin, + NestedScopeCodeGenerator): + super_init = NestedScopeCodeGenerator.__init__ # call be other init + __super_init = AbstractClassCode.__init__ + + def __init__(self, klass, filename, scopes): + self.scopes = scopes + self.scope = scopes[klass] + self.__super_init(klass, filename, scopes) + self.graph.setFreeVars(self.scope.get_free_vars()) + self.graph.setCellVars(self.scope.get_cell_vars()) + self.graph.setFlag(CO_NESTED) + def generateArgList(arglist): """Generate an arg list marking TupleArgs""" args = [] @@ -997,49 +1191,6 @@ def generateArgList(arglist): raise ValueError, "unexpect argument type:", elt return args + extra, count -class LocalNameFinder: - """Find local names in scope""" - def __init__(self, names=()): - self.names = misc.Set() - self.globals = misc.Set() - for name in names: - self.names.add(name) - - # XXX list comprehensions and for loops - - def getLocals(self): - for elt in self.globals.elements(): - if self.names.has_elt(elt): - self.names.remove(elt) - return self.names - - def visitDict(self, node): - pass - - def visitGlobal(self, node): - for name in node.names: - self.globals.add(name) - - def visitFunction(self, node): - self.names.add(node.name) - - def visitLambda(self, node): - pass - - def visitImport(self, node): - for name, alias in node.names: - self.names.add(alias or name) - - def visitFrom(self, node): - for name, alias in node.names: - self.names.add(alias or name) - - def visitClass(self, node): - self.names.add(node.name) - - def visitAssName(self, node): - self.names.add(node.name) - def findOp(node): """Find the op (DELETE, LOAD, STORE) in an AssTuple tree""" v = OpFinder() diff --git a/Tools/compiler/compiler/symbols.py b/Tools/compiler/compiler/symbols.py index 3ab72f31298..cde937b7535 100644 --- a/Tools/compiler/compiler/symbols.py +++ b/Tools/compiler/compiler/symbols.py @@ -1,8 +1,11 @@ """Module symbol-table generator""" from compiler import ast +from compiler.consts import SC_LOCAL, SC_GLOBAL, SC_FREE, SC_CELL, SC_UNKNOWN import types +import sys + MANGLE_LEN = 256 class Scope: @@ -14,7 +17,12 @@ class Scope: self.uses = {} self.globals = {} self.params = {} + self.frees = {} + self.cells = {} self.children = [] + # nested is true if the class could contain free variables, + # i.e. if it is nested within another function. + self.nested = None self.klass = None if klass is not None: for i in range(len(klass)): @@ -70,13 +78,112 @@ class Scope: def get_children(self): return self.children + def DEBUG(self): + return + print >> sys.stderr, self.name, self.nested and "nested" or "" + print >> sys.stderr, "\tglobals: ", self.globals + print >> sys.stderr, "\tcells: ", self.cells + print >> sys.stderr, "\tdefs: ", self.defs + print >> sys.stderr, "\tuses: ", self.uses + print >> sys.stderr, "\tfrees:", self.frees + + def check_name(self, name): + """Return scope of name. + + The scope of a name could be LOCAL, GLOBAL, FREE, or CELL. + """ + if self.globals.has_key(name): + return SC_GLOBAL + if self.cells.has_key(name): + return SC_CELL + if self.defs.has_key(name): + return SC_LOCAL + if self.nested and (self.frees.has_key(name) or + self.uses.has_key(name)): + return SC_FREE + if self.nested: + return SC_UNKNOWN + else: + return SC_GLOBAL + + def get_free_vars(self): + if not self.nested: + return () + free = {} + free.update(self.frees) + for name in self.uses.keys(): + if not (self.defs.has_key(name) or + self.globals.has_key(name)): + free[name] = 1 + return free.keys() + + def handle_children(self): + for child in self.children: + frees = child.get_free_vars() + globals = self.add_frees(frees) + for name in globals: + child.force_global(name) + + def force_global(self, name): + """Force name to be global in scope. + + Some child of the current node had a free reference to name. + When the child was processed, it was labelled a free + variable. Now that all its enclosing scope have been + processed, the name is known to be a global or builtin. So + walk back down the child chain and set the name to be global + rather than free. + + Be careful to stop if a child does not think the name is + free. + """ + self.globals[name] = 1 + if self.frees.has_key(name): + del self.frees[name] + for child in self.children: + if child.check_name(name) == SC_FREE: + child.force_global(name) + + def add_frees(self, names): + """Process list of free vars from nested scope. + + Returns a list of names that are either 1) declared global in the + parent or 2) undefined in a top-level parent. In either case, + the nested scope should treat them as globals. + """ + child_globals = [] + for name in names: + sc = self.check_name(name) + if self.nested: + if sc == SC_UNKNOWN or sc == SC_FREE \ + or isinstance(self, ClassScope): + self.frees[name] = 1 + elif sc == SC_GLOBAL: + child_globals.append(name) + elif isinstance(self, FunctionScope) and sc == SC_LOCAL: + self.cells[name] = 1 + else: + child_globals.append(name) + else: + if sc == SC_LOCAL: + self.cells[name] = 1 + else: + child_globals.append(name) + return child_globals + + def get_cell_vars(self): + return self.cells.keys() + class ModuleScope(Scope): __super_init = Scope.__init__ def __init__(self): self.__super_init("global", self) -class LambdaScope(Scope): +class FunctionScope(Scope): + pass + +class LambdaScope(FunctionScope): __super_init = Scope.__init__ __counter = 1 @@ -86,9 +193,6 @@ class LambdaScope(Scope): self.__counter += 1 self.__super_init("lambda.%d" % i, module, klass) -class FunctionScope(Scope): - pass - class ClassScope(Scope): __super_init = Scope.__init__ @@ -111,17 +215,24 @@ class SymbolVisitor: for n in node.defaults: self.visit(n, parent) scope = FunctionScope(node.name, self.module, self.klass) + if parent.nested or isinstance(parent, FunctionScope): + scope.nested = 1 self.scopes[node] = scope self._do_args(scope, node.argnames) self.visit(node.code, scope) - + self.handle_free_vars(scope, parent) + scope.DEBUG() + def visitLambda(self, node, parent): for n in node.defaults: self.visit(n, parent) scope = LambdaScope(self.module, self.klass) + if parent.nested or isinstance(parent, FunctionScope): + scope.nested = 1 self.scopes[node] = scope self._do_args(scope, node.argnames) self.visit(node.code, scope) + self.handle_free_vars(scope, parent) def _do_args(self, scope, args): for name in args: @@ -130,16 +241,25 @@ class SymbolVisitor: else: scope.add_param(name) + def handle_free_vars(self, scope, parent): + parent.add_child(scope) + if scope.children: + scope.DEBUG() + scope.handle_children() + def visitClass(self, node, parent): parent.add_def(node.name) for n in node.bases: self.visit(n, parent) scope = ClassScope(node.name, self.module) + if parent.nested or isinstance(parent, FunctionScope): + scope.nested = 1 self.scopes[node] = scope prev = self.klass self.klass = node.name self.visit(node.code, scope) self.klass = prev + self.handle_free_vars(scope, parent) # name can be a def or a use