"""Assembler for Python bytecode The new module is used to create the code object. The following attribute definitions are included from the reference manual: co_name gives the function name co_argcount is the number of positional arguments (including arguments with default values) co_nlocals is the number of local variables used by the function (including arguments) co_varnames is a tuple containing the names of the local variables (starting with the argument names) co_code is a string representing the sequence of bytecode instructions co_consts is a tuple containing the literals used by the bytecode co_names is a tuple containing the names used by the bytecode co_filename is the filename from which the code was compiled co_firstlineno is the first line number of the function co_lnotab is a string encoding the mapping from byte code offsets to line numbers. see LineAddrTable below. co_stacksize is the required stack size (including local variables) co_flags is an integer encoding a number of flags for the interpreter. There are four flags: CO_OPTIMIZED -- uses load fast CO_NEWLOCALS -- everything? CO_VARARGS -- use *args CO_VARKEYWORDS -- uses **args If a code object represents a function, the first item in co_consts is the documentation string of the function, or None if undefined. """ import sys import dis import new import string import misc # flags for code objects CO_OPTIMIZED = 0x0001 CO_NEWLOCALS = 0x0002 CO_VARARGS = 0x0004 CO_VARKEYWORDS = 0x0008 class PyAssembler: """Creates Python code objects """ # XXX this class needs to major refactoring def __init__(self, args=(), name='?', filename='', docstring=None): # XXX why is the default value for flags 3? self.insts = [] # used by makeCodeObject self._getArgCount(args) self.code = '' self.consts = [docstring] self.filename = filename self.flags = CO_NEWLOCALS self.name = name self.names = [] self.varnames = list(args) or [] # lnotab support self.firstlineno = 0 self.lastlineno = 0 self.last_addr = 0 self.lnotab = '' def _getArgCount(self, args): if args and args[0][0] == '.': for i in range(len(args)): if args[i][0] == '.': num = i self.argcount = num + 1 else: self.argcount = len(args) def __repr__(self): return "" % len(self.insts) def setFlags(self, val): """XXX for module's function""" self.flags = val def setOptimized(self): self.flags = self.flags | CO_OPTIMIZED def setVarArgs(self): self.flags = self.flags | CO_VARARGS def setKWArgs(self): self.flags = self.flags | CO_VARKEYWORDS def getCurInst(self): return len(self.insts) def getNextInst(self): return len(self.insts) + 1 def dump(self, io=sys.stdout): i = 0 for inst in self.insts: if inst[0] == 'SET_LINENO': io.write("\n") io.write(" %3d " % i) if len(inst) == 1: io.write("%s\n" % inst) else: io.write("%-15.15s\t%s\n" % inst) i = i + 1 def makeCodeObject(self): """Make a Python code object This creates a Python code object using the new module. This seems simpler than reverse-engineering the way marshal dumps code objects into .pyc files. One of the key difficulties is figuring out how to layout references to code objects that appear on the VM stack; e.g. 3 SET_LINENO 1 6 LOAD_CONST 0 ( 0 or line > 0: # write the values in 1-byte chunks that sum # to desired value trunc_addr = addr trunc_line = line if trunc_addr > 255: trunc_addr = 255 if trunc_line > 255: trunc_line = 255 self.lnotab.append(trunc_addr) self.lnotab.append(trunc_line) addr = addr - trunc_addr line = line - trunc_line self.lastline = lineno self.lastoff = self.codeOffset def getCode(self): return string.join(self.code, '') def getTable(self): return string.join(map(chr, self.lnotab), '') class StackRef: """Manage stack locations for jumps, loops, etc.""" count = 0 def __init__(self, id=None, val=None): if id is None: id = StackRef.count StackRef.count = StackRef.count + 1 self.id = id self.val = val def __repr__(self): if self.val: return "StackRef(val=%d)" % self.val else: return "StackRef(id=%d)" % self.id def bind(self, inst): self.val = inst def resolve(self): if self.val is None: print "UNRESOLVE REF", self return 0 return self.val class StackDepthTracker: # XXX need to keep track of stack depth on jumps def findDepth(self, insts): depth = 0 maxDepth = 0 for i in insts: opname = i[0] delta = self.effect.get(opname, 0) if delta > 1: depth = depth + delta elif delta < 0: if depth > maxDepth: maxDepth = depth depth = depth + delta else: if depth > maxDepth: maxDepth = depth # now check patterns for pat, delta in self.patterns: if opname[:len(pat)] == pat: depth = depth + delta break # if we still haven't found a match if delta == 0: meth = getattr(self, opname) depth = depth + meth(i[1]) if depth < 0: depth = 0 return maxDepth effect = { 'POP_TOP': -1, 'DUP_TOP': 1, 'SLICE+1': -1, 'SLICE+2': -1, 'SLICE+3': -2, 'STORE_SLICE+0': -1, 'STORE_SLICE+1': -2, 'STORE_SLICE+2': -2, 'STORE_SLICE+3': -3, 'DELETE_SLICE+0': -1, 'DELETE_SLICE+1': -2, 'DELETE_SLICE+2': -2, 'DELETE_SLICE+3': -3, 'STORE_SUBSCR': -3, 'DELETE_SUBSCR': -2, # PRINT_EXPR? 'PRINT_ITEM': -1, 'LOAD_LOCALS': 1, 'RETURN_VALUE': -1, 'EXEC_STMT': -2, 'BUILD_CLASS': -2, 'STORE_NAME': -1, 'STORE_ATTR': -2, 'DELETE_ATTR': -1, 'STORE_GLOBAL': -1, 'BUILD_MAP': 1, 'COMPARE_OP': -1, 'STORE_FAST': -1, } # use pattern match patterns = [ ('BINARY_', -1), ('LOAD_', 1), ('IMPORT_', 1), ] # special cases #: UNPACK_TUPLE, UNPACK_LIST, BUILD_TUPLE, # BUILD_LIST, CALL_FUNCTION, MAKE_FUNCTION, BUILD_SLICE def UNPACK_TUPLE(self, count): return count def UNPACK_LIST(self, count): return count def BUILD_TUPLE(self, count): return -count def BUILD_LIST(self, count): return -count def CALL_FUNCTION(self, argc): hi, lo = divmod(argc, 256) return lo + hi * 2 def MAKE_FUNCTION(self, argc): return -argc def BUILD_SLICE(self, argc): if argc == 2: return -1 elif argc == 3: return -2 findDepth = StackDepthTracker().findDepth