From 75dc4969ab202e8c3dda15bedacc880d589e1e44 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 5 Mar 1998 03:42:00 +0000 Subject: [PATCH] Added support for packages. We have a whole new module finder that uses the actual Python parser and scans the bytecode for IMPORT_NAME and IMPORT_FROM. This requires some support in import.c (that hasn't been checked in). New command line options for this: -d, -q, -m. --- Tools/freeze/findmodules.py | 127 ------------ Tools/freeze/freeze.py | 61 ++++-- Tools/freeze/makefreeze.py | 74 ++----- Tools/freeze/modulefinder.py | 373 +++++++++++++++++++++++++++++++++++ 4 files changed, 437 insertions(+), 198 deletions(-) delete mode 100644 Tools/freeze/findmodules.py create mode 100644 Tools/freeze/modulefinder.py diff --git a/Tools/freeze/findmodules.py b/Tools/freeze/findmodules.py deleted file mode 100644 index 9e02f2be36b..00000000000 --- a/Tools/freeze/findmodules.py +++ /dev/null @@ -1,127 +0,0 @@ -# Determine the names and filenames of the modules imported by a -# script, recursively. This is done by scanning for lines containing -# import statements. (The scanning has only superficial knowledge of -# Python syntax and no knowledge of semantics, so in theory the result -# may be incorrect -- however this is quite unlikely if you don't -# intentionally obscure your Python code.) - -import os -import regex -import string -import sys - - -# Top-level interface. -# First argument is the main program (script). -# Second optional argument is list of modules to be searched as well. - -def findmodules(scriptfile, modules = [], path = sys.path): - todo = {} - todo['__main__'] = scriptfile - for name in modules: - mod = os.path.basename(name) - if mod[-3:] == '.py': mod = mod[:-3] - elif mod[-4:] == '.pyc': mod = mod[:-4] - todo[mod] = name - done = closure(todo) - return done - - -# Compute the closure of scanfile() and findmodule(). -# Return a dictionary mapping module names to filenames. -# Writes to stderr if a file can't be or read. - -def closure(todo): - done = {} - while todo: - newtodo = {} - for modname in todo.keys(): - if not done.has_key(modname): - filename = todo[modname] - if filename is None: - filename = findmodule(modname) - done[modname] = filename - if filename in ('', ''): - continue - try: - modules = scanfile(filename) - except IOError, msg: - sys.stderr.write("%s: %s\n" % - (filename, str(msg))) - continue - for m in modules: - if not done.has_key(m): - newtodo[m] = None - todo = newtodo - return done - - -# Scan a file looking for import statements. -# Return list of module names. -# Can raise IOError. - -importstr = '\(^\|:\)[ \t]*import[ \t]+\([a-zA-Z0-9_, \t]+\)' -fromstr = '\(^\|:\)[ \t]*from[ \t]+\([a-zA-Z0-9_]+\)[ \t]+import[ \t]+' -isimport = regex.compile(importstr) -isfrom = regex.compile(fromstr) - -def scanfile(filename): - allmodules = {} - f = open(filename, 'r') - try: - while 1: - line = f.readline() - if not line: break # EOF - while line[-2:] == '\\\n': # Continuation line - line = line[:-2] + ' ' - line = line + f.readline() - if isimport.search(line) >= 0: - rawmodules = isimport.group(2) - modules = string.splitfields(rawmodules, ',') - for i in range(len(modules)): - modules[i] = string.strip(modules[i]) - elif isfrom.search(line) >= 0: - modules = [isfrom.group(2)] - else: - continue - for mod in modules: - allmodules[mod] = None - finally: - f.close() - return allmodules.keys() - - -# Find the file containing a module, given its name. -# Return filename, or '', or ''. - -builtins = sys.builtin_module_names -tails = ['.py', '.pyc'] - -def findmodule(modname, path = sys.path): - if modname in builtins: return '' - for dirname in path: - for tail in tails: - fullname = os.path.join(dirname, modname + tail) - try: - f = open(fullname, 'r') - except IOError: - continue - f.close() - return fullname - return '' - - -# Test the above functions. - -def test(): - if not sys.argv[1:]: - print 'usage: python findmodules.py scriptfile [morefiles ...]' - sys.exit(2) - done = findmodules(sys.argv[1], sys.argv[2:]) - items = done.items() - items.sort() - for mod, file in [('Module', 'File')] + items: - print "%-15s %s" % (mod, file) - -if __name__ == '__main__': - test() diff --git a/Tools/freeze/freeze.py b/Tools/freeze/freeze.py index 181e84598b1..b6f2d4ada2a 100755 --- a/Tools/freeze/freeze.py +++ b/Tools/freeze/freeze.py @@ -26,6 +26,12 @@ Options: -o dir: Directory where the output files are created; default '.'. +-m: Additional arguments are module names instead of filenames. + +-d: Debugging mode for the module finder. + +-q: Make the module finder totally quiet. + -h: Print this help message. -w: Toggle Windows (NT or 95) behavior. @@ -42,7 +48,8 @@ script.py: The Python script to be executed by the resulting binary. module ...: Additional Python modules (referenced by pathname) that will be included in the resulting binary. These - may be .py or .pyc files. + may be .py or .pyc files. If -m is specified, these are + module names that are search in the path instead. NOTES: @@ -67,7 +74,7 @@ import addpack # Import the freeze-private modules import checkextensions -import findmodules +import modulefinder import makeconfig import makefreeze import makemakefile @@ -82,6 +89,8 @@ def main(): exec_prefix = None # settable with -P option extensions = [] path = sys.path + modargs = 0 + debug = 1 odir = '' win = sys.platform[:3] == 'win' @@ -97,7 +106,7 @@ def main(): # parse command line try: - opts, args = getopt.getopt(sys.argv[1:], 'he:o:p:P:s:w') + opts, args = getopt.getopt(sys.argv[1:], 'deh:mo:p:P:qs:w') except getopt.error, msg: usage('getopt error: ' + str(msg)) @@ -106,14 +115,20 @@ def main(): if o == '-h': print __doc__ return + if o == '-d': + debug = debug + 1 if o == '-e': extensions.append(a) + if o == '-m': + modargs = 1 if o == '-o': odir = a if o == '-p': prefix = a if o == '-P': exec_prefix = a + if o == '-q': + debug = 0 if o == '-w': win = not win if o == '-s': @@ -220,18 +235,30 @@ def main(): target = os.path.join(odir, target) makefile = os.path.join(odir, makefile) - for mod in implicits: - modules.append(findmodules.findmodule(mod)) - # Actual work starts here... - dict = findmodules.findmodules(scriptfile, modules, path) - names = dict.keys() - names.sort() - print "Modules being frozen:" - for name in names: - print '\t', name + # collect all modules of the program + mf = modulefinder.ModuleFinder(path, debug) + for mod in implicits: + mf.import_hook(mod) + for mod in modules: + if mod == '-m': + modargs = 1 + continue + if modargs: + if mod[-2:] == '.*': + mf.import_hook(mod[:-2], None, ["*"]) + else: + mf.import_hook(mod) + else: + mf.load_file(mod) + mf.run_script(scriptfile) + if debug > 0: + mf.report() + print + dict = mf.modules + # generate output for frozen modules backup = frozen_c + '~' try: os.rename(frozen_c, backup) @@ -239,7 +266,7 @@ def main(): backup = None outfp = open(frozen_c, 'w') try: - makefreeze.makefreeze(outfp, dict) + makefreeze.makefreeze(outfp, dict, debug) if win and subsystem == 'windows': import winmakemakefile outfp.write(winmakemakefile.WINMAINTEMPLATE) @@ -251,6 +278,7 @@ def main(): frozen_c) os.rename(backup, frozen_c) + # windows gets different treatment if win: # Taking a shortcut here... import winmakemakefile @@ -264,14 +292,17 @@ def main(): outfp.close() return + # generate config.c and Makefile builtins = [] unknown = [] mods = dict.keys() mods.sort() for mod in mods: - if dict[mod] == '': + if dict[mod].__code__: + continue + if not dict[mod].__file__: builtins.append(mod) - elif dict[mod] == '': + else: unknown.append(mod) addfiles = [] diff --git a/Tools/freeze/makefreeze.py b/Tools/freeze/makefreeze.py index 5c6f371af55..97315b33912 100644 --- a/Tools/freeze/makefreeze.py +++ b/Tools/freeze/makefreeze.py @@ -1,4 +1,5 @@ import marshal +import string # Write a file containing frozen code for the modules in the dictionary. @@ -23,51 +24,31 @@ main(argc, argv) """ -def makefreeze(outfp, dict): +def makefreeze(outfp, dict, debug=0): done = [] mods = dict.keys() mods.sort() for mod in mods: - modfn = dict[mod] - try: - str = makecode(modfn) - except IOError, msg: - sys.stderr.write("%s: %s\n" % (modfn, str(msg))) - continue - if str: - done.append(mod, len(str)) - writecode(outfp, mod, str) + m = dict[mod] + mangled = string.join(string.split(mod, "."), "__") + if m.__code__: + if debug: + print "freezing", mod, "..." + str = marshal.dumps(m.__code__) + size = len(str) + if m.__path__: + # Indicate package by negative size + size = -size + done.append((mod, mangled, size)) + writecode(outfp, mangled, str) + if debug: + print "generating table of frozen modules" outfp.write(header) - for mod, size in done: - outfp.write('\t{"%s", M_%s, %d},\n' % (mod, mod, size)) + for mod, mangled, size in done: + outfp.write('\t{"%s", M_%s, %d},\n' % (mod, mangled, size)) outfp.write(trailer) -# Return code string for a given module -- either a .py or a .pyc -# file. Return either a string or None (if it's not Python code). -# May raise IOError. - -def makecode(filename): - if filename[-3:] == '.py': - f = open(filename, 'r') - try: - text = f.read() - code = compile(text, filename, 'exec') - finally: - f.close() - return marshal.dumps(code) - if filename[-4:] == '.pyc': - f = open(filename, 'rb') - try: - f.seek(8) - str = f.read() - finally: - f.close() - return str - # Can't generate code for this extension - return None - - # Write a C initializer for a module containing the frozen python code. # The array is called M_. @@ -78,22 +59,3 @@ def writecode(outfp, mod, str): for c in str[i:i+16]: outfp.write('%d,' % ord(c)) outfp.write('\n};\n') - - -# Test for the above functions. - -def test(): - import os - import sys - if not sys.argv[1:]: - print 'usage: python freezepython.py file.py(c) ...' - sys.exit(2) - dict = {} - for arg in sys.argv[1:]: - base = os.path.basename(arg) - mod, ext = os.path.splitext(base) - dict[mod] = arg - makefreeze(sys.stdout, dict) - -if __name__ == '__main__': - test() diff --git a/Tools/freeze/modulefinder.py b/Tools/freeze/modulefinder.py new file mode 100644 index 00000000000..79665b7e75c --- /dev/null +++ b/Tools/freeze/modulefinder.py @@ -0,0 +1,373 @@ +"""Find modules used by a script, using introspection.""" + +import dis +import imp +import marshal +import os +import re +import string +import sys + + +IMPORT_NAME = dis.opname.index('IMPORT_NAME') +IMPORT_FROM = dis.opname.index('IMPORT_FROM') + + +class Module: + + def __init__(self, name, file=None, path=None): + self.__name__ = name + self.__file__ = file + self.__path__ = path + self.__code__ = None + + def __repr__(self): + s = "Module(%s" % `self.__name__` + if self.__file__ is not None: + s = s + ", %s" % `self.__file__` + if self.__path__ is not None: + s = s + ", %s" % `self.__path__` + s = s + ")" + return s + + +class ModuleFinder: + + def __init__(self, path=None, debug=0): + if path is None: + path = sys.path + self.path = path + self.modules = {} + self.badmodules = {} + self.debug = debug + self.indent = 0 + + def msg(self, level, str, *args): + if level <= self.debug: + for i in range(self.indent): + print " ", + print str, + for arg in args: + print repr(arg), + print + + def msgin(self, *args): + level = args[0] + if level <= self.debug: + self.indent = self.indent + 1 + apply(self.msg, args) + + def msgout(self, *args): + level = args[0] + if level <= self.debug: + self.indent = self.indent - 1 + apply(self.msg, args) + + def run_script(self, pathname): + self.msg(2, "run_script", pathname) + fp = open(pathname) + stuff = ("", "r", imp.PY_SOURCE) + self.load_module('__main__', fp, pathname, stuff) + + def load_file(self, pathname): + dir, name = os.path.split(pathname) + name, ext = os.path.splitext(name) + fp = open(pathname) + stuff = (ext, "r", imp.PY_SOURCE) + self.load_module(name, fp, pathname, stuff) + + def import_hook(self, name, caller=None, fromlist=None): + self.msg(3, "import_hook", name, caller, fromlist) + parent = self.determine_parent(caller) + q, tail = self.find_head_package(parent, name) + m = self.load_tail(q, tail) + if not fromlist: + return q + if m.__path__: + self.ensure_fromlist(m, fromlist) + + def determine_parent(self, caller): + self.msgin(4, "determine_parent", caller) + if not caller: + self.msgout(4, "determine_parent -> None") + return None + pname = caller.__name__ + if caller.__path__: + parent = self.modules[pname] + assert caller is parent + self.msgout(4, "determine_parent ->", parent) + return parent + if '.' in pname: + i = string.rfind(pname, '.') + pname = pname[:i] + parent = self.modules[pname] + assert parent.__name__ == pname + self.msgout(4, "determine_parent ->", parent) + return parent + self.msgout(4, "determine_parent -> None") + return None + + def find_head_package(self, parent, name): + self.msgin(4, "find_head_package", parent, name) + if '.' in name: + i = string.find(name, '.') + head = name[:i] + tail = name[i+1:] + else: + head = name + tail = "" + if parent: + qname = "%s.%s" % (parent.__name__, head) + else: + qname = head + q = self.import_module(head, qname, parent) + if q: + self.msgout(4, "find_head_package ->", (q, tail)) + return q, tail + if parent: + qname = head + parent = None + q = self.import_module(head, qname, parent) + if q: + self.msgout(4, "find_head_package ->", (q, tail)) + return q, tail + self.msgout(4, "raise ImportError: No module named", qname) + raise ImportError, "No module named " + qname + + def load_tail(self, q, tail): + self.msgin(4, "load_tail", q, tail) + m = q + while tail: + i = string.find(tail, '.') + if i < 0: i = len(tail) + head, tail = tail[:i], tail[i+1:] + mname = "%s.%s" % (m.__name__, head) + m = self.import_module(head, mname, m) + if not m: + self.msgout(4, "raise ImportError: No module named", mname) + raise ImportError, "No module named " + mname + self.msgout(4, "load_tail ->", m) + return m + + def ensure_fromlist(self, m, fromlist, recursive=0): + self.msg(4, "ensure_fromlist", m, fromlist, recursive) + for sub in fromlist: + if sub == "*": + if not recursive: + all = self.find_all_submodules(m) + if all: + self.ensure_fromlist(m, all, 1) + elif not hasattr(m, sub): + subname = "%s.%s" % (m.__name__, sub) + submod = self.import_module(sub, subname, m) + if not submod: + raise ImportError, "No module named " + subname + + def find_all_submodules(self, m): + if not m.__path__: + return + modules = {} + suffixes = [".py", ".pyc", ".pyo"] + for dir in m.__path__: + try: + names = os.listdir(dir) + except os.error: + self.msg(2, "can't list directory", dir) + continue + for name in names: + mod = None + for suff in suffixes: + n = len(suff) + if name[-n:] == suff: + mod = name[:-n] + break + if mod and mod != "__init__": + modules[mod] = mod + return modules.keys() + + def import_module(self, partname, fqname, parent): + self.msgin(3, "import_module", partname, fqname, parent) + try: + m = self.modules[fqname] + except KeyError: + pass + else: + self.msgout(3, "import_module ->", m) + return m + if self.badmodules.has_key(fqname): + self.msgout(3, "import_module -> None") + return None + try: + fp, pathname, stuff = self.find_module(partname, + parent and parent.__path__) + except ImportError: + self.msgout(3, "import_module ->", None) + return None + try: + m = self.load_module(fqname, fp, pathname, stuff) + finally: + if fp: fp.close() + if parent: + setattr(parent, partname, m) + self.msgout(3, "import_module ->", m) + return m + + def load_module(self, fqname, fp, pathname, (suffix, mode, type)): + self.msgin(2, "load_module", fqname, fp and "fp", pathname) + if type == imp.PKG_DIRECTORY: + m = self.load_package(fqname, pathname) + self.msgout(2, "load_module ->", m) + return m + if type == imp.PY_SOURCE: + co = compile(fp.read(), pathname, 'exec') + elif type == imp.PY_COMPILED: + if fp.read(4) != imp.get_magic(): + self.msgout(2, "raise ImportError: Bad magic number", pathname) + raise ImportError, "Bad magic number in %s", pathname + fp.read(4) + co = marshal.load(fp) + else: + co = None + m = self.add_module(fqname) + if co: + m.__file__ = pathname + m.__code__ = co + code = co.co_code + n = len(code) + i = 0 + lastname = None + while i < n: + c = code[i] + i = i+1 + op = ord(c) + if op >= dis.HAVE_ARGUMENT: + oparg = ord(code[i]) + ord(code[i+1])*256 + i = i+2 + if op == IMPORT_NAME: + name = lastname = co.co_names[oparg] + if not self.badmodules.has_key(lastname): + try: + self.import_hook(name, m) + except ImportError, msg: + self.msg(2, "ImportError:", str(msg)) + self.badmodules[name] = None + elif op == IMPORT_FROM: + name = co.co_names[oparg] + assert lastname is not None + if not self.badmodules.has_key(lastname): + try: + self.import_hook(lastname, m, [name]) + except ImportError, msg: + self.msg(2, "ImportError:", str(msg)) + fullname = lastname + "." + name + self.badmodules[fullname] = None + else: + lastname = None + self.msgout(2, "load_module ->", m) + return m + + def load_package(self, fqname, pathname): + self.msgin(2, "load_package", fqname, pathname) + m = self.add_module(fqname) + m.__file__ = pathname + m.__path__ = [pathname] + fp, buf, stuff = self.find_module("__init__", m.__path__) + self.load_module(fqname, fp, buf, stuff) + self.msgout(2, "load_package ->", m) + return m + + def add_module(self, fqname): + if self.modules.has_key(fqname): + return self.modules[fqname] + self.modules[fqname] = m = Module(fqname) + return m + + def find_module(self, name, path): + if path is None: + if name in sys.builtin_module_names: + return (None, None, ("", "", imp.C_BUILTIN)) + path = self.path + return imp.find_module(name, path) + + def report(self): + print + print " %-25s %s" % ("Name", "File") + print " %-25s %s" % ("----", "----") + # Print modules found + keys = self.modules.keys() + keys.sort() + for key in keys: + m = self.modules[key] + if m.__path__: + print "P", + else: + print "m", + print "%-25s" % key, m.__file__ or "" + + # Print missing modules + keys = self.badmodules.keys() + keys.sort() + for key in keys: + print "?", key + + +def test(): + # Parse command line + import getopt + try: + opts, args = getopt.getopt(sys.argv[1:], "dmp:q") + except getopt.error, msg: + print msg + return + + # Process options + debug = 1 + domods = 0 + addpath = [] + for o, a in opts: + if o == '-d': + debug = debug + 1 + if o == '-m': + domods = 1 + if o == '-p': + addpath = addpath + string.split(a, os.pathsep) + if o == '-q': + debug = 0 + + # Provide default arguments + if not args: + script = "hello.py" + else: + script = args[0] + + # Set the path based on sys.path and the script directory + path = sys.path[:] + path[0] = os.path.dirname(script) + path = addpath + path + if debug > 1: + print "path:" + for item in path: + print " ", `item` + + # Create the module finder and turn its crank + mf = ModuleFinder(path, debug) + for arg in args[1:]: + if arg == '-m': + domods = 1 + continue + if domods: + if arg[-2:] == '.*': + mf.import_hook(arg[:-2], None, ["*"]) + else: + mf.import_hook(arg) + else: + mf.load_file(arg) + mf.run_script(script) + mf.report() + + +if __name__ == '__main__': + try: + test() + except KeyboardInterrupt: + print "\n[interrupt]"