Added support for packages.

We have a whole new module finder that uses the actual Python
parser and scans the bytecode for IMPORT_NAME and IMPORT_FROM.
This requires some support in import.c (that hasn't been checked in).
New command line options for this: -d, -q, -m.
This commit is contained in:
Guido van Rossum 1998-03-05 03:42:00 +00:00
parent 3455edcbc8
commit 75dc4969ab
4 changed files with 437 additions and 198 deletions

View File

@ -1,127 +0,0 @@
# Determine the names and filenames of the modules imported by a
# script, recursively. This is done by scanning for lines containing
# import statements. (The scanning has only superficial knowledge of
# Python syntax and no knowledge of semantics, so in theory the result
# may be incorrect -- however this is quite unlikely if you don't
# intentionally obscure your Python code.)
import os
import regex
import string
import sys
# Top-level interface.
# First argument is the main program (script).
# Second optional argument is list of modules to be searched as well.
def findmodules(scriptfile, modules = [], path = sys.path):
todo = {}
todo['__main__'] = scriptfile
for name in modules:
mod = os.path.basename(name)
if mod[-3:] == '.py': mod = mod[:-3]
elif mod[-4:] == '.pyc': mod = mod[:-4]
todo[mod] = name
done = closure(todo)
return done
# Compute the closure of scanfile() and findmodule().
# Return a dictionary mapping module names to filenames.
# Writes to stderr if a file can't be or read.
def closure(todo):
done = {}
while todo:
newtodo = {}
for modname in todo.keys():
if not done.has_key(modname):
filename = todo[modname]
if filename is None:
filename = findmodule(modname)
done[modname] = filename
if filename in ('<builtin>', '<unknown>'):
continue
try:
modules = scanfile(filename)
except IOError, msg:
sys.stderr.write("%s: %s\n" %
(filename, str(msg)))
continue
for m in modules:
if not done.has_key(m):
newtodo[m] = None
todo = newtodo
return done
# Scan a file looking for import statements.
# Return list of module names.
# Can raise IOError.

# Patterns for "import a, b" and "from a import ..." statements; in both,
# group 2 holds the text naming the module(s).
importstr = '\(^\|:\)[ \t]*import[ \t]+\([a-zA-Z0-9_, \t]+\)'
fromstr = '\(^\|:\)[ \t]*from[ \t]+\([a-zA-Z0-9_]+\)[ \t]+import[ \t]+'
isimport = regex.compile(importstr)
isfrom = regex.compile(fromstr)

def scanfile(filename):
    """Return the names of the modules imported by the file filename.

    The file is read line by line and matched against the isimport and
    isfrom patterns.  Each name is reported once.  IOError from opening
    or reading the file is passed on to the caller.
    """
    found = {}
    f = open(filename, 'r')
    try:
        line = f.readline()
        while line:
            # Glue backslash-continued lines into one logical line.
            while line[-2:] == '\\\n':
                line = line[:-2] + ' ' + f.readline()
            if isimport.search(line) >= 0:
                pieces = string.splitfields(isimport.group(2), ',')
                names = map(string.strip, pieces)
            elif isfrom.search(line) >= 0:
                names = [isfrom.group(2)]
            else:
                names = []
            for name in names:
                found[name] = None
            line = f.readline()
    finally:
        f.close()
    return found.keys()
# Find the file containing a module, given its name.
# Return filename, or '<builtin>', or '<unknown>'.
builtins = sys.builtin_module_names
tails = ['.py', '.pyc']

def findmodule(modname, path = sys.path):
    """Locate the module modname and return where it lives.

    Names listed in builtins yield '<builtin>'.  Otherwise each
    directory in path is tried in order, with every suffix in tails,
    and the first file that can be opened for reading is returned.
    '<unknown>' is returned when nothing is found.
    """
    if modname in builtins:
        return '<builtin>'
    for dirname in path:
        for tail in tails:
            candidate = os.path.join(dirname, modname + tail)
            # Being able to open the file is the existence test.
            try:
                probe = open(candidate, 'r')
            except IOError:
                pass
            else:
                probe.close()
                return candidate
    return '<unknown>'
# Test the above functions.
def test():
if not sys.argv[1:]:
print 'usage: python findmodules.py scriptfile [morefiles ...]'
sys.exit(2)
done = findmodules(sys.argv[1], sys.argv[2:])
items = done.items()
items.sort()
for mod, file in [('Module', 'File')] + items:
print "%-15s %s" % (mod, file)
# Run the self-test when this file is executed as a script.
if __name__ == '__main__':
    test()

View File

@ -26,6 +26,12 @@ Options:
-o dir: Directory where the output files are created; default '.'. -o dir: Directory where the output files are created; default '.'.
-m: Additional arguments are module names instead of filenames.
-d: Debugging mode for the module finder.
-q: Make the module finder totally quiet.
-h: Print this help message. -h: Print this help message.
-w: Toggle Windows (NT or 95) behavior. -w: Toggle Windows (NT or 95) behavior.
@ -42,7 +48,8 @@ script.py: The Python script to be executed by the resulting binary.
module ...: Additional Python modules (referenced by pathname) module ...: Additional Python modules (referenced by pathname)
that will be included in the resulting binary. These that will be included in the resulting binary. These
may be .py or .pyc files. may be .py or .pyc files. If -m is specified, these are
module names that are searched in the path instead.
NOTES: NOTES:
@ -67,7 +74,7 @@ import addpack
# Import the freeze-private modules # Import the freeze-private modules
import checkextensions import checkextensions
import findmodules import modulefinder
import makeconfig import makeconfig
import makefreeze import makefreeze
import makemakefile import makemakefile
@ -82,6 +89,8 @@ def main():
exec_prefix = None # settable with -P option exec_prefix = None # settable with -P option
extensions = [] extensions = []
path = sys.path path = sys.path
modargs = 0
debug = 1
odir = '' odir = ''
win = sys.platform[:3] == 'win' win = sys.platform[:3] == 'win'
@ -97,7 +106,7 @@ def main():
# parse command line # parse command line
try: try:
opts, args = getopt.getopt(sys.argv[1:], 'he:o:p:P:s:w') opts, args = getopt.getopt(sys.argv[1:], 'deh:mo:p:P:qs:w')
except getopt.error, msg: except getopt.error, msg:
usage('getopt error: ' + str(msg)) usage('getopt error: ' + str(msg))
@ -106,14 +115,20 @@ def main():
if o == '-h': if o == '-h':
print __doc__ print __doc__
return return
if o == '-d':
debug = debug + 1
if o == '-e': if o == '-e':
extensions.append(a) extensions.append(a)
if o == '-m':
modargs = 1
if o == '-o': if o == '-o':
odir = a odir = a
if o == '-p': if o == '-p':
prefix = a prefix = a
if o == '-P': if o == '-P':
exec_prefix = a exec_prefix = a
if o == '-q':
debug = 0
if o == '-w': if o == '-w':
win = not win win = not win
if o == '-s': if o == '-s':
@ -220,18 +235,30 @@ def main():
target = os.path.join(odir, target) target = os.path.join(odir, target)
makefile = os.path.join(odir, makefile) makefile = os.path.join(odir, makefile)
for mod in implicits:
modules.append(findmodules.findmodule(mod))
# Actual work starts here... # Actual work starts here...
dict = findmodules.findmodules(scriptfile, modules, path) # collect all modules of the program
names = dict.keys() mf = modulefinder.ModuleFinder(path, debug)
names.sort() for mod in implicits:
print "Modules being frozen:" mf.import_hook(mod)
for name in names: for mod in modules:
print '\t', name if mod == '-m':
modargs = 1
continue
if modargs:
if mod[-2:] == '.*':
mf.import_hook(mod[:-2], None, ["*"])
else:
mf.import_hook(mod)
else:
mf.load_file(mod)
mf.run_script(scriptfile)
if debug > 0:
mf.report()
print
dict = mf.modules
# generate output for frozen modules
backup = frozen_c + '~' backup = frozen_c + '~'
try: try:
os.rename(frozen_c, backup) os.rename(frozen_c, backup)
@ -239,7 +266,7 @@ def main():
backup = None backup = None
outfp = open(frozen_c, 'w') outfp = open(frozen_c, 'w')
try: try:
makefreeze.makefreeze(outfp, dict) makefreeze.makefreeze(outfp, dict, debug)
if win and subsystem == 'windows': if win and subsystem == 'windows':
import winmakemakefile import winmakemakefile
outfp.write(winmakemakefile.WINMAINTEMPLATE) outfp.write(winmakemakefile.WINMAINTEMPLATE)
@ -251,6 +278,7 @@ def main():
frozen_c) frozen_c)
os.rename(backup, frozen_c) os.rename(backup, frozen_c)
# windows gets different treatment
if win: if win:
# Taking a shortcut here... # Taking a shortcut here...
import winmakemakefile import winmakemakefile
@ -264,14 +292,17 @@ def main():
outfp.close() outfp.close()
return return
# generate config.c and Makefile
builtins = [] builtins = []
unknown = [] unknown = []
mods = dict.keys() mods = dict.keys()
mods.sort() mods.sort()
for mod in mods: for mod in mods:
if dict[mod] == '<builtin>': if dict[mod].__code__:
continue
if not dict[mod].__file__:
builtins.append(mod) builtins.append(mod)
elif dict[mod] == '<unknown>': else:
unknown.append(mod) unknown.append(mod)
addfiles = [] addfiles = []

View File

@ -1,4 +1,5 @@
import marshal import marshal
import string
# Write a file containing frozen code for the modules in the dictionary. # Write a file containing frozen code for the modules in the dictionary.
@ -23,51 +24,31 @@ main(argc, argv)
""" """
def makefreeze(outfp, dict): def makefreeze(outfp, dict, debug=0):
done = [] done = []
mods = dict.keys() mods = dict.keys()
mods.sort() mods.sort()
for mod in mods: for mod in mods:
modfn = dict[mod] m = dict[mod]
try: mangled = string.join(string.split(mod, "."), "__")
str = makecode(modfn) if m.__code__:
except IOError, msg: if debug:
sys.stderr.write("%s: %s\n" % (modfn, str(msg))) print "freezing", mod, "..."
continue str = marshal.dumps(m.__code__)
if str: size = len(str)
done.append(mod, len(str)) if m.__path__:
writecode(outfp, mod, str) # Indicate package by negative size
size = -size
done.append((mod, mangled, size))
writecode(outfp, mangled, str)
if debug:
print "generating table of frozen modules"
outfp.write(header) outfp.write(header)
for mod, size in done: for mod, mangled, size in done:
outfp.write('\t{"%s", M_%s, %d},\n' % (mod, mod, size)) outfp.write('\t{"%s", M_%s, %d},\n' % (mod, mangled, size))
outfp.write(trailer) outfp.write(trailer)
# Return code string for a given module -- either a .py or a .pyc
# file. Return either a string or None (if it's not Python code).
# May raise IOError.
def makecode(filename):
if filename[-3:] == '.py':
f = open(filename, 'r')
try:
text = f.read()
code = compile(text, filename, 'exec')
finally:
f.close()
return marshal.dumps(code)
if filename[-4:] == '.pyc':
f = open(filename, 'rb')
try:
f.seek(8)
str = f.read()
finally:
f.close()
return str
# Can't generate code for this extension
return None
# Write a C initializer for a module containing the frozen python code. # Write a C initializer for a module containing the frozen python code.
# The array is called M_<mod>. # The array is called M_<mod>.
@ -78,22 +59,3 @@ def writecode(outfp, mod, str):
for c in str[i:i+16]: for c in str[i:i+16]:
outfp.write('%d,' % ord(c)) outfp.write('%d,' % ord(c))
outfp.write('\n};\n') outfp.write('\n};\n')
# Test for the above functions.
def test():
import os
import sys
if not sys.argv[1:]:
print 'usage: python freezepython.py file.py(c) ...'
sys.exit(2)
dict = {}
for arg in sys.argv[1:]:
base = os.path.basename(arg)
mod, ext = os.path.splitext(base)
dict[mod] = arg
makefreeze(sys.stdout, dict)
if __name__ == '__main__':
test()

View File

@ -0,0 +1,373 @@
"""Find modules used by a script, using introspection."""
import dis
import imp
import marshal
import os
import re
import string
import sys
# Numeric opcodes of the two import-related bytecode instructions, looked up
# by name in dis.opname.  ModuleFinder.load_module compares each opcode it
# reads from a code object against these values.
IMPORT_NAME = dis.opname.index('IMPORT_NAME')
IMPORT_FROM = dis.opname.index('IMPORT_FROM')
class Module:
    """Record describing one module or package known to the finder.

    Attributes:
    __name__ -- the name given to the constructor
    __file__ -- the file argument, or None
    __path__ -- the path argument, or None
    __code__ -- starts as None; ModuleFinder.load_module stores the
                compiled code object here
    """

    def __init__(self, name, file=None, path=None):
        self.__name__ = name
        self.__file__ = file
        self.__path__ = path
        self.__code__ = None

    def __repr__(self):
        """Return "Module(name[, file][, path])", omitting None fields."""
        text = "Module(" + repr(self.__name__)
        for extra in (self.__file__, self.__path__):
            if extra is not None:
                text = text + ", " + repr(extra)
        return text + ")"
class ModuleFinder:
def __init__(self, path=None, debug=0):
if path is None:
path = sys.path
self.path = path
self.modules = {}
self.badmodules = {}
self.debug = debug
self.indent = 0
def msg(self, level, str, *args):
if level <= self.debug:
for i in range(self.indent):
print " ",
print str,
for arg in args:
print repr(arg),
print
def msgin(self, *args):
level = args[0]
if level <= self.debug:
self.indent = self.indent + 1
apply(self.msg, args)
def msgout(self, *args):
level = args[0]
if level <= self.debug:
self.indent = self.indent - 1
apply(self.msg, args)
def run_script(self, pathname):
self.msg(2, "run_script", pathname)
fp = open(pathname)
stuff = ("", "r", imp.PY_SOURCE)
self.load_module('__main__', fp, pathname, stuff)
def load_file(self, pathname):
dir, name = os.path.split(pathname)
name, ext = os.path.splitext(name)
fp = open(pathname)
stuff = (ext, "r", imp.PY_SOURCE)
self.load_module(name, fp, pathname, stuff)
def import_hook(self, name, caller=None, fromlist=None):
self.msg(3, "import_hook", name, caller, fromlist)
parent = self.determine_parent(caller)
q, tail = self.find_head_package(parent, name)
m = self.load_tail(q, tail)
if not fromlist:
return q
if m.__path__:
self.ensure_fromlist(m, fromlist)
def determine_parent(self, caller):
self.msgin(4, "determine_parent", caller)
if not caller:
self.msgout(4, "determine_parent -> None")
return None
pname = caller.__name__
if caller.__path__:
parent = self.modules[pname]
assert caller is parent
self.msgout(4, "determine_parent ->", parent)
return parent
if '.' in pname:
i = string.rfind(pname, '.')
pname = pname[:i]
parent = self.modules[pname]
assert parent.__name__ == pname
self.msgout(4, "determine_parent ->", parent)
return parent
self.msgout(4, "determine_parent -> None")
return None
def find_head_package(self, parent, name):
self.msgin(4, "find_head_package", parent, name)
if '.' in name:
i = string.find(name, '.')
head = name[:i]
tail = name[i+1:]
else:
head = name
tail = ""
if parent:
qname = "%s.%s" % (parent.__name__, head)
else:
qname = head
q = self.import_module(head, qname, parent)
if q:
self.msgout(4, "find_head_package ->", (q, tail))
return q, tail
if parent:
qname = head
parent = None
q = self.import_module(head, qname, parent)
if q:
self.msgout(4, "find_head_package ->", (q, tail))
return q, tail
self.msgout(4, "raise ImportError: No module named", qname)
raise ImportError, "No module named " + qname
def load_tail(self, q, tail):
self.msgin(4, "load_tail", q, tail)
m = q
while tail:
i = string.find(tail, '.')
if i < 0: i = len(tail)
head, tail = tail[:i], tail[i+1:]
mname = "%s.%s" % (m.__name__, head)
m = self.import_module(head, mname, m)
if not m:
self.msgout(4, "raise ImportError: No module named", mname)
raise ImportError, "No module named " + mname
self.msgout(4, "load_tail ->", m)
return m
def ensure_fromlist(self, m, fromlist, recursive=0):
self.msg(4, "ensure_fromlist", m, fromlist, recursive)
for sub in fromlist:
if sub == "*":
if not recursive:
all = self.find_all_submodules(m)
if all:
self.ensure_fromlist(m, all, 1)
elif not hasattr(m, sub):
subname = "%s.%s" % (m.__name__, sub)
submod = self.import_module(sub, subname, m)
if not submod:
raise ImportError, "No module named " + subname
def find_all_submodules(self, m):
if not m.__path__:
return
modules = {}
suffixes = [".py", ".pyc", ".pyo"]
for dir in m.__path__:
try:
names = os.listdir(dir)
except os.error:
self.msg(2, "can't list directory", dir)
continue
for name in names:
mod = None
for suff in suffixes:
n = len(suff)
if name[-n:] == suff:
mod = name[:-n]
break
if mod and mod != "__init__":
modules[mod] = mod
return modules.keys()
def import_module(self, partname, fqname, parent):
self.msgin(3, "import_module", partname, fqname, parent)
try:
m = self.modules[fqname]
except KeyError:
pass
else:
self.msgout(3, "import_module ->", m)
return m
if self.badmodules.has_key(fqname):
self.msgout(3, "import_module -> None")
return None
try:
fp, pathname, stuff = self.find_module(partname,
parent and parent.__path__)
except ImportError:
self.msgout(3, "import_module ->", None)
return None
try:
m = self.load_module(fqname, fp, pathname, stuff)
finally:
if fp: fp.close()
if parent:
setattr(parent, partname, m)
self.msgout(3, "import_module ->", m)
return m
def load_module(self, fqname, fp, pathname, (suffix, mode, type)):
self.msgin(2, "load_module", fqname, fp and "fp", pathname)
if type == imp.PKG_DIRECTORY:
m = self.load_package(fqname, pathname)
self.msgout(2, "load_module ->", m)
return m
if type == imp.PY_SOURCE:
co = compile(fp.read(), pathname, 'exec')
elif type == imp.PY_COMPILED:
if fp.read(4) != imp.get_magic():
self.msgout(2, "raise ImportError: Bad magic number", pathname)
raise ImportError, "Bad magic number in %s", pathname
fp.read(4)
co = marshal.load(fp)
else:
co = None
m = self.add_module(fqname)
if co:
m.__file__ = pathname
m.__code__ = co
code = co.co_code
n = len(code)
i = 0
lastname = None
while i < n:
c = code[i]
i = i+1
op = ord(c)
if op >= dis.HAVE_ARGUMENT:
oparg = ord(code[i]) + ord(code[i+1])*256
i = i+2
if op == IMPORT_NAME:
name = lastname = co.co_names[oparg]
if not self.badmodules.has_key(lastname):
try:
self.import_hook(name, m)
except ImportError, msg:
self.msg(2, "ImportError:", str(msg))
self.badmodules[name] = None
elif op == IMPORT_FROM:
name = co.co_names[oparg]
assert lastname is not None
if not self.badmodules.has_key(lastname):
try:
self.import_hook(lastname, m, [name])
except ImportError, msg:
self.msg(2, "ImportError:", str(msg))
fullname = lastname + "." + name
self.badmodules[fullname] = None
else:
lastname = None
self.msgout(2, "load_module ->", m)
return m
def load_package(self, fqname, pathname):
self.msgin(2, "load_package", fqname, pathname)
m = self.add_module(fqname)
m.__file__ = pathname
m.__path__ = [pathname]
fp, buf, stuff = self.find_module("__init__", m.__path__)
self.load_module(fqname, fp, buf, stuff)
self.msgout(2, "load_package ->", m)
return m
def add_module(self, fqname):
if self.modules.has_key(fqname):
return self.modules[fqname]
self.modules[fqname] = m = Module(fqname)
return m
def find_module(self, name, path):
if path is None:
if name in sys.builtin_module_names:
return (None, None, ("", "", imp.C_BUILTIN))
path = self.path
return imp.find_module(name, path)
def report(self):
print
print " %-25s %s" % ("Name", "File")
print " %-25s %s" % ("----", "----")
# Print modules found
keys = self.modules.keys()
keys.sort()
for key in keys:
m = self.modules[key]
if m.__path__:
print "P",
else:
print "m",
print "%-25s" % key, m.__file__ or ""
# Print missing modules
keys = self.badmodules.keys()
keys.sort()
for key in keys:
print "?", key
def test():
# Parse command line
import getopt
try:
opts, args = getopt.getopt(sys.argv[1:], "dmp:q")
except getopt.error, msg:
print msg
return
# Process options
debug = 1
domods = 0
addpath = []
for o, a in opts:
if o == '-d':
debug = debug + 1
if o == '-m':
domods = 1
if o == '-p':
addpath = addpath + string.split(a, os.pathsep)
if o == '-q':
debug = 0
# Provide default arguments
if not args:
script = "hello.py"
else:
script = args[0]
# Set the path based on sys.path and the script directory
path = sys.path[:]
path[0] = os.path.dirname(script)
path = addpath + path
if debug > 1:
print "path:"
for item in path:
print " ", `item`
# Create the module finder and turn its crank
mf = ModuleFinder(path, debug)
for arg in args[1:]:
if arg == '-m':
domods = 1
continue
if domods:
if arg[-2:] == '.*':
mf.import_hook(arg[:-2], None, ["*"])
else:
mf.import_hook(arg)
else:
mf.load_file(arg)
mf.run_script(script)
mf.report()
# Run the command-line driver when executed as a script; a keyboard
# interrupt is reported with a short note instead of a traceback.
if __name__ == '__main__':
    try:
        test()
    except KeyboardInterrupt:
        print "\n[interrupt]"