patch attached to sf item #643711:

any_missing() now reports fewer bogus missing modules.

- I've rewritten scan_code() more or less from scratch,
factored bits and pieces out for readability.
- keep track of global assignments and failed imports per
module; use this to determine whether the Y in "from X
import Y" is a submodule or just a global name. This is not
100% doable: you can't tell which symbols are imported when
doing a star import of a non-Python module short of actually
importing it.
- added a new method to ModuleFinder: any_missing_maybe(),
which returns *two* lists, one with certain misses, one with
possible misses. The possible misses are *very* often false
alarms, so it's useful to keep this list separate.
any_missing() now simply returns the concatenation of the two
lists returned by any_missing_maybe().

TODO: documentation, test_modulefinder.py
This commit is contained in:
Just van Rossum 2002-12-31 16:33:00 +00:00
parent cbd6cd2312
commit e29310a2b3
1 changed files with 150 additions and 49 deletions

View File

@ -15,12 +15,11 @@ else:
# remain compatible with Python < 2.3 # remain compatible with Python < 2.3
READ_MODE = "r" READ_MODE = "r"
LOAD_CONST = dis.opname.index('LOAD_CONST')
IMPORT_NAME = dis.opname.index('IMPORT_NAME') IMPORT_NAME = dis.opname.index('IMPORT_NAME')
IMPORT_FROM = dis.opname.index('IMPORT_FROM')
STORE_NAME = dis.opname.index('STORE_NAME') STORE_NAME = dis.opname.index('STORE_NAME')
STORE_FAST = dis.opname.index('STORE_FAST')
STORE_GLOBAL = dis.opname.index('STORE_GLOBAL') STORE_GLOBAL = dis.opname.index('STORE_GLOBAL')
STORE_OPS = [STORE_NAME, STORE_FAST, STORE_GLOBAL] STORE_OPS = [STORE_NAME, STORE_GLOBAL]
# Modulefinder does a good job at simulating Python's, but it can not # Modulefinder does a good job at simulating Python's, but it can not
# handle __path__ modifications packages make at runtime. Therefore there # handle __path__ modifications packages make at runtime. Therefore there
@ -54,6 +53,13 @@ class Module:
self.__file__ = file self.__file__ = file
self.__path__ = path self.__path__ = path
self.__code__ = None self.__code__ = None
# The set of global names that are assigned to in the module.
# This includes those names imported through starimports of
# Python modules.
self.globalnames = {}
# The set of starimports this module did that could not be
# resolved, ie. a starimport from a non-Python module.
self.starimports = {}
def __repr__(self): def __repr__(self):
s = "Module(%s" % `self.__name__` s = "Module(%s" % `self.__name__`
@ -66,7 +72,7 @@ class Module:
class ModuleFinder: class ModuleFinder:
def __init__(self, path=None, debug=0, excludes = [], replace_paths = []): def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]):
if path is None: if path is None:
path = sys.path path = sys.path
self.path = path self.path = path
@ -233,8 +239,6 @@ class ModuleFinder:
return m return m
if self.badmodules.has_key(fqname): if self.badmodules.has_key(fqname):
self.msgout(3, "import_module -> None") self.msgout(3, "import_module -> None")
if parent:
self.badmodules[fqname][parent.__name__] = None
return None return None
try: try:
fp, pathname, stuff = self.find_module(partname, fp, pathname, stuff = self.find_module(partname,
@ -277,11 +281,39 @@ class ModuleFinder:
self.msgout(2, "load_module ->", m) self.msgout(2, "load_module ->", m)
return m return m
def _add_badmodule(self, name, caller):
if name not in self.badmodules:
self.badmodules[name] = {}
self.badmodules[name][caller.__name__] = 1
def _safe_import_hook(self, name, caller, fromlist):
# wrapper for self.import_hook() that won't raise ImportError
if name in self.badmodules:
self._add_badmodule(name, caller)
return
try:
self.import_hook(name, caller)
except ImportError, msg:
self.msg(2, "ImportError:", str(msg))
self._add_badmodule(name, caller)
else:
if fromlist:
for sub in fromlist:
if sub in self.badmodules:
self._add_badmodule(sub, caller)
continue
try:
self.import_hook(name, caller, [sub])
except ImportError, msg:
self.msg(2, "ImportError:", str(msg))
fullname = name + "." + sub
self._add_badmodule(fullname, caller)
def scan_code(self, co, m): def scan_code(self, co, m):
code = co.co_code code = co.co_code
n = len(code) n = len(code)
i = 0 i = 0
lastname = None fromlist = None
while i < n: while i < n:
c = code[i] c = code[i]
i = i+1 i = i+1
@ -289,33 +321,43 @@ class ModuleFinder:
if op >= dis.HAVE_ARGUMENT: if op >= dis.HAVE_ARGUMENT:
oparg = ord(code[i]) + ord(code[i+1])*256 oparg = ord(code[i]) + ord(code[i+1])*256
i = i+2 i = i+2
if op == IMPORT_NAME: if op == LOAD_CONST:
name = lastname = co.co_names[oparg] # An IMPORT_NAME is always preceded by a LOAD_CONST, it's
if not self.badmodules.has_key(lastname): # a tuple of "from" names, or None for a regular import.
try: # The tuple may contain "*" for "from <mod> import *"
self.import_hook(name, m) fromlist = co.co_consts[oparg]
except ImportError, msg: elif op == IMPORT_NAME:
self.msg(2, "ImportError:", str(msg)) assert fromlist is None or type(fromlist) is tuple
if not self.badmodules.has_key(name):
self.badmodules[name] = {}
self.badmodules[name][m.__name__] = None
elif op == IMPORT_FROM:
name = co.co_names[oparg] name = co.co_names[oparg]
assert lastname is not None have_star = 0
if not self.badmodules.has_key(lastname): if fromlist is not None:
try: if "*" in fromlist:
self.import_hook(lastname, m, [name]) have_star = 1
except ImportError, msg: fromlist = [f for f in fromlist if f != "*"]
self.msg(2, "ImportError:", str(msg)) self._safe_import_hook(name, m, fromlist)
fullname = lastname + "." + name if have_star:
if not self.badmodules.has_key(fullname): # We've encountered an "import *". If it is a Python module,
self.badmodules[fullname] = {} # the code has already been parsed and we can suck out the
self.badmodules[fullname][m.__name__] = None # global names.
mm = None
if m.__path__:
# At this point we don't know whether 'name' is a
# submodule of 'm' or a global module. Let's just try
# the full name first.
mm = self.modules.get(m.__name__ + "." + name)
if mm is None:
mm = self.modules.get(name)
if mm is not None:
m.globalnames.update(mm.globalnames)
m.starimports.update(mm.starimports)
if mm.__code__ is None:
m.starimports[name] = 1
else:
m.starimports[name] = 1
elif op in STORE_OPS: elif op in STORE_OPS:
# Skip; each IMPORT_FROM is followed by a STORE_* opcode # keep track of all global names that are assigned to
pass name = co.co_names[oparg]
else: m.globalnames[name] = 1
lastname = None
for c in co.co_consts: for c in co.co_consts:
if isinstance(c, type(co)): if isinstance(c, type(co)):
self.scan_code(c, m) self.scan_code(c, m)
@ -360,6 +402,9 @@ class ModuleFinder:
return imp.find_module(name, path) return imp.find_module(name, path)
def report(self): def report(self):
"""Print a report to stdout, listing the found modules with their
paths, as well as modules that are missing, or seem to be missing.
"""
print print
print " %-25s %s" % ("Name", "File") print " %-25s %s" % ("Name", "File")
print " %-25s %s" % ("----", "----") print " %-25s %s" % ("----", "----")
@ -367,6 +412,7 @@ class ModuleFinder:
keys = self.modules.keys() keys = self.modules.keys()
keys.sort() keys.sort()
for key in keys: for key in keys:
continue
m = self.modules[key] m = self.modules[key]
if m.__path__: if m.__path__:
print "P", print "P",
@ -375,33 +421,87 @@ class ModuleFinder:
print "%-25s" % key, m.__file__ or "" print "%-25s" % key, m.__file__ or ""
# Print missing modules # Print missing modules
keys = self.badmodules.keys() missing, maybe = self.any_missing_maybe()
keys.sort() if missing:
for key in keys: print
# ... but not if they were explicitly excluded. print "Missing modules:"
if key not in self.excludes: for name in missing:
mods = self.badmodules[key].keys() mods = self.badmodules[name].keys()
mods.sort() mods.sort()
print "?", key, "from", ', '.join(mods) print "?", name, "imported from", ', '.join(mods)
# Print modules that may be missing, but then again, maybe not...
if maybe:
print
print "Submodules thay appear to be missing, but could also be",
print "global names in the parent package:"
for name in maybe:
mods = self.badmodules[name].keys()
mods.sort()
print "?", name, "imported from", ', '.join(mods)
def any_missing(self):
    """Return one list with every module that appears to be missing.

    The result is the list of certain misses followed by the list of
    possible misses, both as computed by any_missing_maybe().  Call
    any_missing_maybe() directly to keep the two lists apart.
    """
    certain, possible = self.any_missing_maybe()
    return certain + possible
def any_missing_maybe(self):
    """Return two sorted lists: (certainly missing, possibly missing).

    Names listed in self.excludes are left out of both lists.  A dotted
    name "pkg.sub" whose package was found ends up in the second list
    when "sub" is not a known global of the package but the package has
    unresolved star imports: "sub" may then be a submodule *or* just a
    global name in the package.  That cannot always be decided, because
    it is impossible to tell which names "from module import *" brings
    in from an extension module, short of actually importing it.
    """
    certain = []
    possible = []
    for fullname in self.badmodules:
        if fullname in self.excludes:
            continue
        dot = fullname.rfind(".")
        package = None
        if dot >= 0:
            package = self.modules.get(fullname[:dot])
        if package is None:
            # Either a top-level module, or a submodule of a package
            # that was not found: reported as missing.
            certain.append(fullname)
            continue
        if fullname[:dot] in self.badmodules[fullname]:
            # The package tried to import this module itself and
            # failed.  It's definitely missing.
            certain.append(fullname)
        elif fullname[dot+1:] in package.globalnames:
            # It's a global in the package: definitely not missing.
            continue
        elif package.starimports:
            # It could be missing, but the package did an "import *"
            # from a non-Python module, so we simply can't be sure.
            possible.append(fullname)
        else:
            # Not a global in the package and no funny star imports:
            # very likely missing.  The symbol could be inserted into
            # the package from the outside, but since that's not good
            # style it is simply listed as missing.
            certain.append(fullname)
    certain.sort()
    possible.sort()
    return certain, possible
def replace_paths_in_code(self, co): def replace_paths_in_code(self, co):
new_filename = original_filename = os.path.normpath(co.co_filename) new_filename = original_filename = os.path.normpath(co.co_filename)
for f,r in self.replace_paths: for f, r in self.replace_paths:
if original_filename.startswith(f): if original_filename.startswith(f):
new_filename = r+original_filename[len(f):] new_filename = r + original_filename[len(f):]
break break
if self.debug and original_filename not in self.processed_paths: if self.debug and original_filename not in self.processed_paths:
if new_filename!=original_filename: if new_filename != original_filename:
self.msgout(2, "co_filename %r changed to %r" \ self.msgout(2, "co_filename %r changed to %r" \
% (original_filename,new_filename,)) % (original_filename,new_filename,))
else: else:
@ -477,10 +577,11 @@ def test():
mf.load_file(arg) mf.load_file(arg)
mf.run_script(script) mf.run_script(script)
mf.report() mf.report()
return mf # for -i debugging
if __name__ == '__main__': if __name__ == '__main__':
try: try:
test() mf = test()
except KeyboardInterrupt: except KeyboardInterrupt:
print "\n[interrupt]" print "\n[interrupt]"