patch attached to sf item #643711:

any_missing() returns less bogus missing modules.

- I've rewritten scan_code() more or less from scratch,
factored bits and pieces out for readability.
- keep track of global assignments and failed imports per
module; use this to determine whether the Y in "from X
import Y" is a submodule or just a global name. This is not
100% doable: you can't tell which symbols are imported when
doing a star import of a non-Python module short of actually
importing it.
- added a new method to ModuleFinder: any_missing_maybe(),
which returns *two* lists, one with certain misses, one with
possible misses. The possible misses are *very* often false
alarms, so it's useful to keep this list separate.
any_misses() now simply returns the union of
any_missing_maybe().

TODO: documentation, test_modulefinder.py
This commit is contained in:
Just van Rossum 2002-12-31 16:33:00 +00:00
parent cbd6cd2312
commit e29310a2b3
1 changed files with 150 additions and 49 deletions

View File

@ -15,12 +15,11 @@ else:
# remain compatible with Python < 2.3
READ_MODE = "r"
LOAD_CONST = dis.opname.index('LOAD_CONST')
IMPORT_NAME = dis.opname.index('IMPORT_NAME')
IMPORT_FROM = dis.opname.index('IMPORT_FROM')
STORE_NAME = dis.opname.index('STORE_NAME')
STORE_FAST = dis.opname.index('STORE_FAST')
STORE_GLOBAL = dis.opname.index('STORE_GLOBAL')
STORE_OPS = [STORE_NAME, STORE_FAST, STORE_GLOBAL]
STORE_OPS = [STORE_NAME, STORE_GLOBAL]
# Modulefinder does a good job at simulating Python's, but it can not
# handle __path__ modifications packages make at runtime. Therefore there
@ -54,6 +53,13 @@ class Module:
self.__file__ = file
self.__path__ = path
self.__code__ = None
# The set of global names that are assigned to in the module.
# This includes those names imported through starimports of
# Python modules.
self.globalnames = {}
# The set of starimports this module did that could not be
# resolved, ie. a starimport from a non-Python module.
self.starimports = {}
def __repr__(self):
s = "Module(%s" % `self.__name__`
@ -66,7 +72,7 @@ class Module:
class ModuleFinder:
def __init__(self, path=None, debug=0, excludes = [], replace_paths = []):
def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]):
if path is None:
path = sys.path
self.path = path
@ -233,8 +239,6 @@ class ModuleFinder:
return m
if self.badmodules.has_key(fqname):
self.msgout(3, "import_module -> None")
if parent:
self.badmodules[fqname][parent.__name__] = None
return None
try:
fp, pathname, stuff = self.find_module(partname,
@ -277,11 +281,39 @@ class ModuleFinder:
self.msgout(2, "load_module ->", m)
return m
def _add_badmodule(self, name, caller):
if name not in self.badmodules:
self.badmodules[name] = {}
self.badmodules[name][caller.__name__] = 1
def _safe_import_hook(self, name, caller, fromlist):
# wrapper for self.import_hook() that won't raise ImportError
if name in self.badmodules:
self._add_badmodule(name, caller)
return
try:
self.import_hook(name, caller)
except ImportError, msg:
self.msg(2, "ImportError:", str(msg))
self._add_badmodule(name, caller)
else:
if fromlist:
for sub in fromlist:
if sub in self.badmodules:
self._add_badmodule(sub, caller)
continue
try:
self.import_hook(name, caller, [sub])
except ImportError, msg:
self.msg(2, "ImportError:", str(msg))
fullname = name + "." + sub
self._add_badmodule(fullname, caller)
def scan_code(self, co, m):
code = co.co_code
n = len(code)
i = 0
lastname = None
fromlist = None
while i < n:
c = code[i]
i = i+1
@ -289,33 +321,43 @@ class ModuleFinder:
if op >= dis.HAVE_ARGUMENT:
oparg = ord(code[i]) + ord(code[i+1])*256
i = i+2
if op == IMPORT_NAME:
name = lastname = co.co_names[oparg]
if not self.badmodules.has_key(lastname):
try:
self.import_hook(name, m)
except ImportError, msg:
self.msg(2, "ImportError:", str(msg))
if not self.badmodules.has_key(name):
self.badmodules[name] = {}
self.badmodules[name][m.__name__] = None
elif op == IMPORT_FROM:
if op == LOAD_CONST:
# An IMPORT_NAME is always preceded by a LOAD_CONST, it's
# a tuple of "from" names, or None for a regular import.
# The tuple may contain "*" for "from <mod> import *"
fromlist = co.co_consts[oparg]
elif op == IMPORT_NAME:
assert fromlist is None or type(fromlist) is tuple
name = co.co_names[oparg]
assert lastname is not None
if not self.badmodules.has_key(lastname):
try:
self.import_hook(lastname, m, [name])
except ImportError, msg:
self.msg(2, "ImportError:", str(msg))
fullname = lastname + "." + name
if not self.badmodules.has_key(fullname):
self.badmodules[fullname] = {}
self.badmodules[fullname][m.__name__] = None
elif op in STORE_OPS:
# Skip; each IMPORT_FROM is followed by a STORE_* opcode
pass
have_star = 0
if fromlist is not None:
if "*" in fromlist:
have_star = 1
fromlist = [f for f in fromlist if f != "*"]
self._safe_import_hook(name, m, fromlist)
if have_star:
# We've encountered an "import *". If it is a Python module,
# the code has already been parsed and we can suck out the
# global names.
mm = None
if m.__path__:
# At this point we don't know whether 'name' is a
# submodule of 'm' or a global module. Let's just try
# the full name first.
mm = self.modules.get(m.__name__ + "." + name)
if mm is None:
mm = self.modules.get(name)
if mm is not None:
m.globalnames.update(mm.globalnames)
m.starimports.update(mm.starimports)
if mm.__code__ is None:
m.starimports[name] = 1
else:
lastname = None
m.starimports[name] = 1
elif op in STORE_OPS:
# keep track of all global names that are assigned to
name = co.co_names[oparg]
m.globalnames[name] = 1
for c in co.co_consts:
if isinstance(c, type(co)):
self.scan_code(c, m)
@ -360,6 +402,9 @@ class ModuleFinder:
return imp.find_module(name, path)
def report(self):
"""Print a report to stdout, listing the found modules with their
paths, as well as modules that are missing, or seem to be missing.
"""
print
print " %-25s %s" % ("Name", "File")
print " %-25s %s" % ("----", "----")
@ -367,6 +412,7 @@ class ModuleFinder:
keys = self.modules.keys()
keys.sort()
for key in keys:
continue
m = self.modules[key]
if m.__path__:
print "P",
@ -375,33 +421,87 @@ class ModuleFinder:
print "%-25s" % key, m.__file__ or ""
# Print missing modules
keys = self.badmodules.keys()
keys.sort()
for key in keys:
# ... but not if they were explicitly excluded.
if key not in self.excludes:
mods = self.badmodules[key].keys()
missing, maybe = self.any_missing_maybe()
if missing:
print
print "Missing modules:"
for name in missing:
mods = self.badmodules[name].keys()
mods.sort()
print "?", key, "from", ', '.join(mods)
print "?", name, "imported from", ', '.join(mods)
# Print modules that may be missing, but then again, maybe not...
if maybe:
print
print "Submodules thay appear to be missing, but could also be",
print "global names in the parent package:"
for name in maybe:
mods = self.badmodules[name].keys()
mods.sort()
print "?", name, "imported from", ', '.join(mods)
def any_missing(self):
keys = self.badmodules.keys()
"""Return a list of modules that appear to be missing. Use
any_missing_maybe() if you want to know which modules are
certain to be missing, and which *may* be missing.
"""
missing, maybe = self.any_missing_maybe()
return missing + maybe
def any_missing_maybe(self):
"""Return two lists, one with modules that are certainly missing
and one with modules that *may* be missing. The latter names could
either be submodules *or* just global names in the package.
The reason it can't always be determined is that it's impossible to
tell which names are imported when "from module import *" is done
with an extension module, short of actually importing it.
"""
missing = []
for key in keys:
if key not in self.excludes:
# Missing, and its not supposed to be
missing.append(key)
return missing
maybe = []
for name in self.badmodules:
if name in self.excludes:
continue
i = name.rfind(".")
if i < 0:
missing.append(name)
continue
subname = name[i+1:]
pkgname = name[:i]
pkg = self.modules.get(pkgname)
if pkg is not None:
if pkgname in self.badmodules[name]:
# The package tried to import this module itself and
# failed. It's definitely missing.
missing.append(name)
elif subname in pkg.globalnames:
# It's a global in the package: definitely not missing.
pass
elif pkg.starimports:
# It could be missing, but the package did an "import *"
# from a non-Python module, so we simply can't be sure.
maybe.append(name)
else:
# It's not a global in the package, the package didn't
# do funny star imports, it's very likely to be missing.
# The symbol could be inserted into the package from the
# outside, but since that's not good style we simply list
# it missing.
missing.append(name)
else:
missing.append(name)
missing.sort()
maybe.sort()
return missing, maybe
def replace_paths_in_code(self, co):
new_filename = original_filename = os.path.normpath(co.co_filename)
for f,r in self.replace_paths:
for f, r in self.replace_paths:
if original_filename.startswith(f):
new_filename = r+original_filename[len(f):]
new_filename = r + original_filename[len(f):]
break
if self.debug and original_filename not in self.processed_paths:
if new_filename!=original_filename:
if new_filename != original_filename:
self.msgout(2, "co_filename %r changed to %r" \
% (original_filename,new_filename,))
else:
@ -477,10 +577,11 @@ def test():
mf.load_file(arg)
mf.run_script(script)
mf.report()
return mf # for -i debugging
if __name__ == '__main__':
try:
test()
mf = test()
except KeyboardInterrupt:
print "\n[interrupt]"