Second phase of refactoring for runpy, pkgutil, pydoc, and setuptools

to share common PEP 302 support code, as described here:

http://mail.python.org/pipermail/python-dev/2006-April/063724.html

pydoc now supports PEP 302 importers, by way of utility functions in
pkgutil, such as 'walk_packages()'.  It will properly document
modules that are in zip files, and is backward compatible with Python
2.3 (setuptools installs for Python <2.5 will bundle it so that pydoc
doesn't break when used with eggs).

What has not changed is that pydoc command line options do not support
zip paths or other importer paths, and the webserver index does not
support sys.meta_path.  Those are probably okay as limitations.

Tasks remaining: write docs and Misc/NEWS for pkgutil/pydoc changes,
and update setuptools to use pkgutil wherever possible, then add it
to the stdlib.
This commit is contained in:
Phillip J. Eby 2006-04-18 00:59:55 +00:00
parent b507972cdd
commit ceb3087e1c
2 changed files with 239 additions and 114 deletions

View File

@ -11,6 +11,7 @@ from types import ModuleType
__all__ = [ __all__ = [
'get_importer', 'iter_importers', 'get_loader', 'find_loader', 'get_importer', 'iter_importers', 'get_loader', 'find_loader',
'walk_packages', 'iter_modules',
'ImpImporter', 'ImpLoader', 'read_code', 'extend_path', 'ImpImporter', 'ImpLoader', 'read_code', 'extend_path',
] ]
@ -27,6 +28,95 @@ def read_code(stream):
return marshal.load(stream) return marshal.load(stream)
def simplegeneric(func):
    """Make a trivial single-dispatch generic function.

    The returned wrapper dispatches on the class of its FIRST POSITIONAL
    argument: the class's MRO is walked in order and the first class with
    a registered implementation handles the call.  When nothing in the
    MRO is registered, the original ``func`` is called instead.

    Implementations are added with ``wrapper.register(typ, impl)``, or
    with ``wrapper.register(typ)`` used as a decorator; both forms return
    the implementation unchanged.

    The wrapper takes over ``func``'s name (where the interpreter allows
    it), docstring and a copy of its function attributes.
    """
    registry = {}

    def wrapper(*args, **kw):
        ob = args[0]
        try:
            cls = ob.__class__
        except AttributeError:
            cls = type(ob)
        try:
            mro = cls.__mro__
        except AttributeError:
            # No __mro__ (e.g. a classic class): derive a throwaway
            # new-style subclass to compute one, then drop that synthetic
            # class from the front of the result.
            try:
                class cls(cls, object):
                    pass
                mro = cls.__mro__[1:]
            except TypeError:
                mro = object,   # must be an ExtensionClass or some such  :(
        for t in mro:
            if t in registry:
                return registry[t](*args, **kw)
        return func(*args, **kw)

    try:
        wrapper.__name__ = func.__name__
    except (TypeError, AttributeError):
        pass    # Python 2.3 doesn't allow functions to be renamed

    def register(typ, func=None):
        if func is None:
            # Decorator form: register(typ) returns a one-argument registrar.
            return lambda f: register(typ, f)
        registry[typ] = func
        return func

    # Copy the attributes instead of aliasing func.__dict__: with a shared
    # dict, the 'register' assignment below would also appear on the
    # original (wrapped) function.
    wrapper.__dict__.update(func.__dict__)
    wrapper.__doc__ = func.__doc__
    wrapper.register = register
    return wrapper
def walk_packages(path=None, prefix='', onerror=None):
    """Yield submodule names+loaders recursively, for path or sys.path

    Every item produced by ``iter_modules(path, prefix)`` is yielded as an
    ``(importer, name, ispkg)`` tuple.  Each item flagged as a package is
    then imported (NOTE: this runs the package's import-time code) so
    that its ``__path__`` can be walked in turn, with ``name + '.'`` as
    the prefix.

    'onerror', if given, is called with no arguments whenever importing a
    package raises ImportError; that package is then not descended into.
    The same handler is passed down to every level of the recursion.
    """
    visited = {}    # path items already traversed by this invocation

    def seen(p):
        if p in visited:
            return True
        visited[p] = True

    for importer, name, ispkg in iter_modules(path, prefix):
        yield importer, name, ispkg

        if ispkg:
            try:
                __import__(name)
            except ImportError:
                if onerror is not None:
                    onerror()
            else:
                subpath = getattr(sys.modules[name], '__path__', None) or []

                # don't traverse path items we've seen before
                subpath = [p for p in subpath if not seen(p)]

                # Forward onerror so failures in nested packages are
                # reported too, not only those at the top level.
                for item in walk_packages(subpath, name + '.', onerror):
                    yield item
def iter_modules(path=None, prefix=''):
    """Yield submodule names+loaders for path or sys.path

    Yields ``(importer, name, ispkg)`` tuples.  With 'path' set to None
    the importers come from ``iter_importers()``; otherwise one importer
    is obtained through ``get_importer`` for each entry of 'path'.  A
    name is only reported the first time it is encountered.
    """
    if path is not None:
        importers = map(get_importer, path)
    else:
        importers = iter_importers()

    emitted = {}    # names already yielded (dict used as a set)
    for importer in importers:
        for name, ispkg in iter_importer_modules(importer, prefix):
            if name in emitted:
                continue
            emitted[name] = 1
            yield importer, name, ispkg
#@simplegeneric
def iter_importer_modules(importer, prefix=''):
    """List the modules an importer can see (default implementation).

    Delegates to the importer's own ``iter_modules(prefix)`` when it has
    one; importers without that method contribute nothing.
    """
    if hasattr(importer, 'iter_modules'):
        return importer.iter_modules(prefix)
    return []

# Decorator syntax is avoided (see the commented-out line above the def);
# the generic function is built by explicit rebinding instead.
iter_importer_modules = simplegeneric(iter_importer_modules)
class ImpImporter: class ImpImporter:
"""PEP 302 Importer that wraps Python's "classic" import algorithm """PEP 302 Importer that wraps Python's "classic" import algorithm
@ -49,13 +139,45 @@ class ImpImporter:
if self.path is None: if self.path is None:
path = None path = None
else: else:
path = [self.path] path = [os.path.realpath(self.path)]
try: try:
file, filename, etc = imp.find_module(subname, path) file, filename, etc = imp.find_module(subname, path)
except ImportError: except ImportError:
return None return None
return ImpLoader(fullname, file, filename, etc) return ImpLoader(fullname, file, filename, etc)
def iter_modules(self, prefix=''):
    """Yield (prefix + name, ispkg) for modules found directly in self.path.

    Nothing is yielded when self.path is None or is not a directory.
    A subdirectory counts as a package when it holds an ``__init__``
    module; other subdirectories are ignored.  Each name is reported once.
    Directories that cannot be listed are treated as empty instead of
    aborting the scan.
    """
    if self.path is None or not os.path.isdir(self.path):
        return

    yielded = {}
    import inspect

    try:
        filenames = os.listdir(self.path)
    except OSError:
        # the directory vanished or is unreadable: nothing to report
        filenames = []
    filenames.sort()  # handle packages before same-named modules

    for fn in filenames:
        modname = inspect.getmodulename(fn)
        if modname == '__init__' or modname in yielded:
            continue

        path = os.path.join(self.path, fn)
        ispkg = False

        if not modname and os.path.isdir(path) and '.' not in fn:
            modname = fn
            try:
                dircontents = os.listdir(path)
            except OSError:
                # unreadable subdirectory: cannot be confirmed as a package
                dircontents = []
            # a distinct loop variable keeps the outer 'fn' intact
            for subfn in dircontents:
                if inspect.getmodulename(subfn) == '__init__':
                    ispkg = True
                    break
            else:
                continue    # not a package

        if modname and '.' not in modname:
            yielded[modname] = 1
            yield prefix + modname, ispkg
class ImpLoader: class ImpLoader:
"""PEP 302 Loader that wraps Python's "classic" import algorithm """PEP 302 Loader that wraps Python's "classic" import algorithm
@ -97,7 +219,8 @@ class ImpLoader:
"module %s" % (self.fullname, fullname)) "module %s" % (self.fullname, fullname))
return fullname return fullname
def is_package(self): def is_package(self, fullname):
fullname = self._fix_name(fullname)
return self.etc[2]==imp.PKG_DIRECTORY return self.etc[2]==imp.PKG_DIRECTORY
def get_code(self, fullname=None): def get_code(self, fullname=None):
@ -136,6 +259,7 @@ class ImpLoader:
self.source = self._get_delegate().get_source() self.source = self._get_delegate().get_source()
return self.source return self.source
def _get_delegate(self): def _get_delegate(self):
return ImpImporter(self.filename).find_module('__init__') return ImpImporter(self.filename).find_module('__init__')
@ -149,6 +273,45 @@ class ImpLoader:
return None return None
try:
    import zipimport
    from zipimport import zipimporter

    def iter_zipimport_modules(importer, prefix=''):
        """Yield (prefix + name, ispkg) for modules a zipimporter can see.

        The archive listing is read from zipimport's private
        ``_zip_directory_cache``, keyed by ``importer.archive``; only
        entries located under ``importer.prefix`` are considered.  A
        directory containing an ``__init__.py*`` entry is reported as a
        package, a top-level module file as a plain module, and each name
        is reported once.  Both kinds of name carry the caller's 'prefix'.
        """
        dirlist = list(zipimport._zip_directory_cache[importer.archive].keys())
        dirlist.sort()
        _prefix = importer.prefix
        plen = len(_prefix)
        yielded = {}
        import inspect
        for fn in dirlist:
            if not fn.startswith(_prefix):
                continue

            # path components relative to the importer's own prefix
            fn = fn[plen:].split(os.sep)

            if len(fn) == 2 and fn[1].startswith('__init__.py'):
                if fn[0] not in yielded:
                    yielded[fn[0]] = 1
                    # packages get the prefix too, like plain modules below
                    yield prefix + fn[0], True

            if len(fn) != 1:
                continue

            modname = inspect.getmodulename(fn[0])
            if modname == '__init__':
                continue

            if modname and '.' not in modname and modname not in yielded:
                yielded[modname] = 1
                yield prefix + modname, False

    iter_importer_modules.register(zipimporter, iter_zipimport_modules)

except ImportError:
    # zipimport is unavailable: zip archives simply get no module lister
    pass
def get_importer(path_item): def get_importer(path_item):
"""Retrieve a PEP 302 importer for the given path item """Retrieve a PEP 302 importer for the given path item
@ -183,7 +346,7 @@ def get_importer(path_item):
return importer return importer
def iter_importers(fullname): def iter_importers(fullname=""):
"""Yield PEP 302 importers for the given module name """Yield PEP 302 importers for the given module name
If fullname contains a '.', the importers will be for the package If fullname contains a '.', the importers will be for the package
@ -224,7 +387,6 @@ def iter_importers(fullname):
if '.' not in fullname: if '.' not in fullname:
yield ImpImporter() yield ImpImporter()
def get_loader(module_or_name): def get_loader(module_or_name):
"""Get a PEP 302 "loader" object for module_or_name """Get a PEP 302 "loader" object for module_or_name
@ -250,7 +412,6 @@ def get_loader(module_or_name):
fullname = module_or_name fullname = module_or_name
return find_loader(fullname) return find_loader(fullname)
def find_loader(fullname): def find_loader(fullname):
"""Find a PEP 302 "loader" object for fullname """Find a PEP 302 "loader" object for fullname

View File

@ -52,10 +52,16 @@ Richard Chamberlain, for the first implementation of textdoc.
# the current directory is changed with os.chdir(), an incorrect # the current directory is changed with os.chdir(), an incorrect
# path will be displayed. # path will be displayed.
import sys, imp, os, re, types, inspect, __builtin__ import sys, imp, os, re, types, inspect, __builtin__, pkgutil
from repr import Repr from repr import Repr
from string import expandtabs, find, join, lower, split, strip, rfind, rstrip from string import expandtabs, find, join, lower, split, strip, rfind, rstrip
from collections import deque try:
from collections import deque
except ImportError:
# Python 2.3 compatibility
class deque(list):
def popleft(self):
return self.pop(0)
# --------------------------------------------------------- common routines # --------------------------------------------------------- common routines
@ -182,6 +188,23 @@ def ispackage(path):
return True return True
return False return False
def source_synopsis(file):
    """Return the first docstring line of the source readable from 'file'.

    'file' only needs a readline() method.  Leading '#' comment lines and
    blank lines are skipped.  If the first remaining line opens a
    triple-double-quoted string (optionally with an 'r' prefix), the
    first non-blank line of that string is returned, cut at any closing
    triple quote and stripped.  Otherwise None is returned.
    """
    current = file.readline()
    # skip the comment/blank preamble, stopping at end of file
    while current.startswith('#') or not current.strip():
        current = file.readline()
        if not current:
            break

    current = current.strip()
    if current.startswith('r"""'):
        current = current[1:]       # drop the raw-string marker
    if not current.startswith('"""'):
        return None

    current = current[3:]
    if current.endswith('\\'):
        current = current[:-1]      # opening quotes followed by a backslash
    # the summary may start on a later line than the opening quotes
    while not current.strip():
        current = file.readline()
        if not current:
            break
    return current.split('"""')[0].strip()
def synopsis(filename, cache={}): def synopsis(filename, cache={}):
"""Get the one-line summary out of a module file.""" """Get the one-line summary out of a module file."""
mtime = os.stat(filename).st_mtime mtime = os.stat(filename).st_mtime
@ -196,24 +219,11 @@ def synopsis(filename, cache={}):
if info and 'b' in info[2]: # binary modules have to be imported if info and 'b' in info[2]: # binary modules have to be imported
try: module = imp.load_module('__temp__', file, filename, info[1:]) try: module = imp.load_module('__temp__', file, filename, info[1:])
except: return None except: return None
result = split(module.__doc__ or '', '\n')[0] result = (module.__doc__ or '').splitlines()[0]
del sys.modules['__temp__'] del sys.modules['__temp__']
else: # text modules can be directly examined else: # text modules can be directly examined
line = file.readline() result = source_synopsis(file)
while line[:1] == '#' or not strip(line): file.close()
line = file.readline()
if not line: break
line = strip(line)
if line[:4] == 'r"""': line = line[1:]
if line[:3] == '"""':
line = line[3:]
if line[-1:] == '\\': line = line[:-1]
while not strip(line):
line = file.readline()
if not line: break
result = strip(split(line, '"""')[0])
else: result = None
file.close()
cache[filename] = (mtime, result) cache[filename] = (mtime, result)
return result return result
@ -643,16 +653,8 @@ class HTMLDoc(Doc):
if hasattr(object, '__path__'): if hasattr(object, '__path__'):
modpkgs = [] modpkgs = []
modnames = [] for importer, modname, ispkg in pkgutil.iter_modules(object.__path__):
for file in os.listdir(object.__path__[0]): modpkgs.append((modname, name, ispkg, 0))
path = os.path.join(object.__path__[0], file)
modname = inspect.getmodulename(file)
if modname != '__init__':
if modname and modname not in modnames:
modpkgs.append((modname, name, 0, 0))
modnames.append(modname)
elif ispackage(path):
modpkgs.append((file, name, 1, 0))
modpkgs.sort() modpkgs.sort()
contents = self.multicolumn(modpkgs, self.modpkglink) contents = self.multicolumn(modpkgs, self.modpkglink)
result = result + self.bigsection( result = result + self.bigsection(
@ -796,7 +798,10 @@ class HTMLDoc(Doc):
tag += ':<br>\n' tag += ':<br>\n'
# Sort attrs by name. # Sort attrs by name.
attrs.sort(key=lambda t: t[0]) try:
attrs.sort(key=lambda t: t[0])
except TypeError:
attrs.sort(lambda t1, t2: cmp(t1[0], t2[0])) # 2.3 compat
# Pump out the attrs, segregated by kind. # Pump out the attrs, segregated by kind.
attrs = spill('Methods %s' % tag, attrs, attrs = spill('Methods %s' % tag, attrs,
@ -914,25 +919,9 @@ class HTMLDoc(Doc):
"""Generate an HTML index for a directory of modules.""" """Generate an HTML index for a directory of modules."""
modpkgs = [] modpkgs = []
if shadowed is None: shadowed = {} if shadowed is None: shadowed = {}
seen = {} for importer, name, ispkg in pkgutil.iter_modules([dir]):
files = os.listdir(dir) modpkgs.append((name, '', ispkg, name in shadowed))
shadowed[name] = 1
def found(name, ispackage,
modpkgs=modpkgs, shadowed=shadowed, seen=seen):
if name not in seen:
modpkgs.append((name, '', ispackage, name in shadowed))
seen[name] = 1
shadowed[name] = 1
# Package spam/__init__.py takes precedence over module spam.py.
for file in files:
path = os.path.join(dir, file)
if ispackage(path): found(file, 1)
for file in files:
path = os.path.join(dir, file)
if os.path.isfile(path):
modname = inspect.getmodulename(file)
if modname: found(modname, 0)
modpkgs.sort() modpkgs.sort()
contents = self.multicolumn(modpkgs, self.modpkglink) contents = self.multicolumn(modpkgs, self.modpkglink)
@ -1059,14 +1048,12 @@ class TextDoc(Doc):
if hasattr(object, '__path__'): if hasattr(object, '__path__'):
modpkgs = [] modpkgs = []
for file in os.listdir(object.__path__[0]): for importer, modname, ispkg in pkgutil.iter_modules(object.__path__):
path = os.path.join(object.__path__[0], file) if ispkg:
modname = inspect.getmodulename(file) modpkgs.append(modname + ' (package)')
if modname != '__init__': else:
if modname and modname not in modpkgs: modpkgs.append(modname)
modpkgs.append(modname)
elif ispackage(path):
modpkgs.append(file + ' (package)')
modpkgs.sort() modpkgs.sort()
result = result + self.section( result = result + self.section(
'PACKAGE CONTENTS', join(modpkgs, '\n')) 'PACKAGE CONTENTS', join(modpkgs, '\n'))
@ -1490,20 +1477,9 @@ def writedoc(thing, forceload=0):
def writedocs(dir, pkgpath='', done=None): def writedocs(dir, pkgpath='', done=None):
"""Write out HTML documentation for all modules in a directory tree.""" """Write out HTML documentation for all modules in a directory tree."""
if done is None: done = {} if done is None: done = {}
for file in os.listdir(dir): for importer, modname, ispkg in pkgutil.walk_packages([dir], pkgpath):
path = os.path.join(dir, file) writedoc(modname)
if ispackage(path): return
writedocs(path, pkgpath + file + '.', done)
elif os.path.isfile(path):
modname = inspect.getmodulename(path)
if modname:
if modname == '__init__':
modname = pkgpath[:-1] # remove trailing period
else:
modname = pkgpath + modname
if modname not in done:
done[modname] = 1
writedoc(modname)
class Helper: class Helper:
keywords = { keywords = {
@ -1830,30 +1806,9 @@ class Scanner:
self.state.append((child, self.children(child))) self.state.append((child, self.children(child)))
return child return child
class ModuleScanner(Scanner):
class ModuleScanner:
"""An interruptible scanner that searches module synopses.""" """An interruptible scanner that searches module synopses."""
def __init__(self):
roots = map(lambda dir: (dir, ''), pathdirs())
Scanner.__init__(self, roots, self.submodules, self.isnewpackage)
self.inodes = map(lambda (dir, pkg): os.stat(dir).st_ino, roots)
def submodules(self, (dir, package)):
children = []
for file in os.listdir(dir):
path = os.path.join(dir, file)
if ispackage(path):
children.append((path, package + (package and '.') + file))
else:
children.append((path, package))
children.sort() # so that spam.py comes before spam.pyc or spam.pyo
return children
def isnewpackage(self, (dir, package)):
inode = os.path.exists(dir) and os.stat(dir).st_ino
if not (os.path.islink(dir) and inode in self.inodes):
self.inodes.append(inode) # detect circular symbolic links
return ispackage(dir)
return False
def run(self, callback, key=None, completer=None): def run(self, callback, key=None, completer=None):
if key: key = lower(key) if key: key = lower(key)
@ -1870,22 +1825,31 @@ class ModuleScanner(Scanner):
if find(lower(modname + ' - ' + desc), key) >= 0: if find(lower(modname + ' - ' + desc), key) >= 0:
callback(None, modname, desc) callback(None, modname, desc)
while not self.quit: for importer, modname, ispkg in pkgutil.walk_packages():
node = self.next() if self.quit:
if not node: break break
path, package = node if key is None:
modname = inspect.getmodulename(path) callback(None, modname, '')
if os.path.isfile(path) and modname: else:
modname = package + (package and '.') + modname loader = importer.find_module(modname)
if not modname in seen: if hasattr(loader,'get_source'):
seen[modname] = 1 # if we see spam.py, skip spam.pyc import StringIO
if key is None: desc = source_synopsis(
callback(path, modname, '') StringIO.StringIO(loader.get_source(modname))
) or ''
if hasattr(loader,'get_filename'):
path = loader.get_filename(modname)
else: else:
desc = synopsis(path) or '' path = None
if find(lower(modname + ' - ' + desc), key) >= 0: else:
callback(path, modname, desc) module = loader.load_module(modname)
if completer: completer() desc = (module.__doc__ or '').splitlines()[0]
path = getattr(module,'__file__',None)
if find(lower(modname + ' - ' + desc), key) >= 0:
callback(path, modname, desc)
if completer:
completer()
def apropos(key): def apropos(key):
"""Print all the one-line module summaries that contain a substring.""" """Print all the one-line module summaries that contain a substring."""
@ -1950,7 +1914,7 @@ def serve(port, callback=None, completer=None):
'Built-in Modules', '#ffffff', '#ee77aa', contents)] 'Built-in Modules', '#ffffff', '#ee77aa', contents)]
seen = {} seen = {}
for dir in pathdirs(): for dir in sys.path:
indices.append(html.index(dir, seen)) indices.append(html.index(dir, seen))
contents = heading + join(indices) + '''<p align=right> contents = heading + join(indices) + '''<p align=right>
<font color="#909090" face="helvetica, arial"><strong> <font color="#909090" face="helvetica, arial"><strong>