Second phase of refactoring for runpy, pkgutil, pydoc, and setuptools to share common PEP 302 support code, as described here:

http://mail.python.org/pipermail/python-dev/2006-April/063724.html

pydoc now supports PEP 302 importers, by way of utility functions in
pkgutil, such as 'walk_packages()'.  It will properly document
modules that are in zip files, and is backward compatible to Python
2.3 (setuptools installs for Python <2.5 will bundle it so pydoc
doesn't break when used with eggs.)

What has not changed is that pydoc command line options do not support
zip paths or other importer paths, and the webserver index does not
support sys.meta_path.  Those are probably okay as limitations.

Tasks remaining: write docs and Misc/NEWS for pkgutil/pydoc changes,
and update setuptools to use pkgutil wherever possible, then add it
to the stdlib.
This commit is contained in:
Phillip J. Eby 2006-04-18 00:59:55 +00:00
parent b507972cdd
commit ceb3087e1c
2 changed files with 239 additions and 114 deletions

View File

@ -11,6 +11,7 @@ from types import ModuleType
__all__ = [
'get_importer', 'iter_importers', 'get_loader', 'find_loader',
'walk_packages', 'iter_modules',
'ImpImporter', 'ImpLoader', 'read_code', 'extend_path',
]
@ -27,6 +28,95 @@ def read_code(stream):
return marshal.load(stream)
def simplegeneric(func):
    """Make a trivial single-dispatch generic function

    The returned wrapper dispatches on the class of its first positional
    argument: the classes in that argument's method resolution order are
    checked in turn and the first one with a registered implementation is
    called with the original arguments.  If none is registered, `func`
    itself is called.

    Implementations are added with ``wrapper.register(typ, impl)``, or with
    ``wrapper.register(typ)`` which returns a one-argument callable that
    registers (and returns) whatever it is given.
    """
    implementations = {}

    def dispatch_order(ob):
        # Find the classes to try, most specific first.
        try:
            klass = ob.__class__
        except AttributeError:
            klass = type(ob)
        try:
            return klass.__mro__
        except AttributeError:
            pass
        # No __mro__ on this class: derive a throwaway subclass that also
        # inherits from object, and use its MRO minus the subclass itself.
        try:
            class probe(klass, object):
                pass
        except TypeError:
            return (object,)    # must be an ExtensionClass or some such  :(
        return probe.__mro__[1:]

    def wrapper(*args, **kw):
        for klass in dispatch_order(args[0]):
            if klass in implementations:
                return implementations[klass](*args, **kw)
        return func(*args, **kw)

    try:
        wrapper.__name__ = func.__name__
    except (TypeError, AttributeError):
        pass    # Python 2.3 doesn't allow functions to be renamed

    def register(typ, func=None):
        if func is not None:
            implementations[typ] = func
            return func
        # Called with only a type: hand back a registrar for later use.
        return lambda f: register(typ, f)

    # The wrapper shares func's attribute dict, so `register` below ends up
    # visible through both objects.
    wrapper.__dict__ = func.__dict__
    wrapper.__doc__ = func.__doc__
    wrapper.register = register
    return wrapper
def walk_packages(path=None, prefix='', onerror=None):
    """Yield submodule names+loaders recursively, for path or sys.path

    Yields the same (importer, name, ispkg) tuples that iter_modules()
    produces for `path` and `prefix`.  Every name reported as a package is
    imported so that its __path__ can be walked in turn, with the package
    name plus '.' as the new prefix.

    `onerror`, if given, is called with no arguments whenever importing a
    package raises ImportError; that package is then not descended into.
    The same callback is used at every level of the recursion.
    """
    # The default dict is created each time this `def` runs, i.e. once per
    # walk_packages() call, so it only remembers path items handled by
    # this particular call.
    def seen(p, m={}):
        if p in m:
            return True
        m[p] = True

    for importer, name, ispkg in iter_modules(path, prefix):
        yield importer, name, ispkg

        if ispkg:
            try:
                __import__(name)
            except ImportError:
                if onerror is not None:
                    onerror()
            else:
                subpath = getattr(sys.modules[name], '__path__', None) or []

                # don't traverse path items we've seen before
                subpath = [p for p in subpath if not seen(p)]

                # Hand onerror down; otherwise import failures inside
                # nested packages would be silently ignored.
                for item in walk_packages(subpath, name+'.', onerror):
                    yield item
def iter_modules(path=None, prefix=''):
    """Yield submodule names+loaders for path or sys.path

    Yields (importer, name, ispkg) tuples.  With `path` left as None the
    importers come from iter_importers(); otherwise one importer is
    obtained per path item via get_importer().  A name is reported only
    the first time any importer produces it.
    """
    if path is not None:
        importers = map(get_importer, path)
    else:
        importers = iter_importers()

    reported = {}
    for importer in importers:
        for name, ispkg in iter_importer_modules(importer, prefix):
            if name in reported:
                continue
            reported[name] = 1
            yield importer, name, ispkg
# Wrapped by hand below instead of using decorator syntax (@simplegeneric).
def iter_importer_modules(importer, prefix=''):
    """Default module lister: defer to importer.iter_modules(prefix).

    Importers without an iter_modules attribute yield nothing (an empty
    list is returned).
    """
    if hasattr(importer, 'iter_modules'):
        return importer.iter_modules(prefix)
    return []

iter_importer_modules = simplegeneric(iter_importer_modules)
class ImpImporter:
"""PEP 302 Importer that wraps Python's "classic" import algorithm
@ -49,13 +139,45 @@ class ImpImporter:
if self.path is None:
path = None
else:
path = [self.path]
path = [os.path.realpath(self.path)]
try:
file, filename, etc = imp.find_module(subname, path)
except ImportError:
return None
return ImpLoader(fullname, file, filename, etc)
def iter_modules(self, prefix=''):
    """Yield (prefix + name, ispkg) for entries found directly in self.path.

    Nothing is yielded when self.path is None or is not a directory.
    A file counts as a module when inspect.getmodulename() recognises its
    name; a subdirectory counts as a package when its name contains no
    '.' and it holds a file whose module name is '__init__'.  Each name
    is yielded at most once, and names containing '.' are skipped.
    Directories that cannot be listed are treated as empty.
    """
    if self.path is None or not os.path.isdir(self.path):
        return

    yielded = {}
    import inspect

    try:
        filenames = os.listdir(self.path)
    except OSError:
        # unreadable directory: there is nothing we can report from it
        filenames = []
    filenames.sort()  # handle packages before same-named modules

    for fn in filenames:
        modname = inspect.getmodulename(fn)
        if modname=='__init__' or modname in yielded:
            continue

        path = os.path.join(self.path, fn)
        ispkg = False

        if not modname and os.path.isdir(path) and '.' not in fn:
            modname = fn
            try:
                dircontents = os.listdir(path)
            except OSError:
                # unreadable subdirectory: cannot be shown to be a package
                dircontents = []
            # a distinct loop variable keeps the outer `fn` intact
            for entry in dircontents:
                if inspect.getmodulename(entry)=='__init__':
                    ispkg = True
                    break
            else:
                continue    # not a package

        if modname and '.' not in modname:
            yielded[modname] = 1
            yield prefix + modname, ispkg
class ImpLoader:
"""PEP 302 Loader that wraps Python's "classic" import algorithm
@ -97,7 +219,8 @@ class ImpLoader:
"module %s" % (self.fullname, fullname))
return fullname
def is_package(self):
def is_package(self, fullname):
    """Return whether this loader's target is a package directory.

    The answer comes from comparing self.etc[2] with imp.PKG_DIRECTORY.
    """
    # The value returned by _fix_name() is not used below.
    # NOTE(review): presumably this call is kept so that a `fullname`
    # that does not match this loader is rejected -- confirm in _fix_name.
    fullname = self._fix_name(fullname)
    return self.etc[2]==imp.PKG_DIRECTORY
def get_code(self, fullname=None):
@ -136,6 +259,7 @@ class ImpLoader:
self.source = self._get_delegate().get_source()
return self.source
def _get_delegate(self):
return ImpImporter(self.filename).find_module('__init__')
@ -149,6 +273,45 @@ class ImpLoader:
return None
try:
import zipimport
from zipimport import zipimporter
def iter_zipimport_modules(importer, prefix=''):
    """Yield (prefix + name, ispkg) for modules directly under a zipimporter.

    The archive listing is read from zipimport._zip_directory_cache, keyed
    by importer.archive.  Only entries below importer.prefix are looked at.
    An entry of the form <name>/__init__.py* marks <name> as a package; an
    entry with no further directory component is a module when
    inspect.getmodulename() recognises it.  Each name is yielded once, and
    both packages and modules have `prefix` prepended.
    """
    import inspect

    # Copy the keys into a list first so that sort() is available whatever
    # kind of object keys() returns.
    dirlist = list(zipimport._zip_directory_cache[importer.archive].keys())
    dirlist.sort()
    _prefix = importer.prefix
    plen = len(_prefix)
    yielded = {}

    for fn in dirlist:
        if not fn.startswith(_prefix):
            continue

        parts = fn[plen:].split(os.sep)

        if len(parts)==2 and parts[1].startswith('__init__.py'):
            if parts[0] not in yielded:
                yielded[parts[0]] = 1
                # packages get the prefix too, exactly like plain modules
                yield prefix + parts[0], True

        if len(parts)!=1:
            continue

        modname = inspect.getmodulename(parts[0])
        if modname=='__init__':
            continue

        if modname and '.' not in modname and modname not in yielded:
            yielded[modname] = 1
            yield prefix + modname, False
iter_importer_modules.register(zipimporter, iter_zipimport_modules)
except ImportError:
pass
def get_importer(path_item):
"""Retrieve a PEP 302 importer for the given path item
@ -183,7 +346,7 @@ def get_importer(path_item):
return importer
def iter_importers(fullname):
def iter_importers(fullname=""):
"""Yield PEP 302 importers for the given module name
If fullname contains a '.', the importers will be for the package
@ -224,7 +387,6 @@ def iter_importers(fullname):
if '.' not in fullname:
yield ImpImporter()
def get_loader(module_or_name):
"""Get a PEP 302 "loader" object for module_or_name
@ -250,7 +412,6 @@ def get_loader(module_or_name):
fullname = module_or_name
return find_loader(fullname)
def find_loader(fullname):
"""Find a PEP 302 "loader" object for fullname

View File

@ -52,10 +52,16 @@ Richard Chamberlain, for the first implementation of textdoc.
# the current directory is changed with os.chdir(), an incorrect
# path will be displayed.
import sys, imp, os, re, types, inspect, __builtin__
import sys, imp, os, re, types, inspect, __builtin__, pkgutil
from repr import Repr
from string import expandtabs, find, join, lower, split, strip, rfind, rstrip
from collections import deque
try:
    from collections import deque
except ImportError:
    # Python 2.3 compatibility
    class deque(list):
        """List-based stand-in providing the popleft() method."""
        def popleft(self):
            # Remove and return the first item of the list.
            return self.pop(0)
# --------------------------------------------------------- common routines
@ -182,23 +188,7 @@ def ispackage(path):
return True
return False
def synopsis(filename, cache={}):
"""Get the one-line summary out of a module file."""
mtime = os.stat(filename).st_mtime
lastupdate, result = cache.get(filename, (0, None))
if lastupdate < mtime:
info = inspect.getmoduleinfo(filename)
try:
file = open(filename)
except IOError:
# module can't be opened, so skip it
return None
if info and 'b' in info[2]: # binary modules have to be imported
try: module = imp.load_module('__temp__', file, filename, info[1:])
except: return None
result = split(module.__doc__ or '', '\n')[0]
del sys.modules['__temp__']
else: # text modules can be directly examined
def source_synopsis(file):
line = file.readline()
while line[:1] == '#' or not strip(line):
line = file.readline()
@ -213,6 +203,26 @@ def synopsis(filename, cache={}):
if not line: break
result = strip(split(line, '"""')[0])
else: result = None
return result
def synopsis(filename, cache={}):
    """Get the one-line summary out of a module file.

    Results are memoized in `cache`, keyed by filename together with the
    file's modification time, and recomputed only when the file is newer
    than the cached entry.  Returns None if the file cannot be opened or,
    for a binary module, cannot be loaded.  A binary module with an empty
    docstring yields ''.
    """
    mtime = os.stat(filename).st_mtime
    lastupdate, result = cache.get(filename, (0, None))
    if lastupdate < mtime:
        info = inspect.getmoduleinfo(filename)
        try:
            file = open(filename)
        except IOError:
            # module can't be opened, so skip it
            return None
        # From here on the file is closed on every exit path.
        try:
            if info and 'b' in info[2]: # binary modules have to be imported
                # Best effort: loading an arbitrary module may fail in any
                # way, and any failure just means "no synopsis".
                try:
                    module = imp.load_module('__temp__', file, filename,
                                             info[1:])
                except:
                    return None
                # splitlines() gives [] for an empty docstring, so the
                # first line must not be indexed unconditionally.
                doclines = (module.__doc__ or '').splitlines()
                if doclines:
                    result = doclines[0]
                else:
                    result = ''
                sys.modules.pop('__temp__', None)
            else: # text modules can be directly examined
                result = source_synopsis(file)
        finally:
            file.close()
        cache[filename] = (mtime, result)
    return result
@ -643,16 +653,8 @@ class HTMLDoc(Doc):
if hasattr(object, '__path__'):
modpkgs = []
modnames = []
for file in os.listdir(object.__path__[0]):
path = os.path.join(object.__path__[0], file)
modname = inspect.getmodulename(file)
if modname != '__init__':
if modname and modname not in modnames:
modpkgs.append((modname, name, 0, 0))
modnames.append(modname)
elif ispackage(path):
modpkgs.append((file, name, 1, 0))
for importer, modname, ispkg in pkgutil.iter_modules(object.__path__):
modpkgs.append((modname, name, ispkg, 0))
modpkgs.sort()
contents = self.multicolumn(modpkgs, self.modpkglink)
result = result + self.bigsection(
@ -796,7 +798,10 @@ class HTMLDoc(Doc):
tag += ':<br>\n'
# Sort attrs by name.
try:
attrs.sort(key=lambda t: t[0])
except TypeError:
attrs.sort(lambda t1, t2: cmp(t1[0], t2[0])) # 2.3 compat
# Pump out the attrs, segregated by kind.
attrs = spill('Methods %s' % tag, attrs,
@ -914,26 +919,10 @@ class HTMLDoc(Doc):
"""Generate an HTML index for a directory of modules."""
modpkgs = []
if shadowed is None: shadowed = {}
seen = {}
files = os.listdir(dir)
def found(name, ispackage,
modpkgs=modpkgs, shadowed=shadowed, seen=seen):
if name not in seen:
modpkgs.append((name, '', ispackage, name in shadowed))
seen[name] = 1
for importer, name, ispkg in pkgutil.iter_modules([dir]):
modpkgs.append((name, '', ispkg, name in shadowed))
shadowed[name] = 1
# Package spam/__init__.py takes precedence over module spam.py.
for file in files:
path = os.path.join(dir, file)
if ispackage(path): found(file, 1)
for file in files:
path = os.path.join(dir, file)
if os.path.isfile(path):
modname = inspect.getmodulename(file)
if modname: found(modname, 0)
modpkgs.sort()
contents = self.multicolumn(modpkgs, self.modpkglink)
return self.bigsection(dir, '#ffffff', '#ee77aa', contents)
@ -1059,14 +1048,12 @@ class TextDoc(Doc):
if hasattr(object, '__path__'):
modpkgs = []
for file in os.listdir(object.__path__[0]):
path = os.path.join(object.__path__[0], file)
modname = inspect.getmodulename(file)
if modname != '__init__':
if modname and modname not in modpkgs:
for importer, modname, ispkg in pkgutil.iter_modules(object.__path__):
if ispkg:
modpkgs.append(modname + ' (package)')
else:
modpkgs.append(modname)
elif ispackage(path):
modpkgs.append(file + ' (package)')
modpkgs.sort()
result = result + self.section(
'PACKAGE CONTENTS', join(modpkgs, '\n'))
@ -1490,20 +1477,9 @@ def writedoc(thing, forceload=0):
def writedocs(dir, pkgpath='', done=None):
"""Write out HTML documentation for all modules in a directory tree."""
if done is None: done = {}
for file in os.listdir(dir):
path = os.path.join(dir, file)
if ispackage(path):
writedocs(path, pkgpath + file + '.', done)
elif os.path.isfile(path):
modname = inspect.getmodulename(path)
if modname:
if modname == '__init__':
modname = pkgpath[:-1] # remove trailing period
else:
modname = pkgpath + modname
if modname not in done:
done[modname] = 1
for importer, modname, ispkg in pkgutil.walk_packages([dir], pkgpath):
writedoc(modname)
return
class Helper:
keywords = {
@ -1830,30 +1806,9 @@ class Scanner:
self.state.append((child, self.children(child)))
return child
class ModuleScanner(Scanner):
class ModuleScanner:
"""An interruptible scanner that searches module synopses."""
def __init__(self):
roots = map(lambda dir: (dir, ''), pathdirs())
Scanner.__init__(self, roots, self.submodules, self.isnewpackage)
self.inodes = map(lambda (dir, pkg): os.stat(dir).st_ino, roots)
def submodules(self, (dir, package)):
children = []
for file in os.listdir(dir):
path = os.path.join(dir, file)
if ispackage(path):
children.append((path, package + (package and '.') + file))
else:
children.append((path, package))
children.sort() # so that spam.py comes before spam.pyc or spam.pyo
return children
def isnewpackage(self, (dir, package)):
inode = os.path.exists(dir) and os.stat(dir).st_ino
if not (os.path.islink(dir) and inode in self.inodes):
self.inodes.append(inode) # detect circular symbolic links
return ispackage(dir)
return False
def run(self, callback, key=None, completer=None):
if key: key = lower(key)
@ -1870,22 +1825,31 @@ class ModuleScanner(Scanner):
if find(lower(modname + ' - ' + desc), key) >= 0:
callback(None, modname, desc)
while not self.quit:
node = self.next()
if not node: break
path, package = node
modname = inspect.getmodulename(path)
if os.path.isfile(path) and modname:
modname = package + (package and '.') + modname
if not modname in seen:
seen[modname] = 1 # if we see spam.py, skip spam.pyc
for importer, modname, ispkg in pkgutil.walk_packages():
if self.quit:
break
if key is None:
callback(path, modname, '')
callback(None, modname, '')
else:
desc = synopsis(path) or ''
loader = importer.find_module(modname)
if hasattr(loader,'get_source'):
import StringIO
desc = source_synopsis(
StringIO.StringIO(loader.get_source(modname))
) or ''
if hasattr(loader,'get_filename'):
path = loader.get_filename(modname)
else:
path = None
else:
module = loader.load_module(modname)
desc = (module.__doc__ or '').splitlines()[0]
path = getattr(module,'__file__',None)
if find(lower(modname + ' - ' + desc), key) >= 0:
callback(path, modname, desc)
if completer: completer()
if completer:
completer()
def apropos(key):
"""Print all the one-line module summaries that contain a substring."""
@ -1950,7 +1914,7 @@ def serve(port, callback=None, completer=None):
'Built-in Modules', '#ffffff', '#ee77aa', contents)]
seen = {}
for dir in pathdirs():
for dir in sys.path:
indices.append(html.index(dir, seen))
contents = heading + join(indices) + '''<p align=right>
<font color="#909090" face="helvetica, arial"><strong>