bpo-12915: Add pkgutil.resolve_name (GH-18310)

This commit is contained in:
Vinay Sajip 2020-02-14 22:02:13 +00:00 committed by GitHub
parent 9aeb0ef930
commit 1ed61617a4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 169 additions and 0 deletions

View File

@ -227,3 +227,44 @@ support.
then ``None`` is returned. In particular, the :term:`loader` for
:term:`namespace packages <namespace package>` does not support
:meth:`get_data <importlib.abc.ResourceLoader.get_data>`.
.. function:: resolve_name(name)
Resolve a name to an object.
This functionality is used in numerous places in the standard library (see
:issue:`12915`) - and equivalent functionality is also in widely used
third-party packages such as setuptools, Django and Pyramid.
It is expected that *name* will be a string in one of the following
formats, where W is shorthand for a valid Python identifier and dot stands
for a literal period in these pseudo-regexes:
* ``W(.W)*``
* ``W(.W)*:(W(.W)*)?``
The first form is intended for backward compatibility only. It assumes that
some part of the dotted name is a package, and the rest is an object
somewhere within that package, possibly nested inside other objects.
Because the place where the package stops and the object hierarchy starts
can't be inferred by inspection, repeated attempts to import must be done
with this form.
In the second form, the caller makes the division point clear through the
provision of a single colon: the dotted name to the left of the colon is a
package to be imported, and the dotted name to the right is the object
hierarchy within that package. Only one import is needed in this form. If
it ends with the colon, then a module object is returned.
The function will return an object (which might be a module), or raise one
of the following exceptions:
:exc:`ValueError` -- if *name* isn't in a recognised format.
:exc:`ImportError` -- if an import failed when it shouldn't have.
:exc:`AttributeError` -- if a failure occurred when traversing the object
hierarchy within the imported package to get to the desired object.
.. versionadded:: 3.9

View File

@ -7,6 +7,7 @@ import importlib.util
import importlib.machinery
import os
import os.path
import re
import sys
from types import ModuleType
import warnings
@ -635,3 +636,71 @@ def get_data(package, resource):
parts.insert(0, os.path.dirname(mod.__file__))
resource_name = os.path.join(*parts)
return loader.get_data(resource_name)
# A dotted name is one or more identifiers joined by literal periods.  For str
# patterns ``\w`` also matches non-ASCII letters, so identifiers written in any
# script are recognised; the negative lookahead rejects a leading digit in
# each identifier.
_DOTTED_WORDS = r'(?!\d)\w+(?:\.(?!\d)\w+)*'
# Named groups keep resolve_name() independent of how many groups the
# dotted-name sub-pattern happens to contain:
#   pkg - dotted name left of the colon (or the whole name if no colon)
#   cln - the colon plus whatever follows it; absent when there is no colon
#   obj - dotted name right of the colon; absent for the 'pkg:' form
_NAME_PATTERN = re.compile(
    f'(?P<pkg>{_DOTTED_WORDS})(?P<cln>:(?P<obj>{_DOTTED_WORDS})?)?'
)
del _DOTTED_WORDS


def resolve_name(name):
    """
    Resolve a name to an object.

    It is expected that `name` will be a string in one of the following
    formats, where W is shorthand for a valid Python identifier and dot stands
    for a literal period in these pseudo-regexes:

    W(.W)*
    W(.W)*:(W(.W)*)?

    The first form is intended for backward compatibility only. It assumes that
    some part of the dotted name is a package, and the rest is an object
    somewhere within that package, possibly nested inside other objects.
    Because the place where the package stops and the object hierarchy starts
    can't be inferred by inspection, repeated attempts to import must be done
    with this form.

    In the second form, the caller makes the division point clear through the
    provision of a single colon: the dotted name to the left of the colon is a
    package to be imported, and the dotted name to the right is the object
    hierarchy within that package. Only one import is needed in this form. If
    it ends with the colon, then a module object is returned.

    The whole string must match one of the two forms; in particular, a
    trailing newline or other surrounding whitespace is not accepted.

    The function will return an object (which might be a module), or raise one
    of the following exceptions:

    ValueError - if `name` isn't in a recognised format
    ImportError - if an import failed when it shouldn't have
    AttributeError - if a failure occurred when traversing the object hierarchy
                     within the imported package to get to the desired object
    """
    # fullmatch() rather than match() with '$': '$' would also match just
    # before a trailing newline and let names such as 'os:path\n' through.
    m = _NAME_PATTERN.fullmatch(name)
    if not m:
        raise ValueError(f'invalid format: {name!r}')
    if m.group('cln'):
        # there is a colon - a one-step import is all that's needed
        mod = importlib.import_module(m.group('pkg'))
        obj_path = m.group('obj')
        parts = obj_path.split('.') if obj_path else []
    else:
        # no colon - have to iterate to find the package boundary
        parts = name.split('.')
        modname = parts.pop(0)
        # first part *must* be a module/package.
        mod = importlib.import_module(modname)
        while parts:
            candidate = f'{modname}.{parts[0]}'
            try:
                mod = importlib.import_module(candidate)
            except ImportError:
                # The first component that can't be imported marks the end
                # of the module path; the rest are attribute names.
                break
            parts.pop(0)
            modname = candidate
    # if we reach this point, mod is the module, already imported, and
    # parts is the list of parts in the object hierarchy to be traversed, or
    # an empty list if just the module is wanted.
    result = mod
    for p in parts:
        result = getattr(result, p)
    return result

View File

@ -186,6 +186,61 @@ class PkgutilTests(unittest.TestCase):
with self.assertRaises((TypeError, ValueError)):
list(pkgutil.walk_packages(bytes_input))
def test_name_resolution(self):
    # Exercises pkgutil.resolve_name() with both the colon form
    # ('pkg.mod:obj.attr') and the legacy dotted form ('pkg.mod.obj.attr').
    import logging
    import logging.handlers

    # (name, object the name is expected to resolve to)
    resolvable = (
        ('os', os),
        ('os.path', os.path),
        ('os.path:pathsep', os.path.pathsep),
        ('logging', logging),
        ('logging:', logging),
        ('logging.handlers', logging.handlers),
        ('logging.handlers:', logging.handlers),
        ('logging.handlers:SysLogHandler', logging.handlers.SysLogHandler),
        ('logging.handlers.SysLogHandler', logging.handlers.SysLogHandler),
        ('logging.handlers:SysLogHandler.LOG_ALERT',
         logging.handlers.SysLogHandler.LOG_ALERT),
        ('logging.handlers.SysLogHandler.LOG_ALERT',
         logging.handlers.SysLogHandler.LOG_ALERT),
        ('builtins.int', int),
        ('builtins:int', int),
        ('builtins.int.from_bytes', int.from_bytes),
        ('builtins:int.from_bytes', int.from_bytes),
        ('builtins.ZeroDivisionError', ZeroDivisionError),
        ('builtins:ZeroDivisionError', ZeroDivisionError),
        ('os:path', os.path),
    )

    # (name, exception type the call is expected to raise)
    unresolvable = (
        # not a string at all
        (None, TypeError),
        (1, TypeError),
        (2.0, TypeError),
        (True, TypeError),
        # a string, but not in a recognised format
        ('', ValueError),
        ('?abc', ValueError),
        ('abc/foo', ValueError),
        # well-formed, but the import or the attribute lookup fails
        ('foo', ImportError),
        ('os.foo', AttributeError),
        ('os.foo:', ImportError),
        ('os.pth:pathsep', ImportError),
        ('logging.handlers:NoSuchHandler', AttributeError),
        ('logging.handlers:SysLogHandler.NO_SUCH_VALUE', AttributeError),
        ('logging.handlers.SysLogHandler.NO_SUCH_VALUE', AttributeError),
        ('ZeroDivisionError', ImportError),
    )

    for candidate, wanted in resolvable:
        with self.subTest(s=candidate):
            self.assertEqual(pkgutil.resolve_name(candidate), wanted)

    for candidate, error_type in unresolvable:
        with self.subTest(s=candidate), self.assertRaises(error_type):
            pkgutil.resolve_name(candidate)
class PkgutilPEP302Tests(unittest.TestCase):

View File

@ -0,0 +1,4 @@
A new function ``resolve_name`` has been added to the ``pkgutil`` module.
This resolves a string of the form ``'a.b.c.d'`` or ``'a.b:c.d'`` to an
object. In the example, ``a.b`` is a package/module and ``c.d`` is an object
within that package/module, reached by successive attribute lookups.