From 1ed61617a4a6632905ad6a0b440cd2cafb8b6414 Mon Sep 17 00:00:00 2001
From: Vinay Sajip
Date: Fri, 14 Feb 2020 22:02:13 +0000
Subject: [PATCH] bpo-12915: Add pkgutil.resolve_name (GH-18310)

---
 Doc/library/pkgutil.rst                       | 41 +++++++++++
 Lib/pkgutil.py                                | 71 +++++++++++++++++++
 Lib/test/test_pkgutil.py                      | 55 +++++++++++++++
 .../2020-02-02-10-08-25.bpo-12915.d6r50-.rst  |  4 ++
 4 files changed, 171 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Library/2020-02-02-10-08-25.bpo-12915.d6r50-.rst

diff --git a/Doc/library/pkgutil.rst b/Doc/library/pkgutil.rst
index 78a51573458..2066cbb9fc5 100644
--- a/Doc/library/pkgutil.rst
+++ b/Doc/library/pkgutil.rst
@@ -227,3 +227,44 @@ support.
    then ``None`` is returned. In particular, the :term:`loader` for
    :term:`namespace packages <namespace package>` does not support
    :meth:`get_data <importlib.abc.ResourceLoader.get_data>`.
+
+
+.. function:: resolve_name(name)
+
+   Resolve a name to an object.
+
+   This functionality is used in numerous places in the standard library (see
+   :issue:`12915`) - and equivalent functionality is also in widely used
+   third-party packages such as setuptools, Django and Pyramid.
+
+   It is expected that *name* will be a string in one of the following
+   formats, where W is shorthand for a valid Python identifier and dot stands
+   for a literal period in these pseudo-regexes:
+
+   * ``W(.W)*``
+   * ``W(.W)*:(W(.W)*)?``
+
+   The first form is intended for backward compatibility only. It assumes that
+   some part of the dotted name is a package, and the rest is an object
+   somewhere within that package, possibly nested inside other objects.
+   Because the place where the package stops and the object hierarchy starts
+   can't be inferred by inspection, repeated attempts to import must be done
+   with this form.
+
+   In the second form, the caller makes the division point clear through the
+   provision of a single colon: the dotted name to the left of the colon is a
+   package to be imported, and the dotted name to the right is the object
+   hierarchy within that package. Only one import is needed in this form. If
+   it ends with the colon, then a module object is returned.
+
+   The function will return an object (which might be a module), or raise one
+   of the following exceptions:
+
+   :exc:`ValueError` -- if *name* isn't in a recognised format.
+
+   :exc:`ImportError` -- if an import failed when it shouldn't have.
+
+   :exc:`AttributeError` -- if a failure occurred when traversing the object
+   hierarchy within the imported package to get to the desired object.
+
+   .. versionadded:: 3.9
diff --git a/Lib/pkgutil.py b/Lib/pkgutil.py
index 8474a773e7c..4bc3083ac19 100644
--- a/Lib/pkgutil.py
+++ b/Lib/pkgutil.py
@@ -7,6 +7,7 @@ import importlib.util
 import importlib.machinery
 import os
 import os.path
+import re
 import sys
 from types import ModuleType
 import warnings
@@ -635,3 +636,73 @@ def get_data(package, resource):
     parts.insert(0, os.path.dirname(mod.__file__))
     resource_name = os.path.join(*parts)
     return loader.get_data(resource_name)
+
+
+# An identifier is any run of word characters that does not start with a digit;
+# \w is Unicode-aware for str patterns, so non-ASCII names are accepted too.
+_DOTTED_WORDS = r'(?!\d)\w+(\.(?!\d)\w+)*'
+_NAME_PATTERN = re.compile(f'^({_DOTTED_WORDS})(:({_DOTTED_WORDS})?)?$')
+del _DOTTED_WORDS
+
+def resolve_name(name):
+    """
+    Resolve a name to an object.
+
+    It is expected that `name` will be a string in one of the following
+    formats, where W is shorthand for a valid Python identifier and dot stands
+    for a literal period in these pseudo-regexes:
+
+    W(.W)*
+    W(.W)*:(W(.W)*)?
+
+    The first form is intended for backward compatibility only. It assumes that
+    some part of the dotted name is a package, and the rest is an object
+    somewhere within that package, possibly nested inside other objects.
+    Because the place where the package stops and the object hierarchy starts
+    can't be inferred by inspection, repeated attempts to import must be done
+    with this form.
+
+    In the second form, the caller makes the division point clear through the
+    provision of a single colon: the dotted name to the left of the colon is a
+    package to be imported, and the dotted name to the right is the object
+    hierarchy within that package. Only one import is needed in this form. If
+    it ends with the colon, then a module object is returned.
+
+    The function will return an object (which might be a module), or raise one
+    of the following exceptions:
+
+    ValueError - if `name` isn't in a recognised format
+    ImportError - if an import failed when it shouldn't have
+    AttributeError - if a failure occurred when traversing the object hierarchy
+                     within the imported package to get to the desired object
+    """
+    m = _NAME_PATTERN.match(name)
+    if not m:
+        raise ValueError(f'invalid format: {name!r}')
+    groups = m.groups()
+    if groups[2]:
+        # there is a colon - a one-step import is all that's needed
+        mod = importlib.import_module(groups[0])
+        parts = groups[3].split('.') if groups[3] else []
+    else:
+        # no colon - have to iterate to find the package boundary
+        parts = name.split('.')
+        modname = parts.pop(0)
+        # first part *must* be a module/package.
+        mod = importlib.import_module(modname)
+        while parts:
+            p = parts[0]
+            s = f'{modname}.{p}'
+            try:
+                mod = importlib.import_module(s)
+                parts.pop(0)
+                modname = s
+            except ImportError:
+                break
+    # if we reach this point, mod is the module, already imported, and
+    # parts is the list of parts in the object hierarchy to be traversed, or
+    # an empty list if just the module is wanted.
+    result = mod
+    for p in parts:
+        result = getattr(result, p)
+    return result
diff --git a/Lib/test/test_pkgutil.py b/Lib/test/test_pkgutil.py
index 2887ce6cc05..906150b1049 100644
--- a/Lib/test/test_pkgutil.py
+++ b/Lib/test/test_pkgutil.py
@@ -186,6 +186,61 @@ class PkgutilTests(unittest.TestCase):
         with self.assertRaises((TypeError, ValueError)):
             list(pkgutil.walk_packages(bytes_input))
 
+    def test_name_resolution(self):
+        import logging
+        import logging.handlers
+
+        success_cases = (
+            ('os', os),
+            ('os.path', os.path),
+            ('os.path:pathsep', os.path.pathsep),
+            ('logging', logging),
+            ('logging:', logging),
+            ('logging.handlers', logging.handlers),
+            ('logging.handlers:', logging.handlers),
+            ('logging.handlers:SysLogHandler', logging.handlers.SysLogHandler),
+            ('logging.handlers.SysLogHandler', logging.handlers.SysLogHandler),
+            ('logging.handlers:SysLogHandler.LOG_ALERT',
+                logging.handlers.SysLogHandler.LOG_ALERT),
+            ('logging.handlers.SysLogHandler.LOG_ALERT',
+                logging.handlers.SysLogHandler.LOG_ALERT),
+            ('builtins.int', int),
+            ('builtins:int', int),
+            ('builtins.int.from_bytes', int.from_bytes),
+            ('builtins:int.from_bytes', int.from_bytes),
+            ('builtins.ZeroDivisionError', ZeroDivisionError),
+            ('builtins:ZeroDivisionError', ZeroDivisionError),
+            ('os:path', os.path),
+        )
+
+        failure_cases = (
+            (None, TypeError),
+            (1, TypeError),
+            (2.0, TypeError),
+            (True, TypeError),
+            ('', ValueError),
+            ('?abc', ValueError),
+            ('abc/foo', ValueError),
+            ('foo', ImportError),
+            ('os.foo', AttributeError),
+            ('os.foo:', ImportError),
+            ('os.pth:pathsep', ImportError),
+            ('logging.handlers:NoSuchHandler', AttributeError),
+            ('logging.handlers:SysLogHandler.NO_SUCH_VALUE', AttributeError),
+            ('logging.handlers.SysLogHandler.NO_SUCH_VALUE', AttributeError),
+            ('ZeroDivisionError', ImportError),
+        )
+
+        for s, expected in success_cases:
+            with self.subTest(s=s):
+                o = pkgutil.resolve_name(s)
+                self.assertEqual(o, expected)
+
+        for s, exc in failure_cases:
+            with self.subTest(s=s):
+                with self.assertRaises(exc):
+                    pkgutil.resolve_name(s)
+
 
 class PkgutilPEP302Tests(unittest.TestCase):
 
diff --git a/Misc/NEWS.d/next/Library/2020-02-02-10-08-25.bpo-12915.d6r50-.rst b/Misc/NEWS.d/next/Library/2020-02-02-10-08-25.bpo-12915.d6r50-.rst
new file mode 100644
index 00000000000..90ee0bcac79
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-02-02-10-08-25.bpo-12915.d6r50-.rst
@@ -0,0 +1,4 @@
+A new function ``resolve_name`` has been added to the ``pkgutil`` module.
+This resolves a string of the form ``'a.b.c.d'`` or ``'a.b:c.d'`` to an
+object. In the example, ``a.b`` is a package/module and ``c.d`` is an object
+within that package/module reached via recursive attribute access.