diff --git a/Lib/importlib/_collections.py b/Lib/importlib/_collections.py new file mode 100644 index 00000000000..cf0954e1a30 --- /dev/null +++ b/Lib/importlib/_collections.py @@ -0,0 +1,30 @@ +import collections + + +# from jaraco.collections 3.3 +class FreezableDefaultDict(collections.defaultdict): + """ + Often it is desirable to prevent the mutation of + a default dict after its initial construction, such + as to prevent mutation during iteration. + + >>> dd = FreezableDefaultDict(list) + >>> dd[0].append('1') + >>> dd.freeze() + >>> dd[1] + [] + >>> len(dd) + 1 + """ + + def __missing__(self, key): + return getattr(self, '_frozen', super().__missing__)(key) + + def freeze(self): + self._frozen = lambda key: self.default_factory() + + +class Pair(collections.namedtuple('Pair', 'name value')): + @classmethod + def parse(cls, text): + return cls(*map(str.strip, text.split("=", 1))) diff --git a/Lib/importlib/_functools.py b/Lib/importlib/_functools.py new file mode 100644 index 00000000000..73f50d00bc0 --- /dev/null +++ b/Lib/importlib/_functools.py @@ -0,0 +1,85 @@ +import types +import functools + + +# from jaraco.functools 3.3 +def method_cache(method, cache_wrapper=None): + """ + Wrap lru_cache to support storing the cache data in the object instances. + + Abstracts the common paradigm where the method explicitly saves an + underscore-prefixed protected property on first call and returns that + subsequently. + + >>> class MyClass: + ... calls = 0 + ... + ... @method_cache + ... def method(self, value): + ... self.calls += 1 + ... return value + + >>> a = MyClass() + >>> a.method(3) + 3 + >>> for x in range(75): + ... res = a.method(x) + >>> a.calls + 75 + + Note that the apparent behavior will be exactly like that of lru_cache + except that the cache is stored on each instance, so values in one + instance will not flush values from another, and when an instance is + deleted, so are the cached values for that instance. + + >>> b = MyClass() + >>> for x in range(35): + ... res = b.method(x) + >>> b.calls + 35 + >>> a.method(0) + 0 + >>> a.calls + 75 + + Note that if method had been decorated with ``functools.lru_cache()``, + a.calls would have been 76 (due to the cached value of 0 having been + flushed by the 'b' instance). + + Clear the cache with ``.cache_clear()`` + + >>> a.method.cache_clear() + + Same for a method that hasn't yet been called. + + >>> c = MyClass() + >>> c.method.cache_clear() + + Another cache wrapper may be supplied: + + >>> cache = functools.lru_cache(maxsize=2) + >>> MyClass.method2 = method_cache(lambda self: 3, cache_wrapper=cache) + >>> a = MyClass() + >>> a.method2() + 3 + + Caution - do not subsequently wrap the method with another decorator, such + as ``@property``, which changes the semantics of the function. + + See also + http://code.activestate.com/recipes/577452-a-memoize-decorator-for-instance-methods/ + for another implementation and additional justification. + """ + cache_wrapper = cache_wrapper or functools.lru_cache() + + def wrapper(self, *args, **kwargs): + # it's the first call, replace the method with a cached, bound method + bound_method = types.MethodType(method, self) + cached_method = cache_wrapper(bound_method) + setattr(self, method.__name__, cached_method) + return cached_method(*args, **kwargs) + + # Support cache clear even before cache has been created. + wrapper.cache_clear = lambda: None + + return wrapper diff --git a/Lib/importlib/metadata.py b/Lib/importlib/metadata.py index 53c1a145f5c..7a427eb3b28 100644 --- a/Lib/importlib/metadata.py +++ b/Lib/importlib/metadata.py @@ -7,15 +7,17 @@ import email import pathlib import zipfile import operator +import textwrap import warnings import functools import itertools import posixpath import collections +from ._collections import FreezableDefaultDict, Pair +from ._functools import method_cache from ._itertools import unique_everseen -from configparser import ConfigParser from contextlib import suppress from importlib import import_module from importlib.abc import MetaPathFinder @@ -51,6 +53,71 @@ class PackageNotFoundError(ModuleNotFoundError): return name +class Sectioned: + """ + A simple entry point config parser for performance + + >>> for item in Sectioned.read(Sectioned._sample): + ... print(item) + Pair(name='sec1', value='# comments ignored') + Pair(name='sec1', value='a = 1') + Pair(name='sec1', value='b = 2') + Pair(name='sec2', value='a = 2') + + >>> res = Sectioned.section_pairs(Sectioned._sample) + >>> item = next(res) + >>> item.name + 'sec1' + >>> item.value + Pair(name='a', value='1') + >>> item = next(res) + >>> item.value + Pair(name='b', value='2') + >>> item = next(res) + >>> item.name + 'sec2' + >>> item.value + Pair(name='a', value='2') + >>> list(res) + [] + """ + + _sample = textwrap.dedent( + """ + [sec1] + # comments ignored + a = 1 + b = 2 + + [sec2] + a = 2 + """ + ).lstrip() + + @classmethod + def section_pairs(cls, text): + return ( + section._replace(value=Pair.parse(section.value)) + for section in cls.read(text, filter_=cls.valid) + if section.name is not None + ) + + @staticmethod + def read(text, filter_=None): + lines = filter(filter_, map(str.strip, text.splitlines())) + name = None + for value in lines: + section_match = value.startswith('[') and value.endswith(']') + if section_match: + name = value.strip('[]') + continue + yield Pair(name, value) + + @staticmethod + def valid(line): + return line and not line.startswith('#') + + class EntryPoint( collections.namedtuple('EntryPointBase', 'name value group')): """An entry point as defined by Python packaging conventions. @@ -108,22 +175,6 @@ class EntryPoint( match = self.pattern.match(self.value) return list(re.finditer(r'\w+', match.group('extras') or '')) - @classmethod - def _from_config(cls, config): - return ( - cls(name, value, group) - for group in config.sections() - for name, value in config.items(group) - ) - - @classmethod - def _from_text(cls, text): - config = ConfigParser(delimiters='=') - # case sensitive: https://stackoverflow.com/q/1611799/812183 - config.optionxform = str - config.read_string(text) - return cls._from_config(config) - def _for(self, dist): self.dist = dist return self @@ -193,7 +244,18 @@ class EntryPoints(tuple): @classmethod def _from_text_for(cls, text, dist): - return cls(ep._for(dist) for ep in EntryPoint._from_text(text)) + return cls(ep._for(dist) for ep in cls._from_text(text)) + + @classmethod + def _from_text(cls, text): + return itertools.starmap(EntryPoint, cls._parse_groups(text or '')) + + @staticmethod + def _parse_groups(text): + return ( + (item.value.name, item.value.value, item.name) + for item in Sectioned.section_pairs(text) + ) def flake8_bypass(func): @@ -259,7 +321,7 @@ class Deprecated: return super().values() -class SelectableGroups(dict): +class SelectableGroups(Deprecated, dict): """ A backward- and forward-compatible result from entry_points that fully implements the dict interface. @@ -277,7 +339,8 @@ class SelectableGroups(dict): """ Reconstruct a list of all entrypoints from the groups. """ - return EntryPoints(itertools.chain.from_iterable(self.values())) + groups = super(Deprecated, self).values() + return EntryPoints(itertools.chain.from_iterable(groups)) @property def groups(self): @@ -507,24 +570,7 @@ class Distribution: @classmethod def _deps_from_requires_text(cls, source): - section_pairs = cls._read_sections(source.splitlines()) - sections = { - section: list(map(operator.itemgetter('line'), results)) - for section, results in itertools.groupby( - section_pairs, operator.itemgetter('section') - ) - } - return cls._convert_egg_info_reqs_to_simple_reqs(sections) - - @staticmethod - def _read_sections(lines): - section = None - for line in filter(None, lines): - section_match = re.match(r'\[(.*)\]$', line) - if section_match: - section = section_match.group(1) - continue - yield locals() + return cls._convert_egg_info_reqs_to_simple_reqs(Sectioned.read(source)) @staticmethod def _convert_egg_info_reqs_to_simple_reqs(sections): @@ -549,9 +595,8 @@ class Distribution: conditions = list(filter(None, [markers, make_condition(extra)])) return '; ' + ' and '.join(conditions) if conditions else '' - for section, deps in sections.items(): - for dep in deps: - yield dep + parse_condition(section) + for section in sections: + yield section.value + parse_condition(section.name) class DistributionFinder(MetaPathFinder): @@ -607,6 +652,10 @@ class FastPath: children. """ + @functools.lru_cache() # type: ignore + def __new__(cls, root): + return super().__new__(cls) + def __init__(self, root): self.root = root self.base = os.path.basename(self.root).lower() @@ -629,11 +678,53 @@ class FastPath: return dict.fromkeys(child.split(posixpath.sep, 1)[0] for child in names) def search(self, name): - return ( - self.joinpath(child) - for child in self.children() - if name.matches(child, self.base) + return self.lookup(self.mtime).search(name) + + @property + def mtime(self): + with suppress(OSError): + return os.stat(self.root).st_mtime + self.lookup.cache_clear() + + @method_cache + def lookup(self, mtime): + return Lookup(self) + + +class Lookup: + def __init__(self, path: FastPath): + base = os.path.basename(path.root).lower() + base_is_egg = base.endswith(".egg") + self.infos = FreezableDefaultDict(list) + self.eggs = FreezableDefaultDict(list) + + for child in path.children(): + low = child.lower() + if low.endswith((".dist-info", ".egg-info")): + # rpartition is faster than splitext and suitable for this purpose. + name = low.rpartition(".")[0].partition("-")[0] + normalized = Prepared.normalize(name) + self.infos[normalized].append(path.joinpath(child)) + elif base_is_egg and low == "egg-info": + name = base.rpartition(".")[0].partition("-")[0] + legacy_normalized = Prepared.legacy_normalize(name) + self.eggs[legacy_normalized].append(path.joinpath(child)) + + self.infos.freeze() + self.eggs.freeze() + + def search(self, prepared): + infos = ( + self.infos[prepared.normalized] + if prepared + else itertools.chain.from_iterable(self.infos.values()) ) + eggs = ( + self.eggs[prepared.legacy_normalized] + if prepared + else itertools.chain.from_iterable(self.eggs.values()) + ) + return itertools.chain(infos, eggs) class Prepared: @@ -642,22 +733,14 @@ class Prepared: """ normalized = None - suffixes = 'dist-info', 'egg-info' - exact_matches = [''][:0] - egg_prefix = '' - versionless_egg_name = '' + legacy_normalized = None def __init__(self, name): self.name = name if name is None: return self.normalized = self.normalize(name) - self.exact_matches = [ - self.normalized + '.' + suffix for suffix in self.suffixes - ] - legacy_normalized = self.legacy_normalize(self.name) - self.egg_prefix = legacy_normalized + '-' - self.versionless_egg_name = legacy_normalized + '.egg' + self.legacy_normalized = self.legacy_normalize(name) @staticmethod def normalize(name): @@ -674,26 +757,8 @@ class Prepared: """ return name.lower().replace('-', '_') - def matches(self, cand, base): - low = cand.lower() - # rpartition is faster than splitext and suitable for this purpose. - pre, _, ext = low.rpartition('.') - name, _, rest = pre.partition('-') - return ( - low in self.exact_matches - or ext in self.suffixes - and (not self.normalized or name.replace('.', '_') == self.normalized) - # legacy case: - or self.is_egg(base) - and low == 'egg-info' - ) - - def is_egg(self, base): - return ( - base == self.versionless_egg_name - or base.startswith(self.egg_prefix) - and base.endswith('.egg') - ) + def __bool__(self): + return bool(self.name) class MetadataPathFinder(DistributionFinder): @@ -718,6 +783,9 @@ class MetadataPathFinder(DistributionFinder): path.search(prepared) for path in map(FastPath, paths) ) + def invalidate_caches(cls): + FastPath.__new__.cache_clear() + class PathDistribution(Distribution): def __init__(self, path): diff --git a/Lib/test/test_importlib/fixtures.py b/Lib/test/test_importlib/fixtures.py index b50afda0f8f..1ae70c70f10 100644 --- a/Lib/test/test_importlib/fixtures.py +++ b/Lib/test/test_importlib/fixtures.py @@ -86,6 +86,10 @@ class DistInfoPkg(OnSysPath, SiteDir): Version: 1.0.0 Requires-Dist: wheel >= 1.0 Requires-Dist: pytest; extra == 'test' + Keywords: sample package + + Once upon a time + There was a distinfo pkg """, "RECORD": "mod.py,sha256=abc,20\n", "entry_points.txt": """ @@ -157,6 +161,9 @@ class EggInfoPkg(OnSysPath, SiteDir): Version: 1.0.0 Classifier: Intended Audience :: Developers Classifier: Topic :: Software Development :: Libraries + Keywords: sample package + Description: Once upon a time + There was an egginfo package """, "SOURCES.txt": """ mod.py diff --git a/Lib/test/test_importlib/test_metadata_api.py b/Lib/test/test_importlib/test_metadata_api.py index b54c3bd098d..657c16603f6 100644 --- a/Lib/test/test_importlib/test_metadata_api.py +++ b/Lib/test/test_importlib/test_metadata_api.py @@ -2,6 +2,7 @@ import re import textwrap import unittest import warnings +import importlib from . import fixtures from importlib.metadata import ( @@ -260,3 +261,9 @@ class OffSysPathTests(fixtures.DistInfoPkgOffPath, unittest.TestCase): dist_info_path = self.site_dir / 'distinfo_pkg-1.0.0.dist-info' dist = Distribution.at(str(dist_info_path)) assert dist.version == '1.0.0' + + +class InvalidateCache(unittest.TestCase): + def test_invalidate_cache(self): + # No externally observable behavior, but ensures test coverage... + importlib.invalidate_caches() diff --git a/Misc/NEWS.d/next/Library/2021-04-08-20-04-46.bpo-43780.hUOgCh.rst b/Misc/NEWS.d/next/Library/2021-04-08-20-04-46.bpo-43780.hUOgCh.rst new file mode 100644 index 00000000000..3adbe50512b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-04-08-20-04-46.bpo-43780.hUOgCh.rst @@ -0,0 +1,3 @@ +In ``importlib.metadata``, incorporate changes from importlib_metadata 3.10: +Add mtime-based caching during distribution discovery. Flagged use of dict +result from ``entry_points()`` as deprecated.