bpo-43926: Cleaner metadata with PEP 566 JSON support. (GH-25565)

* bpo-43926: Cleaner metadata with PEP 566 JSON support.

* Add blurb

* Add versionchanged and versionadded declarations for changes to metadata.

* Use descriptor for PEP 566
This commit is contained in:
Jason R. Coombs 2021-05-02 17:03:40 -04:00 committed by GitHub
parent 0ad1e0384c
commit 37e0c7850d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 254 additions and 26 deletions

View File

@ -170,6 +170,19 @@ the values are returned unparsed from the distribution metadata::
>>> wheel_metadata['Requires-Python'] # doctest: +SKIP
'>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*'
``PackageMetadata`` also presents a ``json`` attribute that returns
all the metadata in a JSON-compatible form per :PEP:`566`::
>>> wheel_metadata.json['requires_python']
'>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*'
.. versionchanged:: 3.10
   The ``Description`` is now included in the metadata when it is supplied
   as the message payload. Line-continuation indentation has been removed
   from multiline values.
.. versionadded:: 3.10
The ``json`` attribute was added.
.. _version:

View File

@ -14,6 +14,7 @@ import itertools
import posixpath
import collections
from . import _adapters, _meta
from ._collections import FreezableDefaultDict, Pair
from ._functools import method_cache
from ._itertools import unique_everseen
@ -22,7 +23,7 @@ from contextlib import suppress
from importlib import import_module
from importlib.abc import MetaPathFinder
from itertools import starmap
from typing import Any, List, Mapping, Optional, Protocol, TypeVar, Union
from typing import List, Mapping, Optional, Union
__all__ = [
@ -385,25 +386,6 @@ class FileHash:
return '<FileHash mode: {} value: {}>'.format(self.mode, self.value)
_T = TypeVar("_T")


class PackageMetadata(Protocol):
    """
    Structural interface for the metadata object of a distribution.

    Every member is a stub; conforming objects supply the behavior.
    """

    def __len__(self) -> int:
        """Stub: sized protocol member."""
        ...  # pragma: no cover

    def __contains__(self, item: str) -> bool:
        """Stub: membership test for the string *item*."""
        ...  # pragma: no cover

    def __getitem__(self, key: str) -> str:
        """Stub: look up the string value stored under *key*."""
        ...  # pragma: no cover

    def get_all(self, name: str, failobj: _T = ...) -> Union[List[Any], _T]:
        """
        Return every value associated with *name*, a key that may be
        multi-valued.
        """
class Distribution:
"""A Python distribution package."""
@ -488,7 +470,7 @@ class Distribution:
return PathDistribution(zipfile.Path(meta.build_as_zip(builder)))
@property
def metadata(self) -> PackageMetadata:
def metadata(self) -> _meta.PackageMetadata:
"""Return the parsed metadata for this Distribution.
The returned object will have keys that name the various bits of
@ -502,7 +484,7 @@ class Distribution:
# (which points to the egg-info file) attribute unchanged.
or self.read_text('')
)
return email.message_from_string(text)
return _adapters.Message(email.message_from_string(text))
@property
def name(self):
@ -829,7 +811,7 @@ def distributions(**kwargs):
return Distribution.discover(**kwargs)
def metadata(distribution_name) -> PackageMetadata:
def metadata(distribution_name) -> _meta.PackageMetadata:
"""Get the metadata for the named package.
:param distribution_name: The name of the distribution package to query.

View File

@ -0,0 +1,67 @@
import re
import textwrap
import email.message
from ._text import FoldedCase
class Message(email.message.Message):
    """
    Adapter around an already-parsed ``email.message.Message``.

    The adapter takes over the state of the message it is given,
    re-indents multi-line header values, exposes a non-empty payload
    as a ``Description`` header and offers a ``json`` view of the
    fields.
    """

    # Keys that may be indicated multiple times per PEP 566.
    multiple_use_keys = {
        FoldedCase(name)
        for name in (
            'Classifier',
            'Obsoletes-Dist',
            'Platform',
            'Project-URL',
            'Provides-Dist',
            'Provides-Extra',
            'Requires-Dist',
            'Requires-External',
            'Supported-Platform',
        )
    }

    def __new__(cls, orig: email.message.Message):
        # Adopt the attributes of the parsed message instead of
        # running the normal Message constructor.
        adapted = super().__new__(cls)
        adapted.__dict__.update(orig.__dict__)
        return adapted

    def __init__(self, *args, **kwargs):
        # The state was copied in __new__; only the headers are rebuilt.
        self._headers = self._repair_headers()

    # suppress spurious error from mypy
    def __iter__(self):
        return super().__iter__()

    def _repair_headers(self):
        """
        Build the corrected list of ``(key, value)`` header pairs.

        Multi-line values are dedented, and a non-empty payload is
        appended under the ``Description`` key.
        """

        def redent(value):
            "Undo the indentation of RFC822 continuation lines"
            if value and '\n' in value:
                # Pad the first line so that dedent sees a common margin.
                return textwrap.dedent(' ' * 8 + value)
            return value

        repaired = []
        for key, value in vars(self)['_headers']:
            repaired.append((key, redent(value)))
        if self._payload:
            repaired.append(('Description', self.get_payload()))
        return repaired

    @property
    def json(self):
        """
        The metadata as a JSON-compatible dictionary, following
        the transformation described in PEP 0566.
        """

        def as_entry(name):
            # Multiple-use keys yield a list of all their values.
            if name in self.multiple_use_keys:
                field = self.get_all(name)
            else:
                field = self[name]
            if name == 'Keywords':
                field = re.split(r'\s+', field)
            return name.lower().replace('-', '_'), field

        return dict(as_entry(FoldedCase(header)) for header in self)

View File

@ -0,0 +1,29 @@
from typing import Any, Dict, Iterator, List, Protocol, TypeVar, Union
_T = TypeVar("_T")


class PackageMetadata(Protocol):
    """
    Structural interface for the metadata object of a distribution.

    Every member is a stub; conforming objects supply the behavior.
    """

    def __len__(self) -> int:
        """Stub: sized protocol member."""
        ...  # pragma: no cover

    def __contains__(self, item: str) -> bool:
        """Stub: membership test for the string *item*."""
        ...  # pragma: no cover

    def __getitem__(self, key: str) -> str:
        """Stub: look up the string value stored under *key*."""
        ...  # pragma: no cover

    def __iter__(self) -> Iterator[str]:
        """Stub: iterate over strings."""
        ...  # pragma: no cover

    def get_all(self, name: str, failobj: _T = ...) -> Union[List[Any], _T]:
        """
        Return every value associated with *name*, a key that may be
        multi-valued.
        """

    @property
    def json(self) -> Dict[str, Union[str, List[str]]]:
        """
        The metadata in a JSON-compatible form.
        """

View File

@ -0,0 +1,99 @@
import re
from ._functools import method_cache
# from jaraco.text 3.5
class FoldedCase(str):
    """
    A case insensitive string class; behaves just like str
    except compares equal when the only variation is case.

    >>> s = FoldedCase('hello world')

    >>> s == 'Hello World'
    True

    >>> 'Hello World' == s
    True

    >>> s != 'Hello World'
    False

    >>> s.index('O')
    4

    >>> s.split('O')
    ['hell', ' w', 'rld']

    >>> sorted(map(FoldedCase, ['GAMMA', 'alpha', 'Beta']))
    ['alpha', 'Beta', 'GAMMA']

    Sequence membership is straightforward.

    >>> "Hello World" in [s]
    True
    >>> s in ["Hello World"]
    True

    You may test for set inclusion, but candidate and elements
    must both be folded.

    >>> FoldedCase("Hello World") in {s}
    True
    >>> s in {FoldedCase("Hello World")}
    True

    String inclusion works as long as the FoldedCase object
    is on the right.

    >>> "hello" in FoldedCase("Hello World")
    True

    But not if the FoldedCase object is on the left:

    >>> FoldedCase('hello') in 'Hello World'
    False

    In that case, use in_:

    >>> FoldedCase('hello').in_('Hello World')
    True

    >>> FoldedCase('hello') > FoldedCase('Hello')
    False

    Comparing with something that is not a string is simply
    unequal rather than an error.

    >>> FoldedCase('hello') == 1
    False
    """

    # The rich comparisons return NotImplemented for non-string operands
    # so that Python can fall back to the other operand (or to its default
    # behavior) instead of failing with AttributeError on ``other.lower()``.

    def __lt__(self, other):
        if not isinstance(other, str):
            return NotImplemented
        return self.lower() < other.lower()

    def __gt__(self, other):
        if not isinstance(other, str):
            return NotImplemented
        return self.lower() > other.lower()

    def __eq__(self, other):
        if not isinstance(other, str):
            return NotImplemented
        return self.lower() == other.lower()

    def __ne__(self, other):
        if not isinstance(other, str):
            return NotImplemented
        return self.lower() != other.lower()

    def __hash__(self):
        # Hash the folded form so that equal objects hash alike.
        return hash(self.lower())

    def __contains__(self, other):
        # Use the plain str lowering and str containment on both sides.
        return super().lower().__contains__(other.lower())

    def in_(self, other):
        "Does self appear in other?"
        return self in FoldedCase(other)

    # cache lower since it's likely to be called frequently.
    @method_cache
    def lower(self):
        return super().lower()

    def index(self, sub):
        """Return the position of *sub* in self, ignoring case."""
        return self.lower().index(sub.lower())

    def split(self, splitter=' ', maxsplit=0):
        """Split on *splitter* ignoring case; ``maxsplit=0`` means no limit."""
        pattern = re.compile(re.escape(splitter), re.I)
        return pattern.split(self, maxsplit)

View File

@ -1,5 +1,6 @@
import os
import sys
import copy
import shutil
import pathlib
import tempfile
@ -108,6 +109,16 @@ class DistInfoPkg(OnSysPath, SiteDir):
super(DistInfoPkg, self).setUp()
build_files(DistInfoPkg.files, self.site_dir)
def make_uppercase(self):
    """
    Replace the dist-info directory on disk with a copy whose
    METADATA text is entirely uppercase.
    """
    dist_info = "distinfo_pkg-1.0.0.dist-info"
    shutil.rmtree(self.site_dir / dist_info)
    # Work on a deep copy so the class-level file definition stays intact.
    shouted = copy.deepcopy(DistInfoPkg.files)
    shouted[dist_info]["METADATA"] = shouted[dist_info]["METADATA"].upper()
    build_files(shouted, self.site_dir)
class DistInfoPkgWithDot(OnSysPath, SiteDir):
files: FilesDef = {

View File

@ -125,7 +125,7 @@ class NonASCIITests(fixtures.OnSysPath, fixtures.SiteDir, unittest.TestCase):
metadata_dir.mkdir()
metadata = metadata_dir / 'METADATA'
with metadata.open('w', encoding='utf-8') as fp:
fp.write('Description: pôrˈtend\n')
fp.write('Description: pôrˈtend')
return 'portend'
@staticmethod
@ -145,7 +145,7 @@ class NonASCIITests(fixtures.OnSysPath, fixtures.SiteDir, unittest.TestCase):
pôrˈtend
"""
).lstrip()
).strip()
)
return 'portend'
@ -157,7 +157,7 @@ class NonASCIITests(fixtures.OnSysPath, fixtures.SiteDir, unittest.TestCase):
def test_metadata_loads_egg_info(self):
pkg_name = self.pkg_with_non_ascii_description_egg_info(self.site_dir)
meta = metadata(pkg_name)
assert meta.get_payload() == 'pôrˈtend\n'
assert meta['Description'] == 'pôrˈtend'
class DiscoveryTests(fixtures.EggInfoPkg, fixtures.DistInfoPkg, unittest.TestCase):

View File

@ -231,6 +231,29 @@ class APITests(
assert deps == expected
def test_as_json(self):
    """The dist-info package exposes its metadata in JSON form."""
    as_json = metadata('distinfo-pkg').json
    assert 'name' in as_json
    assert as_json['keywords'] == ['sample', 'package']
    assert as_json['description'].startswith('Once upon a time\nThere was')
    assert len(as_json['requires_dist']) == 2
def test_as_json_egg_info(self):
    """The egg-info package exposes its metadata in JSON form."""
    as_json = metadata('egginfo-pkg').json
    assert 'name' in as_json
    assert as_json['keywords'] == ['sample', 'package']
    assert as_json['description'].startswith('Once upon a time\nThere was')
    assert len(as_json['classifier']) == 2
def test_as_json_odd_case(self):
    """Uppercased METADATA still yields the normalized JSON keys."""
    self.make_uppercase()
    as_json = metadata('distinfo-pkg').json
    assert 'name' in as_json
    assert len(as_json['requires_dist']) == 2
    # Only the keys are normalized; the values keep their case.
    assert as_json['keywords'] == ['SAMPLE', 'PACKAGE']
class LegacyDots(fixtures.DistInfoPkgWithDotLegacy, unittest.TestCase):
def test_name_normalization(self):

View File

@ -0,0 +1,4 @@
In ``importlib.metadata``, provide a uniform interface to ``Description``,
allow any field to be encoded with multiline values, strip continuation-line
indentation from multiline values, and add a ``.json`` property for easy access
to the PEP 566 JSON-compatible form. Sync with ``importlib_metadata 4.0``.