cpython/Lib/packaging/metadata.py

570 lines
20 KiB
Python

"""Implementation of the Metadata for Python packages PEPs.
Supports all metadata formats (1.0, 1.1, 1.2).
"""
import re
import logging
from io import StringIO
from email import message_from_file
from packaging import logger
from packaging.markers import interpret
from packaging.version import (is_valid_predicate, is_valid_version,
is_valid_versions)
from packaging.errors import (MetadataMissingError,
MetadataConflictError,
MetadataUnrecognizedVersionError)
try:
# docutils is installed
from docutils.utils import Reporter
from docutils.parsers.rst import Parser
from docutils import frontend
from docutils import nodes
class SilentReporter(Reporter):
def __init__(self, source, report_level, halt_level, stream=None,
debug=0, encoding='ascii', error_handler='replace'):
self.messages = []
super(SilentReporter, self).__init__(
source, report_level, halt_level, stream,
debug, encoding, error_handler)
def system_message(self, level, message, *children, **kwargs):
self.messages.append((level, message, children, kwargs))
_HAS_DOCUTILS = True
except ImportError:
# docutils is not installed
_HAS_DOCUTILS = False
# public API of this module
__all__ = ['Metadata', 'PKG_INFO_ENCODING', 'PKG_INFO_PREFERRED_VERSION']
# Encoding used for the PKG-INFO files
PKG_INFO_ENCODING = 'utf-8'
# preferred version. Hopefully will be changed
# to 1.2 once PEP 345 is supported everywhere
PKG_INFO_PREFERRED_VERSION = '1.0'
_LINE_PREFIX = re.compile('\n \|')
_241_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform',
'Summary', 'Description',
'Keywords', 'Home-page', 'Author', 'Author-email',
'License')
_314_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform',
'Supported-Platform', 'Summary', 'Description',
'Keywords', 'Home-page', 'Author', 'Author-email',
'License', 'Classifier', 'Download-URL', 'Obsoletes',
'Provides', 'Requires')
_314_MARKERS = ('Obsoletes', 'Provides', 'Requires', 'Classifier',
'Download-URL')
_345_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform',
'Supported-Platform', 'Summary', 'Description',
'Keywords', 'Home-page', 'Author', 'Author-email',
'Maintainer', 'Maintainer-email', 'License',
'Classifier', 'Download-URL', 'Obsoletes-Dist',
'Project-URL', 'Provides-Dist', 'Requires-Dist',
'Requires-Python', 'Requires-External')
_345_MARKERS = ('Provides-Dist', 'Requires-Dist', 'Requires-Python',
'Obsoletes-Dist', 'Requires-External', 'Maintainer',
'Maintainer-email', 'Project-URL')
_ALL_FIELDS = set()
_ALL_FIELDS.update(_241_FIELDS)
_ALL_FIELDS.update(_314_FIELDS)
_ALL_FIELDS.update(_345_FIELDS)
def _version2fieldlist(version):
if version == '1.0':
return _241_FIELDS
elif version == '1.1':
return _314_FIELDS
elif version == '1.2':
return _345_FIELDS
raise MetadataUnrecognizedVersionError(version)
def _best_version(fields):
"""Detect the best version depending on the fields used."""
def _has_marker(keys, markers):
for marker in markers:
if marker in keys:
return True
return False
keys = list(fields)
possible_versions = ['1.0', '1.1', '1.2']
# first let's try to see if a field is not part of one of the version
for key in keys:
if key not in _241_FIELDS and '1.0' in possible_versions:
possible_versions.remove('1.0')
if key not in _314_FIELDS and '1.1' in possible_versions:
possible_versions.remove('1.1')
if key not in _345_FIELDS and '1.2' in possible_versions:
possible_versions.remove('1.2')
# possible_version contains qualified versions
if len(possible_versions) == 1:
return possible_versions[0] # found !
elif len(possible_versions) == 0:
raise MetadataConflictError('Unknown metadata set')
# let's see if one unique marker is found
is_1_1 = '1.1' in possible_versions and _has_marker(keys, _314_MARKERS)
is_1_2 = '1.2' in possible_versions and _has_marker(keys, _345_MARKERS)
if is_1_1 and is_1_2:
raise MetadataConflictError('You used incompatible 1.1 and 1.2 fields')
# we have the choice, either 1.0, or 1.2
# - 1.0 has a broken Summary field but works with all tools
# - 1.1 is to avoid
# - 1.2 fixes Summary but is not widespread yet
if not is_1_1 and not is_1_2:
# we couldn't find any specific marker
if PKG_INFO_PREFERRED_VERSION in possible_versions:
return PKG_INFO_PREFERRED_VERSION
if is_1_1:
return '1.1'
# default marker when 1.0 is disqualified
return '1.2'
_ATTR2FIELD = {
'metadata_version': 'Metadata-Version',
'name': 'Name',
'version': 'Version',
'platform': 'Platform',
'supported_platform': 'Supported-Platform',
'summary': 'Summary',
'description': 'Description',
'keywords': 'Keywords',
'home_page': 'Home-page',
'author': 'Author',
'author_email': 'Author-email',
'maintainer': 'Maintainer',
'maintainer_email': 'Maintainer-email',
'license': 'License',
'classifier': 'Classifier',
'download_url': 'Download-URL',
'obsoletes_dist': 'Obsoletes-Dist',
'provides_dist': 'Provides-Dist',
'requires_dist': 'Requires-Dist',
'requires_python': 'Requires-Python',
'requires_external': 'Requires-External',
'requires': 'Requires',
'provides': 'Provides',
'obsoletes': 'Obsoletes',
'project_url': 'Project-URL',
}
_PREDICATE_FIELDS = ('Requires-Dist', 'Obsoletes-Dist', 'Provides-Dist')
_VERSIONS_FIELDS = ('Requires-Python',)
_VERSION_FIELDS = ('Version',)
_LISTFIELDS = ('Platform', 'Classifier', 'Obsoletes',
'Requires', 'Provides', 'Obsoletes-Dist',
'Provides-Dist', 'Requires-Dist', 'Requires-External',
'Project-URL', 'Supported-Platform')
_LISTTUPLEFIELDS = ('Project-URL',)
_ELEMENTSFIELD = ('Keywords',)
_UNICODEFIELDS = ('Author', 'Maintainer', 'Summary', 'Description')
_MISSING = object()
_FILESAFE = re.compile('[^A-Za-z0-9.]+')
class Metadata:
"""The metadata of a release.
Supports versions 1.0, 1.1 and 1.2 (auto-detected). You can
instantiate the class with one of these arguments (or none):
- *path*, the path to a METADATA file
- *fileobj* give a file-like object with METADATA as content
- *mapping* is a dict-like object
"""
# TODO document that execution_context and platform_dependent are used
# to filter on query, not when setting a key
# also document the mapping API and UNKNOWN default key
def __init__(self, path=None, platform_dependent=False,
execution_context=None, fileobj=None, mapping=None):
self._fields = {}
self.requires_files = []
self.docutils_support = _HAS_DOCUTILS
self.platform_dependent = platform_dependent
self.execution_context = execution_context
if [path, fileobj, mapping].count(None) < 2:
raise TypeError('path, fileobj and mapping are exclusive')
if path is not None:
self.read(path)
elif fileobj is not None:
self.read_file(fileobj)
elif mapping is not None:
self.update(mapping)
def _set_best_version(self):
self._fields['Metadata-Version'] = _best_version(self._fields)
def _write_field(self, file, name, value):
file.write('%s: %s\n' % (name, value))
def __getitem__(self, name):
return self.get(name)
def __setitem__(self, name, value):
return self.set(name, value)
def __delitem__(self, name):
field_name = self._convert_name(name)
try:
del self._fields[field_name]
except KeyError:
raise KeyError(name)
self._set_best_version()
def __contains__(self, name):
return (name in self._fields or
self._convert_name(name) in self._fields)
def _convert_name(self, name):
if name in _ALL_FIELDS:
return name
name = name.replace('-', '_').lower()
return _ATTR2FIELD.get(name, name)
def _default_value(self, name):
if name in _LISTFIELDS or name in _ELEMENTSFIELD:
return []
return 'UNKNOWN'
def _check_rst_data(self, data):
"""Return warnings when the provided data has syntax errors."""
source_path = StringIO()
parser = Parser()
settings = frontend.OptionParser().get_default_values()
settings.tab_width = 4
settings.pep_references = None
settings.rfc_references = None
reporter = SilentReporter(source_path,
settings.report_level,
settings.halt_level,
stream=settings.warning_stream,
debug=settings.debug,
encoding=settings.error_encoding,
error_handler=settings.error_encoding_error_handler)
document = nodes.document(settings, reporter, source=source_path)
document.note_source(source_path, -1)
try:
parser.parse(data, document)
except AttributeError:
reporter.messages.append((-1, 'Could not finish the parsing.',
'', {}))
return reporter.messages
def _platform(self, value):
if not self.platform_dependent or ';' not in value:
return True, value
value, marker = value.split(';')
return interpret(marker, self.execution_context), value
def _remove_line_prefix(self, value):
return _LINE_PREFIX.sub('\n', value)
#
# Public API
#
def get_fullname(self, filesafe=False):
"""Return the distribution name with version.
If filesafe is true, return a filename-escaped form."""
name, version = self['Name'], self['Version']
if filesafe:
# For both name and version any runs of non-alphanumeric or '.'
# characters are replaced with a single '-'. Additionally any
# spaces in the version string become '.'
name = _FILESAFE.sub('-', name)
version = _FILESAFE.sub('-', version.replace(' ', '.'))
return '%s-%s' % (name, version)
def is_metadata_field(self, name):
"""return True if name is a valid metadata key"""
name = self._convert_name(name)
return name in _ALL_FIELDS
def is_multi_field(self, name):
name = self._convert_name(name)
return name in _LISTFIELDS
def read(self, filepath):
"""Read the metadata values from a file path."""
with open(filepath, 'r', encoding='utf-8') as fp:
self.read_file(fp)
def read_file(self, fileob):
"""Read the metadata values from a file object."""
msg = message_from_file(fileob)
self._fields['Metadata-Version'] = msg['metadata-version']
for field in _version2fieldlist(self['Metadata-Version']):
if field in _LISTFIELDS:
# we can have multiple lines
values = msg.get_all(field)
if field in _LISTTUPLEFIELDS and values is not None:
values = [tuple(value.split(',')) for value in values]
self.set(field, values)
else:
# single line
value = msg[field]
if value is not None and value != 'UNKNOWN':
self.set(field, value)
def write(self, filepath):
"""Write the metadata fields to filepath."""
with open(filepath, 'w', encoding='utf-8') as fp:
self.write_file(fp)
def write_file(self, fileobject):
"""Write the PKG-INFO format data to a file object."""
self._set_best_version()
for field in _version2fieldlist(self['Metadata-Version']):
values = self.get(field)
if field in _ELEMENTSFIELD:
self._write_field(fileobject, field, ','.join(values))
continue
if field not in _LISTFIELDS:
if field == 'Description':
values = values.replace('\n', '\n |')
values = [values]
if field in _LISTTUPLEFIELDS:
values = [','.join(value) for value in values]
for value in values:
self._write_field(fileobject, field, value)
def update(self, other=None, **kwargs):
"""Set metadata values from the given iterable `other` and kwargs.
Behavior is like `dict.update`: If `other` has a ``keys`` method,
they are looped over and ``self[key]`` is assigned ``other[key]``.
Else, ``other`` is an iterable of ``(key, value)`` iterables.
Keys that don't match a metadata field or that have an empty value are
dropped.
"""
# XXX the code should just use self.set, which does tbe same checks and
# conversions already, but that would break packaging.pypi: it uses the
# update method, which does not call _set_best_version (which set
# does), and thus allows having a Metadata object (as long as you don't
# modify or write it) with extra fields from PyPI that are not fields
# defined in Metadata PEPs. to solve it, the best_version system
# should be reworked so that it's called only for writing, or in a new
# strict mode, or with a new, more lax Metadata subclass in p7g.pypi
def _set(key, value):
if key in _ATTR2FIELD and value:
self.set(self._convert_name(key), value)
if not other:
# other is None or empty container
pass
elif hasattr(other, 'keys'):
for k in other.keys():
_set(k, other[k])
else:
for k, v in other:
_set(k, v)
if kwargs:
for k, v in kwargs.items():
_set(k, v)
def set(self, name, value):
"""Control then set a metadata field."""
name = self._convert_name(name)
if ((name in _ELEMENTSFIELD or name == 'Platform') and
not isinstance(value, (list, tuple))):
if isinstance(value, str):
value = [v.strip() for v in value.split(',')]
else:
value = []
elif (name in _LISTFIELDS and
not isinstance(value, (list, tuple))):
if isinstance(value, str):
value = [value]
else:
value = []
if logger.isEnabledFor(logging.WARNING):
project_name = self['Name']
if name in _PREDICATE_FIELDS and value is not None:
for v in value:
# check that the values are valid predicates
if not is_valid_predicate(v.split(';')[0]):
logger.warning(
'%r: %r is not a valid predicate (field %r)',
project_name, v, name)
# FIXME this rejects UNKNOWN, is that right?
elif name in _VERSIONS_FIELDS and value is not None:
if not is_valid_versions(value):
logger.warning('%r: %r is not a valid version (field %r)',
project_name, value, name)
elif name in _VERSION_FIELDS and value is not None:
if not is_valid_version(value):
logger.warning('%r: %r is not a valid version (field %r)',
project_name, value, name)
if name in _UNICODEFIELDS:
if name == 'Description':
value = self._remove_line_prefix(value)
self._fields[name] = value
self._set_best_version()
def get(self, name, default=_MISSING):
"""Get a metadata field."""
name = self._convert_name(name)
if name not in self._fields:
if default is _MISSING:
default = self._default_value(name)
return default
if name in _UNICODEFIELDS:
value = self._fields[name]
return value
elif name in _LISTFIELDS:
value = self._fields[name]
if value is None:
return []
res = []
for val in value:
valid, val = self._platform(val)
if not valid:
continue
if name not in _LISTTUPLEFIELDS:
res.append(val)
else:
# That's for Project-URL
res.append((val[0], val[1]))
return res
elif name in _ELEMENTSFIELD:
valid, value = self._platform(self._fields[name])
if not valid:
return []
if isinstance(value, str):
return value.split(',')
valid, value = self._platform(self._fields[name])
if not valid:
return None
return value
def check(self, strict=False, restructuredtext=False):
"""Check if the metadata is compliant. If strict is False then raise if
no Name or Version are provided"""
# XXX should check the versions (if the file was loaded)
missing, warnings = [], []
for attr in ('Name', 'Version'): # required by PEP 345
if attr not in self:
missing.append(attr)
if strict and missing != []:
msg = 'missing required metadata: %s' % ', '.join(missing)
raise MetadataMissingError(msg)
for attr in ('Home-page', 'Author'):
if attr not in self:
missing.append(attr)
if _HAS_DOCUTILS and restructuredtext:
warnings.extend(self._check_rst_data(self['Description']))
# checking metadata 1.2 (XXX needs to check 1.1, 1.0)
if self['Metadata-Version'] != '1.2':
return missing, warnings
def is_valid_predicates(value):
for v in value:
if not is_valid_predicate(v.split(';')[0]):
return False
return True
for fields, controller in ((_PREDICATE_FIELDS, is_valid_predicates),
(_VERSIONS_FIELDS, is_valid_versions),
(_VERSION_FIELDS, is_valid_version)):
for field in fields:
value = self.get(field, None)
if value is not None and not controller(value):
warnings.append('Wrong value for %r: %s' % (field, value))
return missing, warnings
def todict(self):
"""Return fields as a dict.
Field names will be converted to use the underscore-lowercase style
instead of hyphen-mixed case (i.e. home_page instead of Home-page).
"""
data = {
'metadata_version': self['Metadata-Version'],
'name': self['Name'],
'version': self['Version'],
'summary': self['Summary'],
'home_page': self['Home-page'],
'author': self['Author'],
'author_email': self['Author-email'],
'license': self['License'],
'description': self['Description'],
'keywords': self['Keywords'],
'platform': self['Platform'],
'classifier': self['Classifier'],
'download_url': self['Download-URL'],
}
if self['Metadata-Version'] == '1.2':
data['requires_dist'] = self['Requires-Dist']
data['requires_python'] = self['Requires-Python']
data['requires_external'] = self['Requires-External']
data['provides_dist'] = self['Provides-Dist']
data['obsoletes_dist'] = self['Obsoletes-Dist']
data['project_url'] = [','.join(url) for url in
self['Project-URL']]
elif self['Metadata-Version'] == '1.1':
data['provides'] = self['Provides']
data['requires'] = self['Requires']
data['obsoletes'] = self['Obsoletes']
return data
# Mapping API
# XXX these methods should return views or sets in 3.x
def keys(self):
return list(_version2fieldlist(self['Metadata-Version']))
def __iter__(self):
for key in self.keys():
yield key
def values(self):
return [self[key] for key in self.keys()]
def items(self):
return [(key, self[key]) for key in self.keys()]