mirror of https://github.com/python/cpython
gh-113257: Automatically generate pip SBOM metadata from wheel (#113295)
Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com>
This commit is contained in:
parent
11ee912327
commit
b221e03010
|
@ -1700,7 +1700,7 @@
|
||||||
"checksums": [
|
"checksums": [
|
||||||
{
|
{
|
||||||
"algorithm": "SHA256",
|
"algorithm": "SHA256",
|
||||||
"checksumValue": "7ccf472345f20d35bdc9d1841ff5f313260c2c33fe417f48c30ac46cccabf5be"
|
"checksumValue": "5052d7889c1f9d05224cd41741acb7c5d6fa735ab34e339624a614eaaa7e7d76"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"downloadLocation": "https://files.pythonhosted.org/packages/15/aa/3f4c7bcee2057a76562a5b33ecbd199be08cdb4443a02e26bd2c3cf6fc39/pip-23.3.2-py3-none-any.whl",
|
"downloadLocation": "https://files.pythonhosted.org/packages/15/aa/3f4c7bcee2057a76562a5b33ecbd199be08cdb4443a02e26bd2c3cf6fc39/pip-23.3.2-py3-none-any.whl",
|
||||||
|
|
|
@ -1,12 +1,16 @@
|
||||||
"""Tool for generating Software Bill of Materials (SBOM) for Python's dependencies"""
|
"""Tool for generating Software Bill of Materials (SBOM) for Python's dependencies"""
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import glob
|
import glob
|
||||||
import pathlib
|
import pathlib
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import sys
|
||||||
import typing
|
import typing
|
||||||
|
from urllib.request import urlopen
|
||||||
|
|
||||||
|
CPYTHON_ROOT_DIR = pathlib.Path(__file__).parent.parent.parent
|
||||||
|
|
||||||
# Before adding a new entry to this list, double check that
|
# Before adding a new entry to this list, double check that
|
||||||
# the license expression is a valid SPDX license expression:
|
# the license expression is a valid SPDX license expression:
|
||||||
|
@ -43,15 +47,14 @@ class PackageFiles(typing.NamedTuple):
|
||||||
# values to 'exclude' if we create new files within tracked
|
# values to 'exclude' if we create new files within tracked
|
||||||
# directories that aren't sourced from third-party packages.
|
# directories that aren't sourced from third-party packages.
|
||||||
PACKAGE_TO_FILES = {
|
PACKAGE_TO_FILES = {
|
||||||
|
# NOTE: pip's entry in this structure is automatically generated in
|
||||||
|
# the 'discover_pip_sbom_package()' function below.
|
||||||
"mpdecimal": PackageFiles(
|
"mpdecimal": PackageFiles(
|
||||||
include=["Modules/_decimal/libmpdec/**"]
|
include=["Modules/_decimal/libmpdec/**"]
|
||||||
),
|
),
|
||||||
"expat": PackageFiles(
|
"expat": PackageFiles(
|
||||||
include=["Modules/expat/**"]
|
include=["Modules/expat/**"]
|
||||||
),
|
),
|
||||||
"pip": PackageFiles(
|
|
||||||
include=["Lib/ensurepip/_bundled/pip-23.3.2-py3-none-any.whl"]
|
|
||||||
),
|
|
||||||
"macholib": PackageFiles(
|
"macholib": PackageFiles(
|
||||||
include=["Lib/ctypes/macholib/**"],
|
include=["Lib/ctypes/macholib/**"],
|
||||||
exclude=[
|
exclude=[
|
||||||
|
@ -106,13 +109,106 @@ def filter_gitignored_paths(paths: list[str]) -> list[str]:
|
||||||
return sorted([line.split()[-1] for line in git_check_ignore_lines if line.startswith("::")])
|
return sorted([line.split()[-1] for line in git_check_ignore_lines if line.startswith("::")])
|
||||||
|
|
||||||
|
|
||||||
|
def discover_pip_sbom_package(sbom_data: dict[str, typing.Any]) -> None:
    """Regenerate pip's SBOM package entry from the bundled wheel.

    pip is a part of a packaging ecosystem (Python, surprise!) so it's actually
    automatable to discover the metadata we need like the version and checksums
    so let's do that on behalf of our friends at the PyPA.

    The single pip wheel in 'Lib/ensurepip/_bundled' is located and hashed,
    then checked against the artifact with the same filename listed by PyPI's
    JSON API for that version.

    Side effects:
      * PACKAGE_TO_FILES["pip"] is set to include the discovered wheel.
      * Any existing "pip" entry in sbom_data["packages"] is removed and a
        freshly generated one is appended.

    Exits the process with status 1 (after printing a message) when there
    isn't exactly one pip wheel, when PyPI can't be reached, when its
    response is malformed, or when the local checksum doesn't match PyPI's.
    """
    ensurepip_bundled_dir = CPYTHON_ROOT_DIR / "Lib/ensurepip/_bundled"

    # Find the hopefully one pip wheel in the bundled directory.
    # Only '.whl' files count so stray 'pip-*' entries aren't mistaken
    # for the bundled wheel.
    pip_wheels = [
        filename
        for filename in os.listdir(ensurepip_bundled_dir)
        if filename.startswith("pip-") and filename.endswith(".whl")
    ]
    if len(pip_wheels) != 1:
        print("Zero or multiple pip wheels detected in 'Lib/ensurepip/_bundled'")
        sys.exit(1)
    pip_wheel_filename = pip_wheels[0]

    # Add the wheel filename to the list of files so the SBOM file
    # and relationship generator can work its magic on the wheel too.
    # (Item assignment mutates the module-level dict; no 'global' needed.)
    PACKAGE_TO_FILES["pip"] = PackageFiles(
        include=[f"Lib/ensurepip/_bundled/{pip_wheel_filename}"]
    )

    # Wheel filename format puts the version right after the project name.
    pip_version = pip_wheel_filename.split("-")[1]
    pip_checksum_sha256 = hashlib.sha256(
        (ensurepip_bundled_dir / pip_wheel_filename).read_bytes()
    ).hexdigest()

    # Get pip's download location from PyPI. Check that the checksum is correct too.
    try:
        # Use the response as a context manager so the connection is
        # always closed, even if reading or decoding fails.
        with urlopen(f"https://pypi.org/pypi/pip/{pip_version}/json") as response:
            raw_text = response.read()
        pip_release_metadata = json.loads(raw_text)
        url: dict[str, typing.Any]

        # Look for a matching artifact filename and then check
        # its remote checksum to the local one.
        for url in pip_release_metadata["urls"]:
            if url["filename"] == pip_wheel_filename:
                break
        else:
            raise ValueError(f"No matching filename on PyPI for '{pip_wheel_filename}'")
        if url["digests"]["sha256"] != pip_checksum_sha256:
            raise ValueError("Local pip checksum doesn't match artifact on PyPI")

        # Successfully found the download URL for the matching artifact.
        pip_download_url = url["url"]

    # KeyError covers a PyPI response that is valid JSON but is missing
    # one of the fields read above; report it like any other fetch failure.
    except (OSError, ValueError, KeyError) as e:
        print(f"Couldn't fetch pip's metadata from PyPI: {e}")
        sys.exit(1)

    # Remove pip from the existing SBOM packages if it's there
    # and then overwrite its entry with our own generated one.
    sbom_data["packages"] = [
        sbom_package
        for sbom_package in sbom_data["packages"]
        if sbom_package["name"] != "pip"
    ]
    sbom_data["packages"].append(
        {
            "SPDXID": spdx_id("SPDXRef-PACKAGE-pip"),
            "name": "pip",
            "versionInfo": pip_version,
            "originator": "Organization: Python Packaging Authority",
            "licenseConcluded": "MIT",
            "downloadLocation": pip_download_url,
            "checksums": [
                {"algorithm": "SHA256", "checksumValue": pip_checksum_sha256}
            ],
            "externalRefs": [
                {
                    "referenceCategory": "SECURITY",
                    "referenceLocator": f"cpe:2.3:a:pypa:pip:{pip_version}:*:*:*:*:*:*:*",
                    "referenceType": "cpe23Type",
                },
                {
                    "referenceCategory": "PACKAGE_MANAGER",
                    "referenceLocator": f"pkg:pypi/pip@{pip_version}",
                    "referenceType": "purl",
                },
            ],
            "primaryPackagePurpose": "SOURCE",
        }
    )
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
root_dir = pathlib.Path(__file__).parent.parent.parent
|
sbom_path = CPYTHON_ROOT_DIR / "Misc/sbom.spdx.json"
|
||||||
sbom_path = root_dir / "Misc/sbom.spdx.json"
|
|
||||||
sbom_data = json.loads(sbom_path.read_bytes())
|
sbom_data = json.loads(sbom_path.read_bytes())
|
||||||
|
|
||||||
# Make a bunch of assertions about the SBOM data to ensure it's consistent.
|
# Insert pip's SBOM metadata from the wheel.
|
||||||
|
discover_pip_sbom_package(sbom_data)
|
||||||
|
|
||||||
|
# Ensure all packages in this tool are represented also in the SBOM file.
|
||||||
assert {package["name"] for package in sbom_data["packages"]} == set(PACKAGE_TO_FILES)
|
assert {package["name"] for package in sbom_data["packages"]} == set(PACKAGE_TO_FILES)
|
||||||
|
|
||||||
|
# Make a bunch of assertions about the SBOM data to ensure it's consistent.
|
||||||
for package in sbom_data["packages"]:
|
for package in sbom_data["packages"]:
|
||||||
|
|
||||||
# Properties and ID must be properly formed.
|
# Properties and ID must be properly formed.
|
||||||
|
@ -138,17 +234,17 @@ def main() -> None:
|
||||||
for include in sorted(files.include):
|
for include in sorted(files.include):
|
||||||
|
|
||||||
# Find all the paths and then filter them through .gitignore.
|
# Find all the paths and then filter them through .gitignore.
|
||||||
paths = glob.glob(include, root_dir=root_dir, recursive=True)
|
paths = glob.glob(include, root_dir=CPYTHON_ROOT_DIR, recursive=True)
|
||||||
paths = filter_gitignored_paths(paths)
|
paths = filter_gitignored_paths(paths)
|
||||||
assert paths, include # Make sure that every value returns something!
|
assert paths, include # Make sure that every value returns something!
|
||||||
|
|
||||||
for path in paths:
|
for path in paths:
|
||||||
# Skip directories and excluded files
|
# Skip directories and excluded files
|
||||||
if not (root_dir / path).is_file() or path in exclude:
|
if not (CPYTHON_ROOT_DIR / path).is_file() or path in exclude:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# SPDX requires SHA1 to be used for files, but we provide SHA256 too.
|
# SPDX requires SHA1 to be used for files, but we provide SHA256 too.
|
||||||
data = (root_dir / path).read_bytes()
|
data = (CPYTHON_ROOT_DIR / path).read_bytes()
|
||||||
checksum_sha1 = hashlib.sha1(data).hexdigest()
|
checksum_sha1 = hashlib.sha1(data).hexdigest()
|
||||||
checksum_sha256 = hashlib.sha256(data).hexdigest()
|
checksum_sha256 = hashlib.sha256(data).hexdigest()
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue