From 45d8871dc4da33fcef92991031707c5bf88a40cf Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Thu, 29 Feb 2024 09:38:04 -0600 Subject: [PATCH] gh-112844: Add SBOM for external dependencies (#115789) --- .github/CODEOWNERS | 1 + Misc/externals.spdx.json | 174 +++++++++++++++++++++++++++++++++++ Tools/build/generate_sbom.py | 110 ++++++++++++++++++---- 3 files changed, 267 insertions(+), 18 deletions(-) create mode 100644 Misc/externals.spdx.json diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 1d0bce11132..e8eed400d96 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -247,6 +247,7 @@ Lib/test/test_interpreters/ @ericsnowcurrently /Tools/wasm/ @brettcannon # SBOM +/Misc/externals.spdx.json @sethmlarson /Misc/sbom.spdx.json @sethmlarson /Tools/build/generate_sbom.py @sethmlarson diff --git a/Misc/externals.spdx.json b/Misc/externals.spdx.json new file mode 100644 index 00000000000..2acfccbb004 --- /dev/null +++ b/Misc/externals.spdx.json @@ -0,0 +1,174 @@ +{ + "SPDXID": "SPDXRef-DOCUMENT", + "packages": [ + { + "SPDXID": "SPDXRef-PACKAGE-bzip2", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "ab8d1b0cc087c20d4c32c0e4fcf7d0c733a95da12cedc6d63b3f0a9af07427e2" + } + ], + "downloadLocation": "https://github.com/python/cpython-source-deps/archive/refs/tags/bzip2-1.0.8.tar.gz", + "externalRefs": [ + { + "referenceCategory": "SECURITY", + "referenceLocator": "cpe:2.3:a:bzip:bzip2:1.0.8:*:*:*:*:*:*:*", + "referenceType": "cpe23Type" + } + ], + "licenseConcluded": "NOASSERTION", + "name": "bzip2", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "1.0.8" + }, + { + "SPDXID": "SPDXRef-PACKAGE-libffi", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "9d802681adfea27d84cae0487a785fb9caa925bdad44c401b364c59ab2b8edda" + } + ], + "downloadLocation": "https://github.com/python/cpython-source-deps/archive/refs/tags/libffi-3.4.4.tar.gz", + "externalRefs": [ + { + "referenceCategory": "SECURITY", + "referenceLocator": 
"cpe:2.3:a:libffi_project:libffi:3.4.4:*:*:*:*:*:*:*", + "referenceType": "cpe23Type" + } + ], + "licenseConcluded": "NOASSERTION", + "name": "libffi", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "3.4.4" + }, + { + "SPDXID": "SPDXRef-PACKAGE-openssl", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "e6a77c273ebb284fedd8ea19b081fce74a9455936ffd47215f7c24713e2614b2" + } + ], + "downloadLocation": "https://github.com/python/cpython-source-deps/archive/refs/tags/openssl-3.0.13.tar.gz", + "externalRefs": [ + { + "referenceCategory": "SECURITY", + "referenceLocator": "cpe:2.3:a:openssl:openssl:3.0.13:*:*:*:*:*:*:*", + "referenceType": "cpe23Type" + } + ], + "licenseConcluded": "NOASSERTION", + "name": "openssl", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "3.0.13" + }, + { + "SPDXID": "SPDXRef-PACKAGE-sqlite", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "6f0364a27375435a34137b138ca4fedef8d23eec6493ca1dfff33bfc0c34fda4" + } + ], + "downloadLocation": "https://github.com/python/cpython-source-deps/archive/refs/tags/sqlite-3.45.1.0.tar.gz", + "externalRefs": [ + { + "referenceCategory": "SECURITY", + "referenceLocator": "cpe:2.3:a:sqlite:sqlite:3.45.1.0:*:*:*:*:*:*:*", + "referenceType": "cpe23Type" + } + ], + "licenseConcluded": "NOASSERTION", + "name": "sqlite", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "3.45.1.0" + }, + { + "SPDXID": "SPDXRef-PACKAGE-tcl-core", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "1d3f2015e49e269cf681373d433cd54d88d5ef7443fe87f5f50f5fcfe9003e73" + } + ], + "downloadLocation": "https://github.com/python/cpython-source-deps/archive/refs/tags/tcl-core-8.6.13.1.tar.gz", + "externalRefs": [ + { + "referenceCategory": "SECURITY", + "referenceLocator": "cpe:2.3:a:tcl_tk:tcl_tk:8.6.13.1:*:*:*:*:*:*:*", + "referenceType": "cpe23Type" + } + ], + "licenseConcluded": "NOASSERTION", + "name": "tcl-core", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "8.6.13.1" + 
}, + { + "SPDXID": "SPDXRef-PACKAGE-tk", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "6056203b8a6aaf6ea89d90a7b55dc7f407e55c093f731a98fd830a712a3c81d3" + } + ], + "downloadLocation": "https://github.com/python/cpython-source-deps/archive/refs/tags/tk-8.6.13.1.tar.gz", + "externalRefs": [ + { + "referenceCategory": "SECURITY", + "referenceLocator": "cpe:2.3:a:tcl_tk:tcl_tk:8.6.13.1:*:*:*:*:*:*:*", + "referenceType": "cpe23Type" + } + ], + "licenseConcluded": "NOASSERTION", + "name": "tk", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "8.6.13.1" + }, + { + "SPDXID": "SPDXRef-PACKAGE-xz", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "a15c168e39e87d750c3dc766edc7f19bdda57dacf01e509678467eace91ad282" + } + ], + "downloadLocation": "https://github.com/python/cpython-source-deps/archive/refs/tags/xz-5.2.5.tar.gz", + "externalRefs": [ + { + "referenceCategory": "SECURITY", + "referenceLocator": "cpe:2.3:a:xz_project:xz:5.2.5:*:*:*:*:*:*:*", + "referenceType": "cpe23Type" + } + ], + "licenseConcluded": "NOASSERTION", + "name": "xz", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "5.2.5" + }, + { + "SPDXID": "SPDXRef-PACKAGE-zlib", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "e3f3fb32564952006eb18b091ca8464740e5eca29d328cfb0b2da22768e0b638" + } + ], + "downloadLocation": "https://github.com/python/cpython-source-deps/archive/refs/tags/zlib-1.3.1.tar.gz", + "externalRefs": [ + { + "referenceCategory": "SECURITY", + "referenceLocator": "cpe:2.3:a:zlib:zlib:1.3.1:*:*:*:*:*:*:*", + "referenceType": "cpe23Type" + } + ], + "licenseConcluded": "NOASSERTION", + "name": "zlib", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "1.3.1" + } + ], + "spdxVersion": "SPDX-2.3" +} \ No newline at end of file diff --git a/Tools/build/generate_sbom.py b/Tools/build/generate_sbom.py index 201c81c4d14..6aa4946ee22 100644 --- a/Tools/build/generate_sbom.py +++ b/Tools/build/generate_sbom.py @@ -7,9 +7,8 @@ import 
glob import pathlib import subprocess import sys +import urllib.request import typing -import zipfile -from urllib.request import urlopen CPYTHON_ROOT_DIR = pathlib.Path(__file__).parent.parent.parent @@ -125,30 +124,41 @@ def filter_gitignored_paths(paths: list[str]) -> list[str]: return sorted([line.split()[-1] for line in git_check_ignore_lines if line.startswith("::")]) -def main() -> None: - sbom_path = CPYTHON_ROOT_DIR / "Misc/sbom.spdx.json" - sbom_data = json.loads(sbom_path.read_bytes()) - - # We regenerate all of this information. Package information - # should be preserved though since that is edited by humans. - sbom_data["files"] = [] - sbom_data["relationships"] = [] - - # Ensure all packages in this tool are represented also in the SBOM file. - actual_names = {package["name"] for package in sbom_data["packages"]} - expected_names = set(PACKAGE_TO_FILES) - error_if( - actual_names != expected_names, - f"Packages defined in SBOM tool don't match those defined in SBOM file: {actual_names}, {expected_names}", +def get_externals() -> list[str]: + """ + Parses 'PCbuild/get_externals.bat' for external libraries. + Returns a list of git tag strings, each of the form '<name>-<version>'. + """ + get_externals_bat_path = CPYTHON_ROOT_DIR / "PCbuild/get_externals.bat" + externals = re.findall( + r"set\s+libraries\s*=\s*%libraries%\s+([a-zA-Z0-9.-]+)\s", + get_externals_bat_path.read_text() ) + return externals + + +def check_sbom_packages(sbom_data: dict[str, typing.Any]) -> None: + """Make a bunch of assertions about the SBOM package data to ensure it's consistent.""" - # Make a bunch of assertions about the SBOM data to ensure it's consistent. for package in sbom_data["packages"]: # Properties and ID must be properly formed. error_if( "name" not in package, "Package is missing the 'name' field" ) + + # Verify that the checksum matches the expected value + # and that the download URL is valid.
+ if "checksums" not in package or "CI" in os.environ: + download_location = package["downloadLocation"] + resp = urllib.request.urlopen(download_location) + error_if(resp.status != 200, f"Couldn't access URL: {download_location}'") + + package["checksums"] = [{ + "algorithm": "SHA256", + "checksumValue": hashlib.sha256(resp.read()).hexdigest() + }] + missing_required_keys = REQUIRED_PROPERTIES_PACKAGE - set(package.keys()) error_if( bool(missing_required_keys), @@ -180,6 +190,26 @@ def main() -> None: f"License identifier must be 'NOASSERTION'" ) + +def create_source_sbom() -> None: + sbom_path = CPYTHON_ROOT_DIR / "Misc/sbom.spdx.json" + sbom_data = json.loads(sbom_path.read_bytes()) + + # We regenerate all of this information. Package information + # should be preserved though since that is edited by humans. + sbom_data["files"] = [] + sbom_data["relationships"] = [] + + # Ensure all packages in this tool are represented also in the SBOM file. + actual_names = {package["name"] for package in sbom_data["packages"]} + expected_names = set(PACKAGE_TO_FILES) + error_if( + actual_names != expected_names, + f"Packages defined in SBOM tool don't match those defined in SBOM file: {actual_names}, {expected_names}", + ) + + check_sbom_packages(sbom_data) + # We call 'sorted()' here a lot to avoid filesystem scan order issues. 
for name, files in sorted(PACKAGE_TO_FILES.items()): package_spdx_id = spdx_id(f"SPDXRef-PACKAGE-{name}") @@ -224,5 +254,49 @@ def main() -> None: sbom_path.write_text(json.dumps(sbom_data, indent=2, sort_keys=True)) +def create_externals_sbom() -> None: + sbom_path = CPYTHON_ROOT_DIR / "Misc/externals.spdx.json" + sbom_data = json.loads(sbom_path.read_bytes()) + + externals = get_externals() + externals_name_to_version = {} + externals_name_to_git_tag = {} + for git_tag in externals: + name, _, version = git_tag.rpartition("-") + externals_name_to_version[name] = version + externals_name_to_git_tag[name] = git_tag + + # Ensure all packages in this tool are represented also in the SBOM file. + actual_names = {package["name"] for package in sbom_data["packages"]} + expected_names = set(externals_name_to_version) + error_if( + actual_names != expected_names, + f"Packages defined in SBOM tool don't match those defined in SBOM file: {actual_names}, {expected_names}", + ) + + # Set the versionInfo and downloadLocation fields for all packages. + for package in sbom_data["packages"]: + package["versionInfo"] = externals_name_to_version[package["name"]] + download_location = ( + f"https://github.com/python/cpython-source-deps/archive/refs/tags/{externals_name_to_git_tag[package['name']]}.tar.gz" + ) + download_location_changed = download_location != package["downloadLocation"] + package["downloadLocation"] = download_location + + # If the download URL has changed we want the checksum to get recalculated. + if download_location_changed: + package.pop("checksums", None) + + check_sbom_packages(sbom_data) + + # Update the SBOM on disk + sbom_path.write_text(json.dumps(sbom_data, indent=2, sort_keys=True)) + + +def main() -> None: + create_source_sbom() + create_externals_sbom() + + if __name__ == "__main__": main()