summaryrefslogtreecommitdiff
path: root/poky/meta/lib/oe/spdx30_tasks.py
diff options
context:
space:
mode:
Diffstat (limited to 'poky/meta/lib/oe/spdx30_tasks.py')
-rw-r--r--poky/meta/lib/oe/spdx30_tasks.py1243
1 files changed, 1243 insertions, 0 deletions
diff --git a/poky/meta/lib/oe/spdx30_tasks.py b/poky/meta/lib/oe/spdx30_tasks.py
new file mode 100644
index 0000000000..6a2858c665
--- /dev/null
+++ b/poky/meta/lib/oe/spdx30_tasks.py
@@ -0,0 +1,1243 @@
+#
+# Copyright OpenEmbedded Contributors
+#
+# SPDX-License-Identifier: GPL-2.0-only
+#
+
+import json
+import oe.cve_check
+import oe.packagedata
+import oe.patch
+import oe.sbom30
+import oe.spdx30
+import oe.spdx_common
+import oe.sdk
+import os
+
+from contextlib import contextmanager
+from datetime import datetime, timezone
+from pathlib import Path
+
+
def set_timestamp_now(d, o, prop):
    """Set *prop* on object *o* to the current UTC time, or clear it.

    When SPDX_INCLUDE_TIMESTAMPS is not "1" the property is deleted
    instead of set; deleting also doubles as a check that the property
    exists on the object and that it is optional.
    """
    include_timestamps = d.getVar("SPDX_INCLUDE_TIMESTAMPS") == "1"
    if include_timestamps:
        setattr(o, prop, datetime.now(timezone.utc))
        return
    # Removing the attribute verifies that the property actually exists
    # and that it is not mandatory
    delattr(o, prop)
+
+
def add_license_expression(d, objset, license_expression, license_data):
    """Convert a BitBake LICENSE expression to a SPDX license expression.

    Tokens that are not known SPDX licenses become "LicenseRef-" entries
    whose verbatim text is located on disk and added to *objset*. Returns
    the LicenseExpression object created in *objset*.
    """
    # Cache of license name -> SimpleLicensingText object
    simple_license_text = {}
    # Map of "LicenseRef-*" name -> SPDX ID of its license text
    license_text_map = {}
    license_ref_idx = 0

    def add_license_text(name):
        # Fetch (or create) the SimpleLicensingText object holding the
        # verbatim text of non-SPDX license *name*
        nonlocal objset
        nonlocal simple_license_text

        if name in simple_license_text:
            return simple_license_text[name]

        lic = objset.find_filter(
            oe.spdx30.simplelicensing_SimpleLicensingText,
            name=name,
        )

        if lic is not None:
            simple_license_text[name] = lic
            return lic

        lic = objset.add(
            oe.spdx30.simplelicensing_SimpleLicensingText(
                _id=objset.new_spdxid("license-text", name),
                creationInfo=objset.doc.creationInfo,
                name=name,
            )
        )
        simple_license_text[name] = lic

        if name == "PD":
            lic.simplelicensing_licenseText = "Software released to the public domain"
            return lic

        # Search for the license in COMMON_LICENSE_DIR and LICENSE_PATH
        for directory in [d.getVar("COMMON_LICENSE_DIR")] + (
            d.getVar("LICENSE_PATH") or ""
        ).split():
            try:
                with (Path(directory) / name).open(errors="replace") as f:
                    lic.simplelicensing_licenseText = f.read()
                    return lic

            except FileNotFoundError:
                pass

        # If it's not SPDX or PD, then NO_GENERIC_LICENSE must be set
        filename = d.getVarFlag("NO_GENERIC_LICENSE", name)
        if filename:
            filename = d.expand("${S}/" + filename)
            with open(filename, errors="replace") as f:
                lic.simplelicensing_licenseText = f.read()
                return lic
        else:
            bb.fatal("Cannot find any text for license %s" % name)

    def convert(l):
        # Translate a single token of the BitBake license expression into
        # its SPDX equivalent
        nonlocal license_text_map
        nonlocal license_ref_idx

        if l == "(" or l == ")":
            return l

        if l == "&":
            return "AND"

        if l == "|":
            return "OR"

        if l == "CLOSED":
            return "NONE"

        spdx_license = d.getVarFlag("SPDXLICENSEMAP", l) or l
        if spdx_license in license_data["licenses"]:
            return spdx_license

        # Not a known SPDX license: emit a LicenseRef and record its text
        spdx_license = "LicenseRef-" + l
        if spdx_license not in license_text_map:
            license_text_map[spdx_license] = add_license_text(l)._id

        return spdx_license

    # Pad operators and parentheses with spaces so the expression can be
    # tokenized with a simple split()
    lic_split = (
        license_expression.replace("(", " ( ")
        .replace(")", " ) ")
        .replace("|", " | ")
        .replace("&", " & ")
        .split()
    )
    spdx_license_expression = " ".join(convert(l) for l in lic_split)

    return objset.new_license_expression(spdx_license_expression, license_data, license_text_map)
+
+
def add_package_files(
    d,
    objset,
    topdir,
    get_spdxid,
    get_purposes,
    license_data,
    *,
    archive=None,
    ignore_dirs=None,
    ignore_top_level_dirs=None,
):
    """Walk *topdir* and add every regular file to *objset* as a SPDX file.

    Arguments:
        d -- BitBake datastore
        objset -- oe.sbom30.ObjectSet receiving the new file objects
        topdir -- root of the directory tree to walk
        get_spdxid -- callable(file_counter) -> SPDX ID for the next file
        get_purposes -- callable(filepath) -> list of software purposes
        license_data -- SPDX license database used when scanning sources
        archive -- optional tarfile that each file is also appended to
        ignore_dirs -- directory basenames skipped anywhere in the tree
        ignore_top_level_dirs -- basenames skipped only directly in topdir

    Returns the set of created SPDX file objects.
    """
    # Use None sentinels instead of mutable ([]) default arguments
    if ignore_dirs is None:
        ignore_dirs = []
    if ignore_top_level_dirs is None:
        ignore_top_level_dirs = []

    source_date_epoch = d.getVar("SOURCE_DATE_EPOCH")
    if source_date_epoch:
        source_date_epoch = int(source_date_epoch)

    spdx_files = set()

    file_counter = 1
    for subdir, dirs, files in os.walk(topdir):
        # Prune ignored directories in place so os.walk() does not descend
        # into them (avoid reusing "d" — that's the datastore)
        dirs[:] = [dirname for dirname in dirs if dirname not in ignore_dirs]
        if subdir == str(topdir):
            dirs[:] = [
                dirname for dirname in dirs if dirname not in ignore_top_level_dirs
            ]

        for file in files:
            filepath = Path(subdir) / file
            # Only regular files; SPDX has no symlink representation yet
            if filepath.is_symlink() or not filepath.is_file():
                continue

            bb.debug(1, "Adding file %s to %s" % (filepath, objset.doc._id))

            filename = str(filepath.relative_to(topdir))
            file_purposes = get_purposes(filepath)

            spdx_file = objset.new_file(
                get_spdxid(file_counter),
                filename,
                filepath,
                purposes=file_purposes,
            )
            spdx_files.add(spdx_file)

            # Only source files are scanned for declared licenses
            if oe.spdx30.software_SoftwarePurpose.source in file_purposes:
                objset.scan_declared_licenses(spdx_file, filepath, license_data)

            if archive is not None:
                with filepath.open("rb") as f:
                    info = archive.gettarinfo(fileobj=f)
                    # Normalize ownership and clamp mtimes to
                    # SOURCE_DATE_EPOCH so the archive is reproducible
                    info.name = filename
                    info.uid = 0
                    info.gid = 0
                    info.uname = "root"
                    info.gname = "root"

                    if source_date_epoch is not None and info.mtime > source_date_epoch:
                        info.mtime = source_date_epoch

                    archive.addfile(info, f)

            file_counter += 1

    return spdx_files
+
+
def get_package_sources_from_debug(
    d, package, package_files, sources, source_hash_cache
):
    """Resolve the debug sources of *package* to SPDX source file objects.

    Walks the "debugsrc" entries recorded in the package data, hashes each
    referenced file found under the debug search paths (memoizing hashes
    in *source_hash_cache*), and looks each hash up in *sources* (the
    SHA-256 -> SPDX file map collected from the dependencies).

    Returns a set of matched SPDX source file objects, or None when no
    package data exists for *package*.
    """

    def file_path_match(file_path, pkg_file):
        # A path matches a SPDX package file when it equals the file's
        # name or any recorded alias (leading slashes ignored)
        if file_path.lstrip("/") == pkg_file.name.lstrip("/"):
            return True

        for e in pkg_file.extension:
            if isinstance(e, oe.sbom30.OEFileNameAliasExtension):
                for a in e.aliases:
                    if file_path.lstrip("/") == a.lstrip("/"):
                        return True

        return False

    debug_search_paths = [
        Path(d.getVar("PKGD")),
        Path(d.getVar("STAGING_DIR_TARGET")),
        Path(d.getVar("STAGING_DIR_NATIVE")),
        Path(d.getVar("STAGING_KERNEL_DIR")),
    ]

    pkg_data = oe.packagedata.read_subpkgdata_extended(package, d)

    if pkg_data is None:
        return

    dep_source_files = set()

    for file_path, file_data in pkg_data["files_info"].items():
        if "debugsrc" not in file_data:
            continue

        if not any(file_path_match(file_path, pkg_file) for pkg_file in package_files):
            bb.fatal(
                "No package file found for %s in %s; SPDX found: %s"
                % (str(file_path), package, " ".join(p.name for p in package_files))
            )
            # Defensive: bb.fatal() raises, but keep the continue in case
            # that ever changes
            continue

        for debugsrc in file_data["debugsrc"]:
            for search in debug_search_paths:
                # Kernel sources are staged under a fixed prefix; strip it
                # before joining onto the search path
                if debugsrc.startswith("/usr/src/kernel"):
                    debugsrc_path = search / debugsrc.replace("/usr/src/kernel/", "")
                else:
                    debugsrc_path = search / debugsrc.lstrip("/")

                if debugsrc_path in source_hash_cache:
                    file_sha256 = source_hash_cache[debugsrc_path]
                    # A cached None means "not a hashable file"; try the
                    # next search path
                    if file_sha256 is None:
                        continue
                else:
                    # We can only hash files below, skip directories, links, etc.
                    if not debugsrc_path.is_file():
                        source_hash_cache[debugsrc_path] = None
                        continue

                    file_sha256 = bb.utils.sha256_file(debugsrc_path)
                    source_hash_cache[debugsrc_path] = file_sha256

                if file_sha256 in sources:
                    source_file = sources[file_sha256]
                    dep_source_files.add(source_file)
                else:
                    bb.debug(
                        1,
                        "Debug source %s with SHA256 %s not found in any dependency"
                        % (str(debugsrc_path), file_sha256),
                    )
                break
            else:
                # for/else: no search path contained the file at all
                bb.debug(1, "Debug source %s not found" % debugsrc)

    return dep_source_files
+
+
def collect_dep_objsets(d, build):
    """Load the SPDX object sets of this recipe's build-time dependencies.

    Returns a tuple (dep_objsets, dep_builds): the object sets of the
    dependencies that are part of the task hash, and the set of all
    dependency build_Build objects (which can always be linked by alias).
    """
    deps = oe.spdx_common.get_spdx_deps(d)

    dep_objsets = []
    dep_builds = set()

    for dep in deps:
        bb.debug(1, "Fetching SPDX for dependency %s" % (dep.pn))
        dep_build, dep_objset = oe.sbom30.find_root_obj_in_jsonld(
            d, "recipes", dep.pn, oe.spdx30.build_Build
        )
        # If the dependency is part of the taskhash, return it to be linked
        # against. Otherwise, it cannot be linked against because this
        # recipe will not be rebuilt if the dependency changes
        if dep.in_taskhash:
            dep_objsets.append(dep_objset)

        # The build _can_ be linked against (by alias)
        dep_builds.add(dep_build)

    return dep_objsets, dep_builds
+
+
def collect_dep_sources(dep_objsets):
    """Build a SHA-256 -> source software_File map over all dependencies.

    Scans every dependency object set for hasInputs relationships from its
    root build and collects the source files attached to them, keyed by
    their SHA-256 hash. Fails the build when a source file lacks a SHA-256
    or a dependency has no root build.
    """
    sources = {}
    for objset in dep_objsets:
        # Don't collect sources from native recipes as they
        # match non-native sources also.
        if objset.is_native():
            continue

        bb.debug(1, "Fetching Sources for dependency %s" % (objset.doc.name))

        root_build = objset.find_root(oe.spdx30.build_Build)
        if not root_build:
            bb.fatal("Unable to find a build")

        for rel in objset.foreach_type(oe.spdx30.Relationship):
            if rel.from_ is not root_build:
                continue
            if rel.relationshipType != oe.spdx30.RelationshipType.hasInputs:
                continue

            for target in rel.to:
                if not isinstance(target, oe.spdx30.software_File):
                    continue
                if (
                    target.software_primaryPurpose
                    != oe.spdx30.software_SoftwarePurpose.source
                ):
                    continue

                # Key the file by its first SHA-256 verification hash
                sha256 = next(
                    (
                        v.hashValue
                        for v in target.verifiedUsing
                        if v.algorithm == oe.spdx30.HashAlgorithm.sha256
                    ),
                    None,
                )
                if sha256 is None:
                    bb.fatal(
                        "No SHA256 found for %s in %s" % (target.name, objset.doc.name)
                    )
                sources[sha256] = target

    return sources
+
+
def add_download_files(d, objset):
    """Add all SRC_URI downloads to *objset* as build inputs.

    Local files and directories become software_File objects; remote
    downloads become software_Package objects carrying their download
    location and expected checksums. Returns the set of created objects.
    """
    inputs = set()

    urls = d.getVar("SRC_URI").split()
    fetch = bb.fetch2.Fetch(urls, d)

    for download_idx, src_uri in enumerate(urls):
        fd = fetch.ud[src_uri]

        for name in fd.names:
            file_name = os.path.basename(fetch.localpath(src_uri))
            # Entries applied as patches get the "patch" purpose,
            # everything else is plain source
            if oe.patch.patch_path(src_uri, fetch, "", expand=False):
                primary_purpose = oe.spdx30.software_SoftwarePurpose.patch
            else:
                primary_purpose = oe.spdx30.software_SoftwarePurpose.source

            if fd.type == "file":
                if os.path.isdir(fd.localpath):
                    # A local directory: add each regular file inside it
                    walk_idx = 1
                    for root, dirs, files in os.walk(fd.localpath):
                        for f in files:
                            f_path = os.path.join(root, f)
                            if os.path.islink(f_path):
                                # TODO: SPDX doesn't support symlinks yet
                                continue

                            file = objset.new_file(
                                objset.new_spdxid(
                                    "source", str(download_idx + 1), str(walk_idx)
                                ),
                                os.path.join(
                                    file_name, os.path.relpath(f_path, fd.localpath)
                                ),
                                f_path,
                                purposes=[primary_purpose],
                            )

                            inputs.add(file)
                            walk_idx += 1

                else:
                    # A single local file
                    file = objset.new_file(
                        objset.new_spdxid("source", str(download_idx + 1)),
                        file_name,
                        fd.localpath,
                        purposes=[primary_purpose],
                    )
                    inputs.add(file)

            else:
                # A remote download: reconstruct the canonical URI from the
                # fetcher data (scheme[+proto]://host/path[@rev])
                uri = fd.type
                proto = getattr(fd, "proto", None)
                if proto is not None:
                    uri = uri + "+" + proto
                uri = uri + "://" + fd.host + fd.path

                if fd.method.supports_srcrev():
                    uri = uri + "@" + fd.revisions[name]

                dl = objset.add(
                    oe.spdx30.software_Package(
                        _id=objset.new_spdxid("source", str(download_idx + 1)),
                        creationInfo=objset.doc.creationInfo,
                        name=file_name,
                        software_primaryPurpose=primary_purpose,
                        software_downloadLocation=uri,
                    )
                )

                if fd.method.supports_checksum(fd):
                    # TODO Need something better than hard coding this
                    for checksum_id in ["sha256", "sha1"]:
                        expected_checksum = getattr(
                            fd, "%s_expected" % checksum_id, None
                        )
                        if expected_checksum is None:
                            continue

                        dl.verifiedUsing.append(
                            oe.spdx30.Hash(
                                algorithm=getattr(oe.spdx30.HashAlgorithm, checksum_id),
                                hashValue=expected_checksum,
                            )
                        )

                inputs.add(dl)

    return inputs
+
+
def set_purposes(d, element, *var_names, force_purposes=()):
    """Set the primary and additional software purposes on *element*.

    The first variable in *var_names* with a non-empty value supplies the
    purpose names; *force_purposes* entries are always prepended. The
    first purpose becomes software_primaryPurpose and the remainder
    software_additionalPurpose. Warns and leaves *element* untouched when
    no purposes are found.
    """
    # Copy into a fresh list so callers' sequences are never mutated; the
    # immutable () default avoids the shared-mutable-default pitfall
    purposes = list(force_purposes)

    for var_name in var_names:
        val = d.getVar(var_name)
        if val:
            purposes.extend(val.split())
            break

    if not purposes:
        bb.warn("No SPDX purposes found in %s" % " ".join(var_names))
        return

    element.software_primaryPurpose = getattr(
        oe.spdx30.software_SoftwarePurpose, purposes[0]
    )
    element.software_additionalPurpose = [
        getattr(oe.spdx30.software_SoftwarePurpose, p) for p in purposes[1:]
    ]
+
+
def create_spdx(d):
    """Create the recipe-level and per-package SPDX 3.0 documents.

    Collects downloads, (optionally) patched sources, dependency links,
    license expressions and CVE/VEX data for the recipe, then writes one
    staged SPDX document per packaged output plus the recipe document.
    Runtime dependency data is added later by create_package_spdx().
    """

    def set_var_field(var, obj, name, package=None):
        # Copy datastore variable *var* (preferring the package-specific
        # override) onto attribute *name* of *obj*, if it has a value
        val = None
        if package:
            val = d.getVar("%s:%s" % (var, package))

        if not val:
            val = d.getVar(var)

        if val:
            setattr(obj, name, val)

    license_data = oe.spdx_common.load_spdx_license_data(d)

    deploydir = Path(d.getVar("SPDXDEPLOY"))
    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    spdx_workdir = Path(d.getVar("SPDXWORK"))
    include_sources = d.getVar("SPDX_INCLUDE_SOURCES") == "1"
    pkg_arch = d.getVar("SSTATE_PKGARCH")
    is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class(
        "cross", d
    )
    include_vex = d.getVar("SPDX_INCLUDE_VEX")
    if not include_vex in ("none", "current", "all"):
        bb.fatal("SPDX_INCLUDE_VEX must be one of 'none', 'current', 'all'")

    build_objset = oe.sbom30.ObjectSet.new_objset(d, d.getVar("PN"))

    build = build_objset.new_task_build("recipe", "recipe")
    build_objset.doc.rootElement.append(build)

    build_objset.set_is_native(is_native)

    # User-requested variables become plain annotations on the build
    for var in (d.getVar("SPDX_CUSTOM_ANNOTATION_VARS") or "").split():
        new_annotation(
            d,
            build_objset,
            build,
            "%s=%s" % (var, d.getVar(var)),
            oe.spdx30.AnnotationType.other,
        )

    build_inputs = set()

    # Add CVEs
    cve_by_status = {}
    if include_vex != "none":
        for cve in d.getVarFlags("CVE_STATUS") or {}:
            decoded_status = oe.cve_check.decode_cve_status(d, cve)

            # If this CVE is fixed upstream, skip it unless all CVEs are
            # specified.
            if include_vex != "all" and 'detail' in decoded_status and \
                decoded_status['detail'] in (
                    "fixed-version",
                    "cpe-stable-backport",
                ):
                bb.debug(1, "Skipping %s since it is already fixed upstream" % cve)
                continue

            # Group by status mapping (e.g. Patched/Unpatched/Ignored)
            cve_by_status.setdefault(decoded_status['mapping'], {})[cve] = (
                build_objset.new_cve_vuln(cve),
                decoded_status['detail'],
                decoded_status['description'],
            )

    cpe_ids = oe.cve_check.get_cpe_ids(d.getVar("CVE_PRODUCT"), d.getVar("CVE_VERSION"))

    source_files = add_download_files(d, build_objset)
    build_inputs |= source_files

    recipe_spdx_license = add_license_expression(d, build_objset, d.getVar("LICENSE"), license_data)
    build_objset.new_relationship(
        source_files,
        oe.spdx30.RelationshipType.hasConcludedLicense,
        [recipe_spdx_license],
    )

    # Optionally add the patched source tree file-by-file
    if oe.spdx_common.process_sources(d) and include_sources:
        bb.debug(1, "Adding source files to SPDX")
        oe.spdx_common.get_patched_src(d)

        build_inputs |= add_package_files(
            d,
            build_objset,
            spdx_workdir,
            lambda file_counter: build_objset.new_spdxid(
                "sourcefile", str(file_counter)
            ),
            lambda filepath: [oe.spdx30.software_SoftwarePurpose.source],
            license_data,
            ignore_dirs=[".git"],
            ignore_top_level_dirs=["temp"],
            archive=None,
        )

    dep_objsets, dep_builds = collect_dep_objsets(d, build)
    if dep_builds:
        build_objset.new_scoped_relationship(
            [build],
            oe.spdx30.RelationshipType.dependsOn,
            oe.spdx30.LifecycleScopeType.build,
            sorted(oe.sbom30.get_element_link_id(b) for b in dep_builds),
        )

    debug_source_ids = set()
    source_hash_cache = {}

    # Write out the package SPDX data now. It is not complete as we cannot
    # write the runtime data, so write it to a staging area and a later task
    # will write out the final collection

    # TODO: Handle native recipe output
    if not is_native:
        bb.debug(1, "Collecting Dependency sources files")
        sources = collect_dep_sources(dep_objsets)

        bb.build.exec_func("read_subpackage_metadata", d)

        pkgdest = Path(d.getVar("PKGDEST"))
        for package in d.getVar("PACKAGES").split():
            if not oe.packagedata.packaged(package, d):
                continue

            pkg_name = d.getVar("PKG:%s" % package) or package

            bb.debug(1, "Creating SPDX for package %s" % pkg_name)

            pkg_objset = oe.sbom30.ObjectSet.new_objset(d, pkg_name)

            spdx_package = pkg_objset.add_root(
                oe.spdx30.software_Package(
                    _id=pkg_objset.new_spdxid("package", pkg_name),
                    creationInfo=pkg_objset.doc.creationInfo,
                    name=pkg_name,
                    software_packageVersion=d.getVar("PV"),
                )
            )
            set_timestamp_now(d, spdx_package, "builtTime")

            set_purposes(
                d,
                spdx_package,
                "SPDX_PACKAGE_ADDITIONAL_PURPOSE:%s" % package,
                "SPDX_PACKAGE_ADDITIONAL_PURPOSE",
                force_purposes=["install"],
            )

            supplier = build_objset.new_agent("SPDX_PACKAGE_SUPPLIER")
            if supplier is not None:
                # new_agent may return either a plain string or an Agent
                # object; only the latter has an _id
                spdx_package.supplier = (
                    supplier if isinstance(supplier, str) else supplier._id
                )

            set_var_field(
                "HOMEPAGE", spdx_package, "software_homePage", package=package
            )
            set_var_field("SUMMARY", spdx_package, "summary", package=package)
            set_var_field("DESCRIPTION", spdx_package, "description", package=package)

            pkg_objset.new_scoped_relationship(
                [build._id],
                oe.spdx30.RelationshipType.hasOutputs,
                oe.spdx30.LifecycleScopeType.build,
                [spdx_package],
            )

            for cpe_id in cpe_ids:
                spdx_package.externalIdentifier.append(
                    oe.spdx30.ExternalIdentifier(
                        externalIdentifierType=oe.spdx30.ExternalIdentifierType.cpe23,
                        identifier=cpe_id,
                    )
                )

            # TODO: Generate a file for each actual IPK/DEB/RPM/TGZ file
            # generated and link it to the package
            # spdx_package_file = pkg_objset.add(oe.spdx30.software_File(
            #    _id=pkg_objset.new_spdxid("distribution", pkg_name),
            #    creationInfo=pkg_objset.doc.creationInfo,
            #    name=pkg_name,
            #    software_primaryPurpose=spdx_package.software_primaryPurpose,
            #    software_additionalPurpose=spdx_package.software_additionalPurpose,
            # ))
            # set_timestamp_now(d, spdx_package_file, "builtTime")

            ## TODO add hashes
            # pkg_objset.new_relationship(
            #    [spdx_package],
            #    oe.spdx30.RelationshipType.hasDistributionArtifact,
            #    [spdx_package_file],
            # )

            # NOTE: licenses live in the recipe collection and are referenced
            # by ID in the package collection(s). This helps reduce duplication
            # (since a lot of packages will have the same license), and also
            # prevents duplicate license SPDX IDs in the packages
            package_license = d.getVar("LICENSE:%s" % package)
            if package_license and package_license != d.getVar("LICENSE"):
                package_spdx_license = add_license_expression(
                    d, build_objset, package_license, license_data
                )
            else:
                package_spdx_license = recipe_spdx_license

            pkg_objset.new_relationship(
                [spdx_package],
                oe.spdx30.RelationshipType.hasConcludedLicense,
                [package_spdx_license._id],
            )

            # NOTE: CVE Elements live in the recipe collection
            all_cves = set()
            for status, cves in cve_by_status.items():
                for cve, items in cves.items():
                    spdx_cve, detail, description = items

                    all_cves.add(spdx_cve._id)

                    if status == "Patched":
                        pkg_objset.new_vex_patched_relationship(
                            [spdx_cve._id], [spdx_package]
                        )
                    elif status == "Unpatched":
                        pkg_objset.new_vex_unpatched_relationship(
                            [spdx_cve._id], [spdx_package]
                        )
                    elif status == "Ignored":
                        spdx_vex = pkg_objset.new_vex_ignored_relationship(
                            [spdx_cve._id],
                            [spdx_package],
                            impact_statement=description,
                        )

                        if detail in (
                            "ignored",
                            "cpe-incorrect",
                            "disputed",
                            "upstream-wontfix",
                        ):
                            # VEX doesn't have justifications for this
                            pass
                        elif detail in (
                            "not-applicable-config",
                            "not-applicable-platform",
                        ):
                            for v in spdx_vex:
                                v.security_justificationType = (
                                    oe.spdx30.security_VexJustificationType.vulnerableCodeNotPresent
                                )
                        else:
                            bb.fatal(f"Unknown detail '{detail}' for ignored {cve}")
                    else:
                        bb.fatal(f"Unknown {cve} status '{status}'")

            if all_cves:
                pkg_objset.new_relationship(
                    [spdx_package],
                    oe.spdx30.RelationshipType.hasAssociatedVulnerability,
                    sorted(list(all_cves)),
                )

            bb.debug(1, "Adding package files to SPDX for package %s" % pkg_name)
            package_files = add_package_files(
                d,
                pkg_objset,
                pkgdest / package,
                lambda file_counter: pkg_objset.new_spdxid(
                    "package", pkg_name, "file", str(file_counter)
                ),
                # TODO: Can we know the purpose here?
                lambda filepath: [],
                license_data,
                ignore_top_level_dirs=["CONTROL", "DEBIAN"],
                archive=None,
            )

            if package_files:
                pkg_objset.new_relationship(
                    [spdx_package],
                    oe.spdx30.RelationshipType.contains,
                    sorted(list(package_files)),
                )

            if include_sources:
                debug_sources = get_package_sources_from_debug(
                    d, package, package_files, sources, source_hash_cache
                )
                debug_source_ids |= set(
                    oe.sbom30.get_element_link_id(d) for d in debug_sources
                )

            oe.sbom30.write_recipe_jsonld_doc(
                d, pkg_objset, "packages-staging", deploydir, create_spdx_id_links=False
            )

    if include_sources:
        bb.debug(1, "Adding sysroot files to SPDX")
        sysroot_files = add_package_files(
            d,
            build_objset,
            d.expand("${COMPONENTS_DIR}/${PACKAGE_ARCH}/${PN}"),
            lambda file_counter: build_objset.new_spdxid("sysroot", str(file_counter)),
            lambda filepath: [],
            license_data,
            archive=None,
        )

        if sysroot_files:
            build_objset.new_scoped_relationship(
                [build],
                oe.spdx30.RelationshipType.hasOutputs,
                oe.spdx30.LifecycleScopeType.build,
                sorted(list(sysroot_files)),
            )

    if build_inputs or debug_source_ids:
        build_objset.new_scoped_relationship(
            [build],
            oe.spdx30.RelationshipType.hasInputs,
            oe.spdx30.LifecycleScopeType.build,
            sorted(list(build_inputs)) + sorted(list(debug_source_ids)),
        )

    oe.sbom30.write_recipe_jsonld_doc(d, build_objset, "recipes", deploydir)
+
+
def create_package_spdx(d):
    """Finish the per-package SPDX documents with runtime dependency data.

    Loads each staged package document written by create_spdx(), links it
    to the SPDX packages providing its RDEPENDS, and writes the final
    per-package documents plus a common document holding shared elements.
    No-op for native/cross recipes.
    """
    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    deploydir = Path(d.getVar("SPDXRUNTIMEDEPLOY"))
    is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class(
        "cross", d
    )

    providers = oe.spdx_common.collect_package_providers(d)
    pkg_arch = d.getVar("SSTATE_PKGARCH")

    if is_native:
        return

    bb.build.exec_func("read_subpackage_metadata", d)

    # Memoize dependency package lookups across all packages of the recipe
    dep_package_cache = {}

    # Any element common to all packages that need to be referenced by ID
    # should be written into this objset set
    common_objset = oe.sbom30.ObjectSet.new_objset(
        d, "%s-package-common" % d.getVar("PN")
    )

    pkgdest = Path(d.getVar("PKGDEST"))
    for package in d.getVar("PACKAGES").split():
        # Use a datastore copy with the package appended to OVERRIDES so
        # package-specific variable values are seen
        localdata = bb.data.createCopy(d)
        pkg_name = d.getVar("PKG:%s" % package) or package
        localdata.setVar("PKG", pkg_name)
        localdata.setVar("OVERRIDES", d.getVar("OVERRIDES", False) + ":" + package)

        if not oe.packagedata.packaged(package, localdata):
            continue

        spdx_package, pkg_objset = oe.sbom30.load_obj_in_jsonld(
            d,
            pkg_arch,
            "packages-staging",
            pkg_name,
            oe.spdx30.software_Package,
            software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install,
        )

        # We will write out a new collection, so link it to the new
        # creation info in the common package data. The old creation info
        # should still exist and be referenced by all the existing elements
        # in the package
        pkg_objset.creationInfo = pkg_objset.copy_creation_info(
            common_objset.doc.creationInfo
        )

        runtime_spdx_deps = set()

        deps = bb.utils.explode_dep_versions2(localdata.getVar("RDEPENDS") or "")
        seen_deps = set()
        for dep, _ in deps.items():
            if dep in seen_deps:
                continue

            if dep not in providers:
                continue

            # NOTE: rebinds "dep" to the resolved provider name; seen_deps
            # therefore tracks provider names, not the original RDEPENDS
            (dep, _) = providers[dep]

            if not oe.packagedata.packaged(dep, localdata):
                continue

            dep_pkg_data = oe.packagedata.read_subpkgdata_dict(dep, d)
            dep_pkg = dep_pkg_data["PKG"]

            if dep in dep_package_cache:
                dep_spdx_package = dep_package_cache[dep]
            else:
                bb.debug(1, "Searching for %s" % dep_pkg)
                dep_spdx_package, _ = oe.sbom30.find_root_obj_in_jsonld(
                    d,
                    "packages-staging",
                    dep_pkg,
                    oe.spdx30.software_Package,
                    software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install,
                )
                dep_package_cache[dep] = dep_spdx_package

            runtime_spdx_deps.add(dep_spdx_package)
            seen_deps.add(dep)

        if runtime_spdx_deps:
            pkg_objset.new_scoped_relationship(
                [spdx_package],
                oe.spdx30.RelationshipType.dependsOn,
                oe.spdx30.LifecycleScopeType.runtime,
                [oe.sbom30.get_element_link_id(dep) for dep in runtime_spdx_deps],
            )

        oe.sbom30.write_recipe_jsonld_doc(d, pkg_objset, "packages", deploydir)

    oe.sbom30.write_recipe_jsonld_doc(d, common_objset, "common-package", deploydir)
+
+
def write_bitbake_spdx(d):
    """Write the top-level bitbake.spdx.json document.

    Optionally (SPDX_INCLUDE_BITBAKE_PARENT_BUILD) records this bitbake
    invocation as a unique build_Build with host/invoker/delegation
    relationships. All elements are marked non-linkable by ID alias so
    other documents reference them by alias only.
    """
    # Set PN to "bitbake" so that SPDX IDs can be generated
    d.setVar("PN", "bitbake")
    d.setVar("BB_TASKHASH", "bitbake")
    oe.spdx_common.load_spdx_license_data(d)

    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))

    objset = oe.sbom30.ObjectSet.new_objset(d, "bitbake", False)

    host_import_key = d.getVar("SPDX_BUILD_HOST")
    invoked_by = objset.new_agent("SPDX_INVOKED_BY", add=False)
    on_behalf_of = objset.new_agent("SPDX_ON_BEHALF_OF", add=False)

    if d.getVar("SPDX_INCLUDE_BITBAKE_PARENT_BUILD") == "1":
        # Since the Build objects are unique, we may as well set the creation
        # time to the current time instead of the fallback SDE
        objset.doc.creationInfo.created = datetime.now(timezone.utc)

        # Each invocation of bitbake should have a unique ID since it is a
        # unique build
        nonce = os.urandom(16).hex()

        build = objset.add_root(
            oe.spdx30.build_Build(
                _id=objset.new_spdxid(nonce, include_unihash=False),
                creationInfo=objset.doc.creationInfo,
                build_buildType=oe.sbom30.SPDX_BUILD_TYPE,
            )
        )
        set_timestamp_now(d, build, "build_buildStartTime")

        if host_import_key:
            objset.new_scoped_relationship(
                [build],
                oe.spdx30.RelationshipType.hasHost,
                oe.spdx30.LifecycleScopeType.build,
                [objset.new_import("SPDX_BUILD_HOST")],
            )

        if invoked_by:
            objset.add(invoked_by)
            invoked_by_spdx = objset.new_scoped_relationship(
                [build],
                oe.spdx30.RelationshipType.invokedBy,
                oe.spdx30.LifecycleScopeType.build,
                [invoked_by],
            )

            # delegatedTo only makes sense on an existing invokedBy
            # relationship, so it is nested under the invoked_by branch
            if on_behalf_of:
                objset.add(on_behalf_of)
                objset.new_scoped_relationship(
                    [on_behalf_of],
                    oe.spdx30.RelationshipType.delegatedTo,
                    oe.spdx30.LifecycleScopeType.build,
                    invoked_by_spdx,
                )

        elif on_behalf_of:
            bb.warn("SPDX_ON_BEHALF_OF has no effect if SPDX_INVOKED_BY is not set")

    else:
        # Parent build disabled: warn about any related settings that are
        # silently ignored
        if host_import_key:
            bb.warn(
                "SPDX_BUILD_HOST has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set"
            )

        if invoked_by:
            bb.warn(
                "SPDX_INVOKED_BY has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set"
            )

        if on_behalf_of:
            bb.warn(
                "SPDX_ON_BEHALF_OF has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set"
            )

    for obj in objset.foreach_type(oe.spdx30.Element):
        obj.extension.append(oe.sbom30.OELinkExtension(link_spdx_id=False))
        obj.extension.append(oe.sbom30.OEIdAliasExtension())

    oe.sbom30.write_jsonld_doc(d, objset, deploy_dir_spdx / "bitbake.spdx.json")
+
+
def collect_build_package_inputs(d, objset, build, packages):
    """Link every package in *packages* as an input of *build*.

    Resolves each package name through the provider map, looks up its
    SPDX software_Package, and records a hasInputs build relationship in
    *objset*. Fails the build if any package lacks a SPDX provider.
    """
    providers = oe.spdx_common.collect_package_providers(d)

    build_deps = set()
    missing_providers = set()

    for name in sorted(packages.keys()):
        if name not in providers:
            missing_providers.add(name)
            continue

        # Only the provider's package name is needed; the hash filename
        # component of the provider tuple is unused here
        pkg_name, _ = providers[name]

        # Copy all of the package SPDX files into the Sbom elements
        pkg_spdx, _ = oe.sbom30.find_root_obj_in_jsonld(
            d,
            "packages",
            pkg_name,
            oe.spdx30.software_Package,
            software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install,
        )
        build_deps.add(pkg_spdx._id)

    if missing_providers:
        bb.fatal(
            f"Unable to find SPDX provider(s) for: {', '.join(sorted(missing_providers))}"
        )

    if build_deps:
        objset.new_scoped_relationship(
            [build],
            oe.spdx30.RelationshipType.hasInputs,
            oe.spdx30.LifecycleScopeType.build,
            sorted(list(build_deps)),
        )
+
+
def create_rootfs_spdx(d):
    """Write the SPDX document describing the image rootfs and its inputs."""
    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    deploydir = Path(d.getVar("SPDXROOTFSDEPLOY"))
    root_packages_file = Path(d.getVar("SPDX_ROOTFS_PACKAGES"))
    image_basename = d.getVar("IMAGE_BASENAME")
    machine = d.getVar("MACHINE")

    # The rootfs task recorded its installed packages as JSON
    with root_packages_file.open("r") as f:
        packages = json.load(f)

    objset = oe.sbom30.ObjectSet.new_objset(d, "%s-%s" % (image_basename, machine))

    # The rootfs itself is modeled as an archive package...
    rootfs_pkg = objset.add_root(
        oe.spdx30.software_Package(
            _id=objset.new_spdxid("rootfs", image_basename),
            creationInfo=objset.doc.creationInfo,
            name=image_basename,
            software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.archive,
        )
    )
    set_timestamp_now(d, rootfs_pkg, "builtTime")

    # ...produced by the rootfs build...
    rootfs_build = objset.add_root(objset.new_task_build("rootfs", "rootfs"))
    set_timestamp_now(d, rootfs_build, "build_buildEndTime")

    objset.new_scoped_relationship(
        [rootfs_build],
        oe.spdx30.RelationshipType.hasOutputs,
        oe.spdx30.LifecycleScopeType.build,
        [rootfs_pkg],
    )

    # ...with the installed packages as its inputs
    collect_build_package_inputs(d, objset, rootfs_build, packages)

    oe.sbom30.write_recipe_jsonld_doc(d, objset, "rootfs", deploydir)
+
+
def create_image_spdx(d):
    """Write the SPDX document describing the generated image artifacts.

    Reads the image output manifest, creates one build per image task and
    one software_File per produced image (with SHA-256 hash), links each
    build to its outputs, and links all builds to the rootfs as input.
    """
    image_deploy_dir = Path(d.getVar("IMGDEPLOYDIR"))
    manifest_path = Path(d.getVar("IMAGE_OUTPUT_MANIFEST"))
    spdx_work_dir = Path(d.getVar("SPDXIMAGEWORK"))

    image_basename = d.getVar("IMAGE_BASENAME")
    machine = d.getVar("MACHINE")

    objset = oe.sbom30.ObjectSet.new_objset(d, "%s-%s" % (image_basename, machine))

    with manifest_path.open("r") as f:
        manifest = json.load(f)

    builds = []
    for task in manifest:
        imagetype = task["imagetype"]
        taskname = task["taskname"]

        # One build_Build per image-producing task
        image_build = objset.add_root(
            objset.new_task_build(taskname, "image/%s" % imagetype)
        )
        set_timestamp_now(d, image_build, "build_buildEndTime")
        builds.append(image_build)

        artifacts = []

        for image in task["images"]:
            image_filename = image["filename"]
            image_path = image_deploy_dir / image_filename
            a = objset.add_root(
                oe.spdx30.software_File(
                    _id=objset.new_spdxid("image", image_filename),
                    creationInfo=objset.doc.creationInfo,
                    name=image_filename,
                    verifiedUsing=[
                        oe.spdx30.Hash(
                            algorithm=oe.spdx30.HashAlgorithm.sha256,
                            hashValue=bb.utils.sha256_file(image_path),
                        )
                    ],
                )
            )
            # Purpose can be customized per image type
            set_purposes(
                d, a, "SPDX_IMAGE_PURPOSE:%s" % imagetype, "SPDX_IMAGE_PURPOSE"
            )
            set_timestamp_now(d, a, "builtTime")

            artifacts.append(a)

        if artifacts:
            objset.new_scoped_relationship(
                [image_build],
                oe.spdx30.RelationshipType.hasOutputs,
                oe.spdx30.LifecycleScopeType.build,
                artifacts,
            )

    if builds:
        rootfs_image, _ = oe.sbom30.find_root_obj_in_jsonld(
            d,
            "rootfs",
            "%s-%s" % (image_basename, machine),
            oe.spdx30.software_Package,
            # TODO: Should use a purpose to filter here?
        )
        objset.new_scoped_relationship(
            builds,
            oe.spdx30.RelationshipType.hasInputs,
            oe.spdx30.LifecycleScopeType.build,
            [rootfs_image._id],
        )

    objset.add_aliases()
    objset.link()
    oe.sbom30.write_recipe_jsonld_doc(d, objset, "image", spdx_work_dir)
+
+
def create_image_sbom_spdx(d):
    """Assemble the final image SBOM from the rootfs and image documents."""
    image_name = d.getVar("IMAGE_NAME")
    image_basename = d.getVar("IMAGE_BASENAME")
    image_link_name = d.getVar("IMAGE_LINK_NAME")
    imgdeploydir = Path(d.getVar("SPDXIMAGEDEPLOYDIR"))
    machine = d.getVar("MACHINE")

    doc_name = "%s-%s" % (image_basename, machine)
    spdx_path = imgdeploydir / (image_name + ".spdx.json")

    # TODO: Do we need to add the rootfs or are the image files sufficient?
    rootfs_image, _ = oe.sbom30.find_root_obj_in_jsonld(
        d,
        "rootfs",
        doc_name,
        oe.spdx30.software_Package,
        # TODO: Should use a purpose here?
    )

    image_objset, _ = oe.sbom30.find_jsonld(d, "image", doc_name, required=True)

    # SBOM roots are the rootfs package followed by every image file
    root_elements = [rootfs_image._id]
    for image_file in image_objset.foreach_root(oe.spdx30.software_File):
        root_elements.append(image_file._id)

    objset, sbom = oe.sbom30.create_sbom(d, image_name, root_elements)

    oe.sbom30.write_jsonld_doc(d, objset, spdx_path)

    # Maintain the usual IMAGE_LINK_NAME convenience symlink
    if image_link_name:
        link = imgdeploydir / (image_link_name + ".spdx.json")
        if link != spdx_path:
            link.symlink_to(os.path.relpath(spdx_path, link.parent))
+
+
def sdk_create_spdx(d, sdk_type, spdx_work_dir, toolchain_outputname):
    """Write the SDK rootfs SPDX document for *sdk_type* ("host"/"target")."""
    sdk_name = toolchain_outputname + "-" + sdk_type
    sdk_packages = oe.sdk.sdk_list_installed_packages(d, sdk_type == "target")

    objset = oe.sbom30.ObjectSet.new_objset(d, sdk_name)

    # The SDK rootfs is modeled as an archive package...
    sdk_rootfs = objset.add_root(
        oe.spdx30.software_Package(
            _id=objset.new_spdxid("sdk-rootfs", sdk_name),
            creationInfo=objset.doc.creationInfo,
            name=sdk_name,
            software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.archive,
        )
    )
    set_timestamp_now(d, sdk_rootfs, "builtTime")

    # ...produced by the sdk-rootfs build...
    sdk_build = objset.add_root(objset.new_task_build("sdk-rootfs", "sdk-rootfs"))
    set_timestamp_now(d, sdk_build, "build_buildEndTime")

    objset.new_scoped_relationship(
        [sdk_build],
        oe.spdx30.RelationshipType.hasOutputs,
        oe.spdx30.LifecycleScopeType.build,
        [sdk_rootfs],
    )

    # ...with the installed SDK packages as its inputs
    collect_build_package_inputs(d, objset, sdk_build, sdk_packages)

    objset.add_aliases()
    oe.sbom30.write_jsonld_doc(d, objset, spdx_work_dir / "sdk-rootfs.spdx.json")
+
+
def create_sdk_sbom(d, sdk_deploydir, spdx_work_dir, toolchain_outputname):
    """Write the final SDK SBOM covering the installer output files.

    Loads the sdk-rootfs document written by sdk_create_spdx(), adds a
    sdk-populate build taking the rootfs as input and the deployed
    installer files as outputs, then writes the combined SBOM next to the
    installer.
    """
    # Load the document written earlier
    rootfs_objset = oe.sbom30.load_jsonld(
        d, spdx_work_dir / "sdk-rootfs.spdx.json", required=True
    )

    # Create a new build for the SDK installer
    sdk_build = rootfs_objset.new_task_build("sdk-populate", "sdk-populate")
    set_timestamp_now(d, sdk_build, "build_buildEndTime")

    rootfs = rootfs_objset.find_root(oe.spdx30.software_Package)
    if rootfs is None:
        bb.fatal("Unable to find rootfs artifact")

    rootfs_objset.new_scoped_relationship(
        [sdk_build],
        oe.spdx30.RelationshipType.hasInputs,
        oe.spdx30.LifecycleScopeType.build,
        [rootfs],
    )

    files = set()
    root_files = []

    # NOTE: os.walk() doesn't return symlinks
    for dirpath, dirnames, filenames in os.walk(sdk_deploydir):
        for fn in filenames:
            fpath = Path(dirpath) / fn
            if not fpath.is_file() or fpath.is_symlink():
                continue

            relpath = str(fpath.relative_to(sdk_deploydir))

            f = rootfs_objset.new_file(
                rootfs_objset.new_spdxid("sdk-installer", relpath),
                relpath,
                fpath,
            )
            set_timestamp_now(d, f, "builtTime")

            # Classify the output by its file name suffix; everything that
            # is not a manifest or testdata file is treated as an SDK
            # artifact with a configurable purpose
            if fn.endswith(".manifest"):
                f.software_primaryPurpose = oe.spdx30.software_SoftwarePurpose.manifest
            elif fn.endswith(".testdata.json"):
                f.software_primaryPurpose = (
                    oe.spdx30.software_SoftwarePurpose.configuration
                )
            else:
                set_purposes(d, f, "SPDX_SDK_PURPOSE")
                root_files.append(f)

            files.add(f)

    if files:
        rootfs_objset.new_scoped_relationship(
            [sdk_build],
            oe.spdx30.RelationshipType.hasOutputs,
            oe.spdx30.LifecycleScopeType.build,
            files,
        )
    else:
        bb.warn(f"No SDK output files found in {sdk_deploydir}")

    objset, sbom = oe.sbom30.create_sbom(
        d, toolchain_outputname, sorted(list(files)), [rootfs_objset]
    )

    oe.sbom30.write_jsonld_doc(
        d, objset, sdk_deploydir / (toolchain_outputname + ".spdx.json")
    )