xref: /openbmc/openbmc/poky/meta/lib/oe/spdx30_tasks.py (revision 96e4b4e121e0e2da1535d7d537d6a982a6ff5bc0)
1 #
2 # Copyright OpenEmbedded Contributors
3 #
4 # SPDX-License-Identifier: GPL-2.0-only
5 #
6 
7 import json
8 import oe.cve_check
9 import oe.packagedata
10 import oe.patch
11 import oe.sbom30
12 import oe.spdx30
13 import oe.spdx_common
14 import oe.sdk
15 import os
16 
17 from contextlib import contextmanager
18 from datetime import datetime, timezone
19 from pathlib import Path
20 
21 
def set_timestamp_now(d, o, prop):
    """Set the timestamp property *prop* on *o*, or clear it.

    When SPDX_INCLUDE_TIMESTAMPS is "1" the property is set to the current
    time in UTC.  For any other value the property is deleted instead.
    """
    if d.getVar("SPDX_INCLUDE_TIMESTAMPS") != "1":
        # Deleting (rather than silently skipping) helps to validate that
        # the property actually exists, and also that it is not mandatory
        delattr(o, prop)
        return

    setattr(o, prop, datetime.now(timezone.utc))
29 
30 
def add_license_expression(d, objset, license_expression, license_data):
    """Translate an OE license expression into an SPDX license expression.

    The expression is split into tokens around "(", ")", "|" and "&", and
    each token is converted to its SPDX spelling.  A license name that is
    not listed in license_data["licenses"] (after the SPDXLICENSEMAP flag
    lookup) becomes "LicenseRef-<name>", and a SimpleLicensingText element
    carrying its text is looked up or created in *objset*.

    Returns the element created by objset.new_license_expression(), after
    registering an element alias for it.
    """
    # License name -> SimpleLicensingText element, for this call only
    text_elements = {}
    # "LicenseRef-<name>" -> link ID of the element holding the text
    ref_links = {}
    # Tokens that have a fixed translation
    fixed_tokens = {"(": "(", ")": ")", "&": "AND", "|": "OR", "CLOSED": "NONE"}

    def add_license_text(name):
        try:
            return text_elements[name]
        except KeyError:
            pass

        # Reuse an element that already exists in the object set
        element = objset.find_filter(
            oe.spdx30.simplelicensing_SimpleLicensingText,
            name=name,
        )
        if element is not None:
            text_elements[name] = element
            return element

        element = objset.add(
            oe.spdx30.simplelicensing_SimpleLicensingText(
                _id=objset.new_spdxid("license-text", name),
                creationInfo=objset.doc.creationInfo,
                name=name,
            )
        )
        objset.set_element_alias(element)
        text_elements[name] = element

        if name == "PD":
            element.simplelicensing_licenseText = "Software released to the public domain"
            return element

        # Search for the license in COMMON_LICENSE_DIR and LICENSE_PATH
        search_dirs = [d.getVar("COMMON_LICENSE_DIR")]
        search_dirs += (d.getVar("LICENSE_PATH") or "").split()
        for directory in search_dirs:
            try:
                with (Path(directory) / name).open(errors="replace") as f:
                    element.simplelicensing_licenseText = f.read()
            except FileNotFoundError:
                continue
            return element

        # If it's not SPDX or PD, then NO_GENERIC_LICENSE must be set
        filename = d.getVarFlag("NO_GENERIC_LICENSE", name)
        if not filename:
            bb.fatal("Cannot find any text for license %s" % name)
            return None

        with open(d.expand("${S}/" + filename), errors="replace") as f:
            element.simplelicensing_licenseText = f.read()
        return element

    def convert(token):
        if token in fixed_tokens:
            return fixed_tokens[token]

        mapped = d.getVarFlag("SPDXLICENSEMAP", token) or token
        if mapped in license_data["licenses"]:
            return mapped

        # Not a known SPDX license: reference a custom license text
        ref = "LicenseRef-" + token
        if ref not in ref_links:
            ref_links[ref] = oe.sbom30.get_element_link_id(add_license_text(token))

        return ref

    # Pad every operator/parenthesis with spaces so split() isolates it
    spaced = license_expression
    for separator in "()|&":
        spaced = spaced.replace(separator, " " + separator + " ")

    spdx_license_expression = " ".join([convert(tok) for tok in spaced.split()])

    o = objset.new_license_expression(
        spdx_license_expression, license_data, ref_links
    )
    objset.set_element_alias(o)
    return o
130 
131 
def add_package_files(
    d,
    objset,
    topdir,
    get_spdxid,
    get_purposes,
    license_data=None,
    *,
    archive=None,
    ignore_dirs=(),
    ignore_top_level_dirs=(),
):
    """Create an SPDX file element for every regular file below *topdir*.

    Parameters:
        d: datastore; only SOURCE_DATE_EPOCH is read from it.
        objset: object set the file elements are created in (new_file()).
        topdir: directory to walk.
        get_spdxid: callable taking the running file counter (starting at
            1) and returning the SPDX ID for that file.
        get_purposes: callable taking the file path and returning the list
            of purposes passed to objset.new_file().
        license_data: when not None, files whose purposes include "source"
            are passed to objset.scan_declared_licenses().
        archive: optional object providing gettarinfo() and addfile()
            (presumably a tarfile.TarFile); each file is also added to it
            with root ownership and an mtime clamped to SOURCE_DATE_EPOCH.
        ignore_dirs: directory names skipped at any depth.
        ignore_top_level_dirs: directory names skipped directly in topdir.

    Symlinks and anything that is not a regular file are skipped.
    Directories and files are visited in sorted order so that the counter
    handed to get_spdxid() does not depend on filesystem ordering.

    Returns the set of file elements that were created.
    """
    source_date_epoch = d.getVar("SOURCE_DATE_EPOCH")
    # Treat an unset *or empty* variable as "do not clamp"; an empty string
    # would otherwise reach the mtime comparison below and raise TypeError
    source_date_epoch = int(source_date_epoch) if source_date_epoch else None

    spdx_files = set()

    file_counter = 1
    for subdir, dirs, files in os.walk(topdir):
        # Prune in place so os.walk() does not descend into ignored entries
        dirs[:] = [name for name in dirs if name not in ignore_dirs]
        if subdir == str(topdir):
            dirs[:] = [name for name in dirs if name not in ignore_top_level_dirs]

        # Make the traversal (and therefore the generated IDs) reproducible
        dirs.sort()
        files.sort()

        for file in files:
            filepath = Path(subdir) / file
            if filepath.is_symlink() or not filepath.is_file():
                continue

            filename = str(filepath.relative_to(topdir))
            file_purposes = get_purposes(filepath)

            spdx_file = objset.new_file(
                get_spdxid(file_counter),
                filename,
                filepath,
                purposes=file_purposes,
            )
            spdx_files.add(spdx_file)

            if (
                oe.spdx30.software_SoftwarePurpose.source in file_purposes
                and license_data is not None
            ):
                objset.scan_declared_licenses(spdx_file, filepath, license_data)

            if archive is not None:
                with filepath.open("rb") as f:
                    info = archive.gettarinfo(fileobj=f)
                    # Normalize metadata so the archive content does not
                    # depend on the build host
                    info.name = filename
                    info.uid = 0
                    info.gid = 0
                    info.uname = "root"
                    info.gname = "root"

                    if source_date_epoch is not None and info.mtime > source_date_epoch:
                        info.mtime = source_date_epoch

                    archive.addfile(info, f)

            file_counter += 1

    bb.debug(1, "Added %d files to %s" % (len(spdx_files), objset.doc._id))

    return spdx_files
197 
198 
def get_package_sources_from_debug(
    d, package, package_files, sources, source_hash_cache
):
    """Find the known source files referenced by a package's debug info.

    Parameters:
        d: datastore; SPDXWORK, PKGD, STAGING_DIR_TARGET, STAGING_DIR_NATIVE
            and STAGING_KERNEL_DIR are used as search roots.
        package: package name whose extended package data is read.
        package_files: SPDX file elements of the package; every file with
            "debugsrc" data must match one of them by name or alias.
        sources: mapping of SHA256 hex digest -> source element.
        source_hash_cache: mapping of Path -> SHA256 digest (or None for
            paths that are not regular files); updated in place.

    Returns the set of elements from *sources* whose hash matches a debug
    source file.  The set is empty when no package data is available, so
    callers can always iterate the result.
    """
    kernel_src_prefix = "/usr/src/kernel/"

    def file_path_match(file_path, pkg_file):
        wanted = file_path.lstrip("/")
        if wanted == pkg_file.name.lstrip("/"):
            return True

        for e in pkg_file.extension:
            if isinstance(e, oe.sbom30.OEFileNameAliasExtension):
                if any(wanted == a.lstrip("/") for a in e.aliases):
                    return True

        return False

    debug_search_paths = [
        Path(d.getVar("SPDXWORK")),
        Path(d.getVar("PKGD")),
        Path(d.getVar("STAGING_DIR_TARGET")),
        Path(d.getVar("STAGING_DIR_NATIVE")),
        Path(d.getVar("STAGING_KERNEL_DIR")),
    ]

    pkg_data = oe.packagedata.read_subpkgdata_extended(package, d)

    dep_source_files = set()

    if pkg_data is None:
        return dep_source_files

    for file_path, file_data in pkg_data["files_info"].items():
        if "debugsrc" not in file_data:
            continue

        if not any(file_path_match(file_path, pkg_file) for pkg_file in package_files):
            bb.fatal(
                "No package file found for %s in %s; SPDX found: %s"
                % (str(file_path), package, " ".join(p.name for p in package_files))
            )
            continue

        for debugsrc in file_data["debugsrc"]:
            # Always build a *relative* path: joining an absolute path onto
            # a Path discards the search root.  The kernel prefix must match
            # as a whole path component so that siblings such as
            # /usr/src/kernel-foo are not mistaken for it.
            if debugsrc.startswith(kernel_src_prefix):
                relative_src = debugsrc[len(kernel_src_prefix):].lstrip("/")
            else:
                relative_src = debugsrc.lstrip("/")

            for search in debug_search_paths:
                debugsrc_path = search / relative_src

                if debugsrc_path in source_hash_cache:
                    file_sha256 = source_hash_cache[debugsrc_path]
                    if file_sha256 is None:
                        continue
                else:
                    # We can only hash files below, skip directories, links, etc.
                    if not debugsrc_path.is_file():
                        source_hash_cache[debugsrc_path] = None
                        continue

                    file_sha256 = bb.utils.sha256_file(debugsrc_path)
                    source_hash_cache[debugsrc_path] = file_sha256

                if file_sha256 in sources:
                    source_file = sources[file_sha256]
                    dep_source_files.add(source_file)
                else:
                    bb.debug(
                        1,
                        "Debug source %s with SHA256 %s not found in any dependency"
                        % (str(debugsrc_path), file_sha256),
                    )
                # The first search root containing the file wins
                break
            else:
                bb.debug(1, "Debug source %s not found" % debugsrc)

    return dep_source_files
274 
275 
def collect_dep_objsets(d, build):
    """Load the recipe SPDX data of every SPDX dependency.

    For each dependency reported by oe.spdx_common.get_spdx_deps() the root
    build element is looked up in the "recipes" JSON-LD data.

    Returns a tuple (dep_objsets, dep_builds): the list of object sets of
    the dependencies that are part of the task hash, and the set of build
    elements of all dependencies.  The *build* argument is not used.
    """
    linkable_objsets = []
    all_builds = set()

    for dep in oe.spdx_common.get_spdx_deps(d):
        bb.debug(1, "Fetching SPDX for dependency %s" % (dep.pn))
        found_build, found_objset = oe.sbom30.find_root_obj_in_jsonld(
            d, "recipes", "recipe-" + dep.pn, oe.spdx30.build_Build
        )

        # Only dependencies that are part of the taskhash are returned to be
        # linked against; otherwise this recipe would not be rebuilt when
        # the dependency changes
        if dep.in_taskhash:
            linkable_objsets.append(found_objset)

        # The build itself _can_ always be linked against (by alias)
        all_builds.add(found_build)

    return linkable_objsets, all_builds
298 
299 
def index_sources_by_hash(sources, dest):
    """Index source file elements by their SHA256 hash.

    Every element of *sources* that is a software_File whose primary
    purpose is "source" is stored in *dest* under its SHA256 hash value.
    An entry already present in *dest* is never replaced.  A source file
    without a SHA256 hash is reported with bb.fatal().
    """
    for s in sources:
        if (
            not isinstance(s, oe.spdx30.software_File)
            or s.software_primaryPurpose != oe.spdx30.software_SoftwarePurpose.source
        ):
            continue

        # Only the first SHA256 entry is considered
        sha256 = next(
            (v for v in s.verifiedUsing if v.algorithm == oe.spdx30.HashAlgorithm.sha256),
            None,
        )

        if sha256 is None:
            bb.fatal(f"No SHA256 found for {s.name}")
        elif sha256.hashValue not in dest:
            dest[sha256.hashValue] = s
315 
316 
def collect_dep_sources(dep_objsets, dest):
    """Index the build inputs of all non-native dependencies by hash.

    For every object set in *dep_objsets* that is not native, the targets
    of each hasInput relationship originating from the root build element
    are passed to index_sources_by_hash(), which fills *dest*.
    """
    for objset in dep_objsets:
        # Don't collect sources from native recipes as they
        # match non-native sources also.
        if objset.is_native():
            continue

        bb.debug(1, "Fetching Sources for dependency %s" % (objset.doc.name))

        root_build = objset.find_root(oe.spdx30.build_Build)
        if not root_build:
            bb.fatal("Unable to find a build")

        for rel in objset.foreach_type(oe.spdx30.Relationship):
            # Only "build hasInput ..." relationships are of interest
            if (
                root_build is not rel.from_
                or rel.relationshipType != oe.spdx30.RelationshipType.hasInput
            ):
                continue

            index_sources_by_hash(rel.to, dest)
338 
339 
def add_download_files(d, objset):
    """Create SPDX elements for every entry in SRC_URI.

    Local ("file" type) entries become file elements: one for a plain file,
    or one per non-symlink file when the entry is a directory.  All other
    entries become a software_Package with a download location and, when
    the fetcher supports checksums, the expected sha256/sha1 hashes.

    The primary purpose is "patch" when oe.patch.patch_path() recognizes
    the URL as a patch, otherwise "source".

    Directory entries are walked in sorted order so that the generated
    SPDX IDs do not depend on filesystem ordering.

    Returns the set of created elements.
    """
    inputs = set()

    # An unset SRC_URI is treated like an empty one
    urls = (d.getVar("SRC_URI") or "").split()
    fetch = bb.fetch2.Fetch(urls, d)

    for download_idx, src_uri in enumerate(urls, start=1):
        fd = fetch.ud[src_uri]

        for name in fd.names:
            file_name = os.path.basename(fetch.localpath(src_uri))
            if oe.patch.patch_path(src_uri, fetch, "", expand=False):
                primary_purpose = oe.spdx30.software_SoftwarePurpose.patch
            else:
                primary_purpose = oe.spdx30.software_SoftwarePurpose.source

            if fd.type == "file":
                if os.path.isdir(fd.localpath):
                    walk_idx = 1
                    for root, dirs, files in os.walk(fd.localpath):
                        # Sort in place for a reproducible traversal order
                        dirs.sort()
                        files.sort()

                        for f in files:
                            f_path = os.path.join(root, f)
                            if os.path.islink(f_path):
                                # TODO: SPDX doesn't support symlinks yet
                                continue

                            file = objset.new_file(
                                objset.new_spdxid(
                                    "source", str(download_idx), str(walk_idx)
                                ),
                                os.path.join(
                                    file_name, os.path.relpath(f_path, fd.localpath)
                                ),
                                f_path,
                                purposes=[primary_purpose],
                            )

                            inputs.add(file)
                            walk_idx += 1

                else:
                    file = objset.new_file(
                        objset.new_spdxid("source", str(download_idx)),
                        file_name,
                        fd.localpath,
                        purposes=[primary_purpose],
                    )
                    inputs.add(file)

            else:
                dl = objset.add(
                    oe.spdx30.software_Package(
                        _id=objset.new_spdxid("source", str(download_idx)),
                        creationInfo=objset.doc.creationInfo,
                        name=file_name,
                        software_primaryPurpose=primary_purpose,
                        software_downloadLocation=oe.spdx_common.fetch_data_to_uri(
                            fd, name
                        ),
                    )
                )

                if fd.method.supports_checksum(fd):
                    # TODO Need something better than hard coding this
                    for checksum_id in ["sha256", "sha1"]:
                        expected_checksum = getattr(
                            fd, "%s_expected" % checksum_id, None
                        )
                        if expected_checksum is None:
                            continue

                        dl.verifiedUsing.append(
                            oe.spdx30.Hash(
                                algorithm=getattr(oe.spdx30.HashAlgorithm, checksum_id),
                                hashValue=expected_checksum,
                            )
                        )

                inputs.add(dl)

    return inputs
421 
422 
def set_purposes(d, element, *var_names, force_purposes=[]):
    """Set the primary and additional software purposes of *element*.

    The purpose list starts with a copy of *force_purposes* and is extended
    with the whitespace-separated value of the first variable in
    *var_names* that has a non-empty value.  The first purpose becomes the
    primary purpose and the remaining ones the additional purposes.  When
    no purpose is found a warning is issued and *element* is left untouched.
    """
    purposes = force_purposes[:]

    # Only the first variable with a value is taken into account
    configured = next(filter(None, (d.getVar(v) for v in var_names)), None)
    if configured is not None:
        purposes.extend(configured.split())

    if not purposes:
        bb.warn("No SPDX purposes found in %s" % " ".join(var_names))
        return

    primary, *additional = purposes
    element.software_primaryPurpose = getattr(
        oe.spdx30.software_SoftwarePurpose, primary
    )
    element.software_additionalPurpose = [
        getattr(oe.spdx30.software_SoftwarePurpose, p) for p in additional
    ]
442 
443 
def create_spdx(d):
    """Create the SPDX documents for the current recipe and its packages.

    A "recipe-<PN>" object set is created holding the recipe build element,
    the CVE elements (depending on SPDX_INCLUDE_VEX), the downloaded
    sources, the license expressions and, when SPDX_INCLUDE_SOURCES is "1",
    the patched source files and sysroot files.

    For recipes that are neither native nor cross, one "package-<PKG>"
    object set is created per packaged entry of PACKAGES and written to the
    "packages-staging" area below SPDXDEPLOY.  The recipe object set is
    written last, to "recipes" below SPDXDEPLOY.
    """

    def set_var_field(var, obj, name, package=None):
        # Set obj.<name> from the package specific variable "<var>:<package>"
        # if it has a value, otherwise from "<var>"; do nothing if neither
        # has a value
        val = None
        if package:
            val = d.getVar("%s:%s" % (var, package))

        if not val:
            val = d.getVar(var)

        if val:
            setattr(obj, name, val)

    license_data = oe.spdx_common.load_spdx_license_data(d)

    deploydir = Path(d.getVar("SPDXDEPLOY"))
    # NOTE(review): deploy_dir_spdx and pkg_arch are assigned but not used
    # anywhere else in this function
    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    spdx_workdir = Path(d.getVar("SPDXWORK"))
    include_sources = d.getVar("SPDX_INCLUDE_SOURCES") == "1"
    pkg_arch = d.getVar("SSTATE_PKGARCH")
    # Cross recipes are handled like native ones
    is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class(
        "cross", d
    )
    include_vex = d.getVar("SPDX_INCLUDE_VEX")
    if not include_vex in ("none", "current", "all"):
        bb.fatal("SPDX_INCLUDE_VEX must be one of 'none', 'current', 'all'")

    build_objset = oe.sbom30.ObjectSet.new_objset(d, "recipe-" + d.getVar("PN"))

    # The build element is the root of the recipe document
    build = build_objset.new_task_build("recipe", "recipe")
    build_objset.set_element_alias(build)

    build_objset.doc.rootElement.append(build)

    build_objset.set_is_native(is_native)

    # Attach one "VAR=value" annotation to the build per listed variable.
    # NOTE(review): new_annotation is not defined in the visible part of
    # this file -- confirm it is provided elsewhere, otherwise this raises
    # NameError as soon as SPDX_CUSTOM_ANNOTATION_VARS is non-empty
    for var in (d.getVar("SPDX_CUSTOM_ANNOTATION_VARS") or "").split():
        new_annotation(
            d,
            build_objset,
            build,
            "%s=%s" % (var, d.getVar(var)),
            oe.spdx30.AnnotationType.other,
        )

    build_inputs = set()

    # Add CVEs
    # cve_by_status maps: status mapping -> {CVE ID: (element, detail, description)}
    cve_by_status = {}
    if include_vex != "none":
        for cve in d.getVarFlags("CVE_STATUS") or {}:
            decoded_status = oe.cve_check.decode_cve_status(d, cve)

            # If this CVE is fixed upstream, skip it unless all CVEs are
            # specified.
            if (
                include_vex != "all"
                and "detail" in decoded_status
                and decoded_status["detail"]
                in (
                    "fixed-version",
                    "cpe-stable-backport",
                )
            ):
                bb.debug(1, "Skipping %s since it is already fixed upstream" % cve)
                continue

            spdx_cve = build_objset.new_cve_vuln(cve)
            build_objset.set_element_alias(spdx_cve)

            cve_by_status.setdefault(decoded_status["mapping"], {})[cve] = (
                spdx_cve,
                decoded_status["detail"],
                decoded_status["description"],
            )

    cpe_ids = oe.cve_check.get_cpe_ids(d.getVar("CVE_PRODUCT"), d.getVar("CVE_VERSION"))

    # Everything listed in SRC_URI is an input of the build
    source_files = add_download_files(d, build_objset)
    build_inputs |= source_files

    # The recipe LICENSE is recorded as the concluded license of the
    # downloaded sources
    recipe_spdx_license = add_license_expression(
        d, build_objset, d.getVar("LICENSE"), license_data
    )
    build_objset.new_relationship(
        source_files,
        oe.spdx30.RelationshipType.hasConcludedLicense,
        [oe.sbom30.get_element_link_id(recipe_spdx_license)],
    )

    # SHA256 -> source file element; used below to resolve debug sources
    dep_sources = {}
    if oe.spdx_common.process_sources(d) and include_sources:
        bb.debug(1, "Adding source files to SPDX")
        oe.spdx_common.get_patched_src(d)

        files = add_package_files(
            d,
            build_objset,
            spdx_workdir,
            lambda file_counter: build_objset.new_spdxid(
                "sourcefile", str(file_counter)
            ),
            lambda filepath: [oe.spdx30.software_SoftwarePurpose.source],
            license_data,
            ignore_dirs=[".git"],
            ignore_top_level_dirs=["temp"],
            archive=None,
        )
        build_inputs |= files
        index_sources_by_hash(files, dep_sources)

    # Record the builds of the dependencies as build-time dependencies
    dep_objsets, dep_builds = collect_dep_objsets(d, build)
    if dep_builds:
        build_objset.new_scoped_relationship(
            [build],
            oe.spdx30.RelationshipType.dependsOn,
            oe.spdx30.LifecycleScopeType.build,
            sorted(oe.sbom30.get_element_link_id(b) for b in dep_builds),
        )

    debug_source_ids = set()
    # Shared across all packages so each candidate path is hashed only once
    source_hash_cache = {}

    # Write out the package SPDX data now. It is not complete as we cannot
    # write the runtime data, so write it to a staging area and a later task
    # will write out the final collection

    # TODO: Handle native recipe output
    if not is_native:
        bb.debug(1, "Collecting Dependency sources files")
        collect_dep_sources(dep_objsets, dep_sources)

        bb.build.exec_func("read_subpackage_metadata", d)

        pkgdest = Path(d.getVar("PKGDEST"))
        for package in d.getVar("PACKAGES").split():
            if not oe.packagedata.packaged(package, d):
                continue

            # PKG:<package> holds the final package name if it was renamed
            pkg_name = d.getVar("PKG:%s" % package) or package

            bb.debug(1, "Creating SPDX for package %s" % pkg_name)

            pkg_objset = oe.sbom30.ObjectSet.new_objset(d, "package-" + pkg_name)

            spdx_package = pkg_objset.add_root(
                oe.spdx30.software_Package(
                    _id=pkg_objset.new_spdxid("package", pkg_name),
                    creationInfo=pkg_objset.doc.creationInfo,
                    name=pkg_name,
                    software_packageVersion=d.getVar("PV"),
                )
            )
            set_timestamp_now(d, spdx_package, "builtTime")

            # "install" is always the primary purpose; the variables only
            # contribute additional purposes
            set_purposes(
                d,
                spdx_package,
                "SPDX_PACKAGE_ADDITIONAL_PURPOSE:%s" % package,
                "SPDX_PACKAGE_ADDITIONAL_PURPOSE",
                force_purposes=["install"],
            )

            # new_agent() may return either an ID string or an element
            supplier = build_objset.new_agent("SPDX_PACKAGE_SUPPLIER")
            if supplier is not None:
                spdx_package.suppliedBy = (
                    supplier if isinstance(supplier, str) else supplier._id
                )

            set_var_field(
                "HOMEPAGE", spdx_package, "software_homePage", package=package
            )
            set_var_field("SUMMARY", spdx_package, "summary", package=package)
            set_var_field("DESCRIPTION", spdx_package, "description", package=package)

            # The package is an output of the recipe build; the build lives
            # in the recipe document, so it is referenced by link ID
            pkg_objset.new_scoped_relationship(
                [oe.sbom30.get_element_link_id(build)],
                oe.spdx30.RelationshipType.hasOutput,
                oe.spdx30.LifecycleScopeType.build,
                [spdx_package],
            )

            for cpe_id in cpe_ids:
                spdx_package.externalIdentifier.append(
                    oe.spdx30.ExternalIdentifier(
                        externalIdentifierType=oe.spdx30.ExternalIdentifierType.cpe23,
                        identifier=cpe_id,
                    )
                )

            # TODO: Generate a file for each actual IPK/DEB/RPM/TGZ file
            # generated and link it to the package
            # spdx_package_file = pkg_objset.add(oe.spdx30.software_File(
            #    _id=pkg_objset.new_spdxid("distribution", pkg_name),
            #    creationInfo=pkg_objset.doc.creationInfo,
            #    name=pkg_name,
            #    software_primaryPurpose=spdx_package.software_primaryPurpose,
            #    software_additionalPurpose=spdx_package.software_additionalPurpose,
            # ))
            # set_timestamp_now(d, spdx_package_file, "builtTime")

            ## TODO add hashes
            # pkg_objset.new_relationship(
            #    [spdx_package],
            #    oe.spdx30.RelationshipType.hasDistributionArtifact,
            #    [spdx_package_file],
            # )

            # NOTE: licenses live in the recipe collection and are referenced
            # by ID in the package collection(s). This helps reduce duplication
            # (since a lot of packages will have the same license), and also
            # prevents duplicate license SPDX IDs in the packages
            package_license = d.getVar("LICENSE:%s" % package)
            if package_license and package_license != d.getVar("LICENSE"):
                package_spdx_license = add_license_expression(
                    d, build_objset, package_license, license_data
                )
            else:
                package_spdx_license = recipe_spdx_license

            pkg_objset.new_relationship(
                [spdx_package],
                oe.spdx30.RelationshipType.hasConcludedLicense,
                [oe.sbom30.get_element_link_id(package_spdx_license)],
            )

            # NOTE: CVE Elements live in the recipe collection
            all_cves = set()
            for status, cves in cve_by_status.items():
                for cve, items in cves.items():
                    spdx_cve, detail, description = items
                    spdx_cve_id = oe.sbom30.get_element_link_id(spdx_cve)

                    all_cves.add(spdx_cve_id)

                    # Emit the VEX relationship matching the CVE status
                    if status == "Patched":
                        pkg_objset.new_vex_patched_relationship(
                            [spdx_cve_id], [spdx_package]
                        )
                    elif status == "Unpatched":
                        pkg_objset.new_vex_unpatched_relationship(
                            [spdx_cve_id], [spdx_package]
                        )
                    elif status == "Ignored":
                        spdx_vex = pkg_objset.new_vex_ignored_relationship(
                            [spdx_cve_id],
                            [spdx_package],
                            impact_statement=description,
                        )

                        if detail in (
                            "ignored",
                            "cpe-incorrect",
                            "disputed",
                            "upstream-wontfix",
                        ):
                            # VEX doesn't have justifications for this
                            pass
                        elif detail in (
                            "not-applicable-config",
                            "not-applicable-platform",
                        ):
                            for v in spdx_vex:
                                v.security_justificationType = (
                                    oe.spdx30.security_VexJustificationType.vulnerableCodeNotPresent
                                )
                        else:
                            bb.fatal(f"Unknown detail '{detail}' for ignored {cve}")
                    else:
                        bb.fatal(f"Unknown {cve} status '{status}'")

            if all_cves:
                pkg_objset.new_relationship(
                    [spdx_package],
                    oe.spdx30.RelationshipType.hasAssociatedVulnerability,
                    sorted(list(all_cves)),
                )

            bb.debug(1, "Adding package files to SPDX for package %s" % pkg_name)
            package_files = add_package_files(
                d,
                pkg_objset,
                pkgdest / package,
                lambda file_counter: pkg_objset.new_spdxid(
                    "package", pkg_name, "file", str(file_counter)
                ),
                # TODO: Can we know the purpose here?
                lambda filepath: [],
                license_data,
                ignore_top_level_dirs=["CONTROL", "DEBIAN"],
                archive=None,
            )

            if package_files:
                pkg_objset.new_relationship(
                    [spdx_package],
                    oe.spdx30.RelationshipType.contains,
                    sorted(list(package_files)),
                )

            if include_sources:
                # The helper's result is iterated directly below, so it has
                # to be an iterable here (not None)
                debug_sources = get_package_sources_from_debug(
                    d, package, package_files, dep_sources, source_hash_cache
                )
                debug_source_ids |= set(
                    oe.sbom30.get_element_link_id(d) for d in debug_sources
                )

            oe.sbom30.write_recipe_jsonld_doc(
                d, pkg_objset, "packages-staging", deploydir, create_spdx_id_links=False
            )

    if include_sources:
        bb.debug(1, "Adding sysroot files to SPDX")
        sysroot_files = add_package_files(
            d,
            build_objset,
            d.expand("${COMPONENTS_DIR}/${PACKAGE_ARCH}/${PN}"),
            lambda file_counter: build_objset.new_spdxid("sysroot", str(file_counter)),
            lambda filepath: [],
            license_data,
            archive=None,
        )

        # Sysroot files are outputs of the recipe build
        if sysroot_files:
            build_objset.new_scoped_relationship(
                [build],
                oe.spdx30.RelationshipType.hasOutput,
                oe.spdx30.LifecycleScopeType.build,
                sorted(list(sysroot_files)),
            )

    # Downloads, source files and the debug sources found in dependencies
    # are all recorded as inputs of the build
    if build_inputs or debug_source_ids:
        build_objset.new_scoped_relationship(
            [build],
            oe.spdx30.RelationshipType.hasInput,
            oe.spdx30.LifecycleScopeType.build,
            sorted(list(build_inputs)) + sorted(list(debug_source_ids)),
        )

    oe.sbom30.write_recipe_jsonld_doc(d, build_objset, "recipes", deploydir)
784 
785 
def create_package_spdx(d):
    """
    Write the runtime SPDX document for each packaged output of the recipe.

    For every entry of PACKAGES that was actually packaged, the staged
    package document ("packages-staging") is loaded, the RDEPENDS of the
    package are resolved through the package providers and recorded as a
    runtime scoped dependsOn relationship, and the result is written as a
    "packages" document below SPDXRUNTIMEDEPLOY. A "common-package" document
    carrying the creation info shared by all packages is written last.

    Nothing is written for recipes inheriting the native or cross classes.
    """
    # Return before collecting providers or reading any deploy directory
    # variable: none of it is used for native and cross recipes
    if bb.data.inherits_class("native", d) or bb.data.inherits_class("cross", d):
        return

    deploydir = Path(d.getVar("SPDXRUNTIMEDEPLOY"))
    providers = oe.spdx_common.collect_package_providers(d)
    pkg_arch = d.getVar("SSTATE_PKGARCH")

    bb.build.exec_func("read_subpackage_metadata", d)

    # Maps a provider package name to its SPDX package so a dependency shared
    # by several packages of this recipe is only searched for once
    dep_package_cache = {}

    # Any element common to all packages that needs to be referenced by ID
    # should be written into this object set
    common_objset = oe.sbom30.ObjectSet.new_objset(
        d, "%s-package-common" % d.getVar("PN")
    )

    for package in d.getVar("PACKAGES").split():
        localdata = bb.data.createCopy(d)
        pkg_name = d.getVar("PKG:%s" % package) or package
        localdata.setVar("PKG", pkg_name)
        localdata.setVar("OVERRIDES", d.getVar("OVERRIDES", False) + ":" + package)

        if not oe.packagedata.packaged(package, localdata):
            continue

        spdx_package, pkg_objset = oe.sbom30.load_obj_in_jsonld(
            d,
            pkg_arch,
            "packages-staging",
            "package-" + pkg_name,
            oe.spdx30.software_Package,
            software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install,
        )

        # We will write out a new collection, so link it to the new
        # creation info in the common package data. The old creation info
        # should still exist and be referenced by all the existing elements
        # in the package
        pkg_objset.creationInfo = pkg_objset.copy_creation_info(
            common_objset.doc.creationInfo
        )

        runtime_spdx_deps = set()

        deps = bb.utils.explode_dep_versions2(localdata.getVar("RDEPENDS") or "")
        # Provider package names already handled for this package. Note that
        # the membership test below is made with the RDEPENDS name, before it
        # is resolved to its provider.
        seen_deps = set()
        for dep in deps:
            if dep in seen_deps:
                continue

            if dep not in providers:
                continue

            provider, _ = providers[dep]

            if not oe.packagedata.packaged(provider, localdata):
                continue

            if provider in dep_package_cache:
                dep_spdx_package = dep_package_cache[provider]
            else:
                # The package data is only needed to locate the document, so
                # it is read on a cache miss only
                dep_pkg = oe.packagedata.read_subpkgdata_dict(provider, d)["PKG"]
                bb.debug(1, "Searching for %s" % dep_pkg)
                dep_spdx_package, _ = oe.sbom30.find_root_obj_in_jsonld(
                    d,
                    "packages-staging",
                    "package-" + dep_pkg,
                    oe.spdx30.software_Package,
                    software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install,
                )
                dep_package_cache[provider] = dep_spdx_package

            runtime_spdx_deps.add(dep_spdx_package)
            seen_deps.add(provider)

        if runtime_spdx_deps:
            # Sort the link IDs so the relationship does not depend on set
            # iteration order, as done for the other relationships built
            # from sets in this file
            pkg_objset.new_scoped_relationship(
                [spdx_package],
                oe.spdx30.RelationshipType.dependsOn,
                oe.spdx30.LifecycleScopeType.runtime,
                sorted(
                    oe.sbom30.get_element_link_id(dep) for dep in runtime_spdx_deps
                ),
            )

        oe.sbom30.write_recipe_jsonld_doc(d, pkg_objset, "packages", deploydir)

    oe.sbom30.write_recipe_jsonld_doc(d, common_objset, "common-package", deploydir)
882 
883 
def write_bitbake_spdx(d):
    """
    Write the top level "bitbake.spdx.json" document into DEPLOY_DIR_SPDX.

    When SPDX_INCLUDE_BITBAKE_PARENT_BUILD is "1" the document gets a root
    Build element with a random ID, optionally related to the build host
    (SPDX_BUILD_HOST), the invoking agent (SPDX_INVOKED_BY) and the agent it
    acts on behalf of (SPDX_ON_BEHALF_OF). Otherwise those settings are
    reported as having no effect.

    Note that PN and BB_TASKHASH are overwritten in the datastore passed in.
    """
    # Set PN to "bitbake" so that SPDX IDs can be generated
    d.setVar("PN", "bitbake")
    d.setVar("BB_TASKHASH", "bitbake")
    oe.spdx_common.load_spdx_license_data(d)

    output_path = Path(d.getVar("DEPLOY_DIR_SPDX")) / "bitbake.spdx.json"

    objset = oe.sbom30.ObjectSet.new_objset(d, "bitbake", False)

    host_import_key = d.getVar("SPDX_BUILD_HOST")
    invoked_by = objset.new_agent("SPDX_INVOKED_BY", add=False)
    on_behalf_of = objset.new_agent("SPDX_ON_BEHALF_OF", add=False)

    if d.getVar("SPDX_INCLUDE_BITBAKE_PARENT_BUILD") != "1":
        # Without a parent build there is nothing to attach these to
        ignored_settings = (
            (
                host_import_key,
                "SPDX_BUILD_HOST has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set",
            ),
            (
                invoked_by,
                "SPDX_INVOKED_BY has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set",
            ),
            (
                on_behalf_of,
                "SPDX_ON_BEHALF_OF has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set",
            ),
        )
        for value, message in ignored_settings:
            if value:
                bb.warn(message)
    else:
        # Since the Build objects are unique, we may as well set the creation
        # time to the current time instead of the fallback SDE
        objset.doc.creationInfo.created = datetime.now(timezone.utc)

        # Each invocation of bitbake should have a unique ID since it is a
        # unique build
        build_id = objset.new_spdxid(os.urandom(16).hex(), include_unihash=False)
        build = objset.add_root(
            oe.spdx30.build_Build(
                _id=build_id,
                creationInfo=objset.doc.creationInfo,
                build_buildType=oe.sbom30.SPDX_BUILD_TYPE,
            )
        )
        set_timestamp_now(d, build, "build_buildStartTime")

        if host_import_key:
            build_host = objset.new_import(host_import_key)
            objset.new_scoped_relationship(
                [build],
                oe.spdx30.RelationshipType.hasHost,
                oe.spdx30.LifecycleScopeType.build,
                [build_host],
            )

        if not invoked_by:
            if on_behalf_of:
                bb.warn(
                    "SPDX_ON_BEHALF_OF has no effect if SPDX_INVOKED_BY is not set"
                )
        else:
            objset.add(invoked_by)
            invoked_by_spdx = objset.new_scoped_relationship(
                [build],
                oe.spdx30.RelationshipType.invokedBy,
                oe.spdx30.LifecycleScopeType.build,
                [invoked_by],
            )

            if on_behalf_of:
                objset.add(on_behalf_of)
                # The delegation targets the invokedBy relationship(s)
                # created above, not the invoking agent itself
                objset.new_scoped_relationship(
                    [on_behalf_of],
                    oe.spdx30.RelationshipType.delegatedTo,
                    oe.spdx30.LifecycleScopeType.build,
                    invoked_by_spdx,
                )

    for element in objset.foreach_type(oe.spdx30.Element):
        element.extension.append(oe.sbom30.OEIdAliasExtension())

    oe.sbom30.write_jsonld_doc(d, objset, output_path)
965 
966 
def collect_build_package_inputs(d, objset, build, packages):
    """
    Record the SPDX packages of all installed packages as inputs of a build.

    Each key of `packages` is resolved through the package providers, the
    root package of its "packages" SPDX document is looked up, and `build`
    gets a single build scoped hasInput relationship to all of them. The
    build is aborted with bb.fatal() if any name has no provider.
    """
    import oe.sbom30

    providers = oe.spdx_common.collect_package_providers(d)

    input_ids = set()
    unresolved = set()

    for name in sorted(packages.keys()):
        if name in providers:
            pkg_name, _hashfn = providers[name]

            # Copy all of the package SPDX files into the Sbom elements
            pkg_spdx, _ = oe.sbom30.find_root_obj_in_jsonld(
                d,
                "packages",
                "package-" + pkg_name,
                oe.spdx30.software_Package,
                software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install,
            )
            input_ids.add(oe.sbom30.get_element_link_id(pkg_spdx))
        else:
            # Keep going so that every missing provider is reported at once
            unresolved.add(name)

    if unresolved:
        missing_names = ", ".join(sorted(unresolved))
        bb.fatal(f"Unable to find SPDX provider(s) for: {missing_names}")

    if not input_ids:
        return

    objset.new_scoped_relationship(
        [build],
        oe.spdx30.RelationshipType.hasInput,
        oe.spdx30.LifecycleScopeType.build,
        sorted(input_ids),
    )
1004 
1005 
def create_rootfs_spdx(d):
    """
    Write the "rootfs" SPDX document for the image being built.

    The document contains a Package element for the root filesystem (named
    after IMAGE_BASENAME), the rootfs task Build that outputs it, and a
    hasInput relationship from that Build to every package listed in the
    JSON file named by SPDX_ROOTFS_PACKAGES. It is written below
    SPDXROOTFSDEPLOY.
    """
    deploydir = Path(d.getVar("SPDXROOTFSDEPLOY"))
    root_packages_file = Path(d.getVar("SPDX_ROOTFS_PACKAGES"))
    image_basename = d.getVar("IMAGE_BASENAME")
    machine = d.getVar("MACHINE")

    # Read the JSON with an explicit encoding instead of the locale default
    with root_packages_file.open("r", encoding="utf-8") as f:
        packages = json.load(f)

    objset = oe.sbom30.ObjectSet.new_objset(
        d, "%s-%s-rootfs" % (image_basename, machine)
    )

    rootfs = objset.add_root(
        oe.spdx30.software_Package(
            _id=objset.new_spdxid("rootfs", image_basename),
            creationInfo=objset.doc.creationInfo,
            name=image_basename,
            software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.archive,
        )
    )
    set_timestamp_now(d, rootfs, "builtTime")

    rootfs_build = objset.add_root(objset.new_task_build("rootfs", "rootfs"))
    set_timestamp_now(d, rootfs_build, "build_buildEndTime")

    objset.new_scoped_relationship(
        [rootfs_build],
        oe.spdx30.RelationshipType.hasOutput,
        oe.spdx30.LifecycleScopeType.build,
        [rootfs],
    )

    collect_build_package_inputs(d, objset, rootfs_build, packages)

    oe.sbom30.write_recipe_jsonld_doc(d, objset, "rootfs", deploydir)
1043 
1044 
def create_image_spdx(d):
    """
    Write the "image" SPDX document describing the generated image files.

    The JSON manifest named by IMAGE_OUTPUT_MANIFEST lists, per image task,
    the image type and the files produced in IMGDEPLOYDIR. For each task a
    Build element is created with a hasOutput relationship to its artifacts
    (a directory image contributes every file found by add_package_files(),
    a plain file contributes a single File element with its SHA-256). All
    builds get a hasInput relationship to the rootfs package. The document
    is written below SPDXIMAGEWORK.
    """
    image_deploy_dir = Path(d.getVar("IMGDEPLOYDIR"))
    manifest_path = Path(d.getVar("IMAGE_OUTPUT_MANIFEST"))
    spdx_work_dir = Path(d.getVar("SPDXIMAGEWORK"))

    image_basename = d.getVar("IMAGE_BASENAME")
    machine = d.getVar("MACHINE")

    objset = oe.sbom30.ObjectSet.new_objset(
        d, "%s-%s-image" % (image_basename, machine)
    )

    # Read the JSON with an explicit encoding instead of the locale default
    with manifest_path.open("r", encoding="utf-8") as f:
        manifest = json.load(f)

    builds = []
    for task in manifest:
        imagetype = task["imagetype"]
        taskname = task["taskname"]

        image_build = objset.add_root(
            objset.new_task_build(taskname, "image/%s" % imagetype)
        )
        set_timestamp_now(d, image_build, "build_buildEndTime")
        builds.append(image_build)

        artifacts = []

        for image in task["images"]:
            image_filename = image["filename"]
            image_path = image_deploy_dir / image_filename
            if os.path.isdir(image_path):
                # NOTE(review): the SPDX ID only depends on the counter
                # handed in by add_package_files(); if that counter restarts
                # on every call, two directory images in one document would
                # get colliding IDs - confirm
                artifacts.extend(
                    add_package_files(
                        d,
                        objset,
                        image_path,
                        lambda file_counter: objset.new_spdxid(
                            "imagefile", str(file_counter)
                        ),
                        lambda filepath: [],
                        license_data=None,
                        ignore_dirs=[],
                        ignore_top_level_dirs=[],
                        archive=None,
                    )
                )
            else:
                artifacts.append(
                    objset.add_root(
                        oe.spdx30.software_File(
                            _id=objset.new_spdxid("image", image_filename),
                            creationInfo=objset.doc.creationInfo,
                            name=image_filename,
                            verifiedUsing=[
                                oe.spdx30.Hash(
                                    algorithm=oe.spdx30.HashAlgorithm.sha256,
                                    hashValue=bb.utils.sha256_file(image_path),
                                )
                            ],
                        )
                    )
                )

        # Tag each artifact exactly once, after all images of the task have
        # been collected, instead of re-tagging the whole list per image
        for artifact in artifacts:
            set_purposes(
                d, artifact, "SPDX_IMAGE_PURPOSE:%s" % imagetype, "SPDX_IMAGE_PURPOSE"
            )
            set_timestamp_now(d, artifact, "builtTime")

        if artifacts:
            objset.new_scoped_relationship(
                [image_build],
                oe.spdx30.RelationshipType.hasOutput,
                oe.spdx30.LifecycleScopeType.build,
                artifacts,
            )

    if builds:
        rootfs_image, _ = oe.sbom30.find_root_obj_in_jsonld(
            d,
            "rootfs",
            "%s-%s-rootfs" % (image_basename, machine),
            oe.spdx30.software_Package,
            # TODO: Should use a purpose to filter here?
        )
        objset.new_scoped_relationship(
            builds,
            oe.spdx30.RelationshipType.hasInput,
            oe.spdx30.LifecycleScopeType.build,
            [oe.sbom30.get_element_link_id(rootfs_image)],
        )

    objset.add_aliases()
    objset.link()
    oe.sbom30.write_recipe_jsonld_doc(d, objset, "image", spdx_work_dir)
1144 
1145 
def create_image_sbom_spdx(d):
    """
    Write the final image SBoM and link it under the image link name.

    The SBoM roots are the rootfs package (from the "rootfs" document)
    followed by every root File of the "image" document. The result is
    written to SPDXIMAGEDEPLOYDIR as "<IMAGE_NAME>.spdx.json"; if
    IMAGE_LINK_NAME is set and names a different file, a relative symlink
    "<IMAGE_LINK_NAME>.spdx.json" pointing at it is created alongside.
    """
    import oe.sbom30

    image_name = d.getVar("IMAGE_NAME")
    image_basename = d.getVar("IMAGE_BASENAME")
    image_link_name = d.getVar("IMAGE_LINK_NAME")
    imgdeploydir = Path(d.getVar("SPDXIMAGEDEPLOYDIR"))
    machine = d.getVar("MACHINE")

    spdx_path = imgdeploydir / (image_name + ".spdx.json")

    # TODO: Do we need to add the rootfs or are the image files sufficient?
    rootfs_image, _ = oe.sbom30.find_root_obj_in_jsonld(
        d,
        "rootfs",
        "%s-%s-rootfs" % (image_basename, machine),
        oe.spdx30.software_Package,
        # TODO: Should use a purpose here?
    )
    root_elements = [oe.sbom30.get_element_link_id(rootfs_image)]

    image_objset, _ = oe.sbom30.find_jsonld(
        d, "image", "%s-%s-image" % (image_basename, machine), required=True
    )
    root_elements.extend(
        oe.sbom30.get_element_link_id(image_file)
        for image_file in image_objset.foreach_root(oe.spdx30.software_File)
    )

    objset, _sbom = oe.sbom30.create_sbom(d, image_name, root_elements)

    oe.sbom30.write_jsonld_doc(d, objset, spdx_path)

    # Provide the document under the stable image link name as well
    if image_link_name:
        link = imgdeploydir / (image_link_name + ".spdx.json")
        if link != spdx_path:
            link.symlink_to(os.path.relpath(spdx_path, link.parent))
1186 
1187 
def sdk_create_spdx(d, sdk_type, spdx_work_dir, toolchain_outputname):
    """
    Write "sdk-rootfs.spdx.json" for one SDK root filesystem.

    The document holds a Package element named
    "<toolchain_outputname>-<sdk_type>", the "sdk-rootfs" Build that outputs
    it, and a hasInput relationship from that Build to every package
    reported as installed for this SDK type. It is written into
    `spdx_work_dir`.
    """
    sdk_name = "-".join((toolchain_outputname, sdk_type))
    for_target = sdk_type == "target"
    installed_packages = oe.sdk.sdk_list_installed_packages(d, for_target)

    objset = oe.sbom30.ObjectSet.new_objset(d, sdk_name)

    rootfs_package = oe.spdx30.software_Package(
        _id=objset.new_spdxid("sdk-rootfs", sdk_name),
        creationInfo=objset.doc.creationInfo,
        name=sdk_name,
        software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.archive,
    )
    sdk_rootfs = objset.add_root(rootfs_package)
    set_timestamp_now(d, sdk_rootfs, "builtTime")

    rootfs_task = objset.new_task_build("sdk-rootfs", "sdk-rootfs")
    sdk_build = objset.add_root(rootfs_task)
    set_timestamp_now(d, sdk_build, "build_buildEndTime")

    objset.new_scoped_relationship(
        [sdk_build],
        oe.spdx30.RelationshipType.hasOutput,
        oe.spdx30.LifecycleScopeType.build,
        [sdk_rootfs],
    )

    collect_build_package_inputs(d, objset, sdk_build, installed_packages)

    objset.add_aliases()
    output_path = spdx_work_dir / "sdk-rootfs.spdx.json"
    oe.sbom30.write_jsonld_doc(d, objset, output_path)
1218 
1219 
def create_sdk_sbom(d, sdk_deploydir, spdx_work_dir, toolchain_outputname):
    """
    Write the SDK SBoM "<toolchain_outputname>.spdx.json" into sdk_deploydir.

    The "sdk-rootfs.spdx.json" document is loaded from `spdx_work_dir` and
    extended with an "sdk-populate" Build that takes the rootfs package as
    input and outputs every regular, non-symlink file found below
    `sdk_deploydir`. Files ending in ".manifest" or ".testdata.json" get a
    fixed primary purpose, all others take theirs from SPDX_SDK_PURPOSE.

    The build is aborted with bb.fatal() if the loaded document has no root
    Package, and a warning is issued if no output file is found.
    """
    # Load the document written earlier
    rootfs_objset = oe.sbom30.load_jsonld(
        d, spdx_work_dir / "sdk-rootfs.spdx.json", required=True
    )

    # Create a new build for the SDK installer
    sdk_build = rootfs_objset.new_task_build("sdk-populate", "sdk-populate")
    set_timestamp_now(d, sdk_build, "build_buildEndTime")

    rootfs = rootfs_objset.find_root(oe.spdx30.software_Package)
    if rootfs is None:
        bb.fatal("Unable to find rootfs artifact")

    rootfs_objset.new_scoped_relationship(
        [sdk_build],
        oe.spdx30.RelationshipType.hasInput,
        oe.spdx30.LifecycleScopeType.build,
        [rootfs],
    )

    files = set()

    # NOTE: os.walk() does list symlinks to files in filenames (it merely
    # does not descend into symlinked directories), hence the explicit
    # is_symlink() check below
    for dirpath, dirnames, filenames in os.walk(sdk_deploydir):
        # Sorting dirnames in place fixes the order in which os.walk()
        # descends, making the traversal deterministic
        dirnames.sort()
        for fn in sorted(filenames):
            fpath = Path(dirpath) / fn
            if not fpath.is_file() or fpath.is_symlink():
                continue

            relpath = str(fpath.relative_to(sdk_deploydir))

            f = rootfs_objset.new_file(
                rootfs_objset.new_spdxid("sdk-installer", relpath),
                relpath,
                fpath,
            )
            set_timestamp_now(d, f, "builtTime")

            if fn.endswith(".manifest"):
                f.software_primaryPurpose = oe.spdx30.software_SoftwarePurpose.manifest
            elif fn.endswith(".testdata.json"):
                f.software_primaryPurpose = (
                    oe.spdx30.software_SoftwarePurpose.configuration
                )
            else:
                set_purposes(d, f, "SPDX_SDK_PURPOSE")

            files.add(f)

    if files:
        rootfs_objset.new_scoped_relationship(
            [sdk_build],
            oe.spdx30.RelationshipType.hasOutput,
            oe.spdx30.LifecycleScopeType.build,
            files,
        )
    else:
        bb.warn(f"No SDK output files found in {sdk_deploydir}")

    # NOTE(review): every output file, including the manifest and testdata
    # files, is passed as an SBoM root element; confirm whether only the
    # SPDX_SDK_PURPOSE files were meant to be roots
    objset, _ = oe.sbom30.create_sbom(
        d, toolchain_outputname, sorted(files), [rootfs_objset]
    )

    oe.sbom30.write_jsonld_doc(
        d, objset, sdk_deploydir / (toolchain_outputname + ".spdx.json")
    )
1289