xref: /openbmc/openbmc/poky/meta/lib/oe/spdx30_tasks.py (revision 96e4b4e121e0e2da1535d7d537d6a982a6ff5bc0)
1#
2# Copyright OpenEmbedded Contributors
3#
4# SPDX-License-Identifier: GPL-2.0-only
5#
6
7import json
8import oe.cve_check
9import oe.packagedata
10import oe.patch
11import oe.sbom30
12import oe.spdx30
13import oe.spdx_common
14import oe.sdk
15import os
16
17from contextlib import contextmanager
18from datetime import datetime, timezone
19from pathlib import Path
20
21
def set_timestamp_now(d, o, prop):
    """Stamp ``o.<prop>`` with the current UTC time, or remove the property.

    The property is set to a timezone-aware "now" only when
    SPDX_INCLUDE_TIMESTAMPS is exactly "1"; for any other value the property
    is deleted from ``o``.
    """
    if d.getVar("SPDX_INCLUDE_TIMESTAMPS") != "1":
        # Deleting helps to validate that the property actually exists, and
        # also that it is not mandatory
        delattr(o, prop)
        return

    setattr(o, prop, datetime.now(timezone.utc))
29
30
def add_license_expression(d, objset, license_expression, license_data):
    """Translate an OE LICENSE string into an SPDX license expression element.

    The expression is split into tokens; "&", "|" and "CLOSED" become "AND",
    "OR" and "NONE", parentheses pass through, and every other token is
    mapped through the SPDXLICENSEMAP flags. Tokens that are not present in
    ``license_data["licenses"]`` become "LicenseRef-<token>" and get a
    SimpleLicensingText element carrying the license text.

    Returns the element created by ``objset.new_license_expression()``.
    """
    text_cache = {}
    license_text_map = {}

    # Tokens that translate directly, without any license lookup
    fixed_tokens = {"(": "(", ")": ")", "&": "AND", "|": "OR", "CLOSED": "NONE"}

    def add_license_text(name):
        # Return (creating if necessary) the license text element for `name`
        if name in text_cache:
            return text_cache[name]

        lic = objset.find_filter(
            oe.spdx30.simplelicensing_SimpleLicensingText,
            name=name,
        )
        if lic is not None:
            text_cache[name] = lic
            return lic

        lic = objset.add(
            oe.spdx30.simplelicensing_SimpleLicensingText(
                _id=objset.new_spdxid("license-text", name),
                creationInfo=objset.doc.creationInfo,
                name=name,
            )
        )
        objset.set_element_alias(lic)
        text_cache[name] = lic

        if name == "PD":
            lic.simplelicensing_licenseText = "Software released to the public domain"
            return lic

        # Search for the license in COMMON_LICENSE_DIR and LICENSE_PATH
        search_dirs = [d.getVar("COMMON_LICENSE_DIR")]
        search_dirs += (d.getVar("LICENSE_PATH") or "").split()
        for directory in search_dirs:
            try:
                with (Path(directory) / name).open(errors="replace") as f:
                    lic.simplelicensing_licenseText = f.read()
            except FileNotFoundError:
                continue
            return lic

        # If it's not SPDX or PD, then NO_GENERIC_LICENSE must be set
        filename = d.getVarFlag("NO_GENERIC_LICENSE", name)
        if not filename:
            bb.fatal("Cannot find any text for license %s" % name)
            return None

        with open(d.expand("${S}/" + filename), errors="replace") as f:
            lic.simplelicensing_licenseText = f.read()
        return lic

    def convert(token):
        # Map one OE license token onto its SPDX spelling
        if token in fixed_tokens:
            return fixed_tokens[token]

        mapped = d.getVarFlag("SPDXLICENSEMAP", token) or token
        if mapped in license_data["licenses"]:
            return mapped

        license_ref = "LicenseRef-" + token
        if license_ref not in license_text_map:
            license_text_map[license_ref] = oe.sbom30.get_element_link_id(
                add_license_text(token)
            )
        return license_ref

    # Pad the operators with spaces so that a plain split() tokenizes them
    spaced = license_expression
    for symbol in "()|&":
        spaced = spaced.replace(symbol, " %s " % symbol)

    spdx_license_expression = " ".join(convert(token) for token in spaced.split())

    expression = objset.new_license_expression(
        spdx_license_expression, license_data, license_text_map
    )
    objset.set_element_alias(expression)
    return expression
130
131
def add_package_files(
    d,
    objset,
    topdir,
    get_spdxid,
    get_purposes,
    license_data=None,
    *,
    archive=None,
    ignore_dirs=(),
    ignore_top_level_dirs=(),
):
    """Create an SPDX file element for every regular file below ``topdir``.

    Parameters:
        d: datastore; only SOURCE_DATE_EPOCH is read from it.
        objset: object set that receives the new file elements.
        topdir: root directory to walk.
        get_spdxid: callable mapping the 1-based file counter to an SPDX ID.
        get_purposes: callable mapping a file path to its list of purposes.
        license_data: when not None, files with the "source" purpose are
            scanned for declared licenses.
        archive: optional open tarfile; each file is also added to it with
            root ownership and an mtime clamped to SOURCE_DATE_EPOCH.
        ignore_dirs: directory names skipped at any depth.
        ignore_top_level_dirs: directory names skipped only directly below
            ``topdir``.

    Directories and files are visited in sorted order so that the file
    counter (and therefore the generated SPDX IDs and the archive member
    order) does not depend on the order the filesystem returns entries in.

    Returns the set of file elements returned by ``objset.new_file()``.
    """
    source_date_epoch = d.getVar("SOURCE_DATE_EPOCH")
    # An unset *or empty* value means "do not clamp"; normalise both to None
    # so the comparison against info.mtime below never sees a string.
    source_date_epoch = int(source_date_epoch) if source_date_epoch else None

    spdx_files = set()

    file_counter = 1
    for subdir, dirs, files in os.walk(topdir):
        # Prune in place so that os.walk() does not descend into them
        dirs[:] = [name for name in dirs if name not in ignore_dirs]
        if subdir == str(topdir):
            dirs[:] = [name for name in dirs if name not in ignore_top_level_dirs]

        # Deterministic traversal: os.walk() yields entries in arbitrary order
        dirs.sort()
        files.sort()

        for file in files:
            filepath = Path(subdir) / file
            if filepath.is_symlink() or not filepath.is_file():
                continue

            filename = str(filepath.relative_to(topdir))
            file_purposes = get_purposes(filepath)

            spdx_file = objset.new_file(
                get_spdxid(file_counter),
                filename,
                filepath,
                purposes=file_purposes,
            )
            spdx_files.add(spdx_file)

            if (
                license_data is not None
                and oe.spdx30.software_SoftwarePurpose.source in file_purposes
            ):
                objset.scan_declared_licenses(spdx_file, filepath, license_data)

            if archive is not None:
                with filepath.open("rb") as f:
                    info = archive.gettarinfo(fileobj=f)
                    # Strip host-specific metadata from the archive member
                    info.name = filename
                    info.uid = 0
                    info.gid = 0
                    info.uname = "root"
                    info.gname = "root"

                    if source_date_epoch is not None and info.mtime > source_date_epoch:
                        info.mtime = source_date_epoch

                    archive.addfile(info, f)

            file_counter += 1

    bb.debug(1, "Added %d files to %s" % (len(spdx_files), objset.doc._id))

    return spdx_files
197
198
def get_package_sources_from_debug(
    d, package, package_files, sources, source_hash_cache
):
    """Find the known source files referenced by a package's debug info.

    Parameters:
        d: datastore used to resolve the directories that are searched.
        package: package name whose extended package data is read.
        package_files: SPDX file elements of the package; every file that
            has "debugsrc" data must match one of them by name or alias.
        sources: mapping of SHA256 hex digest to source element.
        source_hash_cache: mapping of path to SHA256 (or None for paths that
            are not regular files); updated in place so repeated calls do
            not re-hash the same file.

    Returns a set with the entries of ``sources`` whose hash matches a debug
    source file. The set is empty when the package has no extended package
    data, so callers can always iterate the result.
    """

    def file_path_match(file_path, pkg_file):
        wanted = file_path.lstrip("/")
        if wanted == pkg_file.name.lstrip("/"):
            return True

        for e in pkg_file.extension:
            if isinstance(e, oe.sbom30.OEFileNameAliasExtension):
                if any(wanted == a.lstrip("/") for a in e.aliases):
                    return True

        return False

    kernel_prefix = "/usr/src/kernel/"

    debug_search_paths = [
        Path(d.getVar("SPDXWORK")),
        Path(d.getVar("PKGD")),
        Path(d.getVar("STAGING_DIR_TARGET")),
        Path(d.getVar("STAGING_DIR_NATIVE")),
        Path(d.getVar("STAGING_KERNEL_DIR")),
    ]

    dep_source_files = set()

    pkg_data = oe.packagedata.read_subpkgdata_extended(package, d)

    if pkg_data is None:
        return dep_source_files

    for file_path, file_data in pkg_data["files_info"].items():
        if "debugsrc" not in file_data:
            continue

        if not any(file_path_match(file_path, pkg_file) for pkg_file in package_files):
            bb.fatal(
                "No package file found for %s in %s; SPDX found: %s"
                % (str(file_path), package, " ".join(p.name for p in package_files))
            )

        for debugsrc in file_data["debugsrc"]:
            # Always produce a relative path: joining an absolute path onto
            # a search root would discard the root and look at the host
            # filesystem instead.
            if debugsrc.startswith(kernel_prefix):
                relative_src = debugsrc[len(kernel_prefix):].lstrip("/")
            else:
                relative_src = debugsrc.lstrip("/")

            for search in debug_search_paths:
                debugsrc_path = search / relative_src

                if debugsrc_path in source_hash_cache:
                    file_sha256 = source_hash_cache[debugsrc_path]
                    if file_sha256 is None:
                        continue
                else:
                    # We can only hash files below, skip directories, links, etc.
                    if not debugsrc_path.is_file():
                        source_hash_cache[debugsrc_path] = None
                        continue

                    file_sha256 = bb.utils.sha256_file(debugsrc_path)
                    source_hash_cache[debugsrc_path] = file_sha256

                if file_sha256 in sources:
                    dep_source_files.add(sources[file_sha256])
                else:
                    bb.debug(
                        1,
                        "Debug source %s with SHA256 %s not found in any dependency"
                        % (str(debugsrc_path), file_sha256),
                    )
                # The first search path that holds the file wins
                break
            else:
                bb.debug(1, "Debug source %s not found" % debugsrc)

    return dep_source_files
274
275
def collect_dep_objsets(d, build):
    """Load the recipe SPDX build element of every SPDX dependency.

    Returns a tuple ``(dep_objsets, dep_builds)``: the list of object sets
    for dependencies that are part of the taskhash, and the set of build
    elements for all dependencies. ``build`` is accepted but not used here.
    """
    linkable_objsets = []
    all_builds = set()

    for dep in oe.spdx_common.get_spdx_deps(d):
        bb.debug(1, "Fetching SPDX for dependency %s" % (dep.pn))
        found_build, found_objset = oe.sbom30.find_root_obj_in_jsonld(
            d, "recipes", "recipe-" + dep.pn, oe.spdx30.build_Build
        )

        # The build _can_ always be linked against (by alias)
        all_builds.add(found_build)

        # Only a dependency that is part of the taskhash is returned to be
        # linked against; otherwise this recipe would not be rebuilt when
        # the dependency changes
        if dep.in_taskhash:
            linkable_objsets.append(found_objset)

    return linkable_objsets, all_builds
298
299
def index_sources_by_hash(sources, dest):
    """Index source file elements by their SHA256 digest.

    Every element of ``sources`` that is a software_File with the "source"
    primary purpose is stored in ``dest`` under its SHA256 hash value; an
    existing entry for the same hash is kept. A matching file without any
    SHA256 hash is reported through bb.fatal().
    """
    for candidate in sources:
        if (
            not isinstance(candidate, oe.spdx30.software_File)
            or candidate.software_primaryPurpose
            != oe.spdx30.software_SoftwarePurpose.source
        ):
            continue

        # Only the first SHA256 entry of the file is considered
        sha256 = next(
            (
                h
                for h in candidate.verifiedUsing
                if h.algorithm == oe.spdx30.HashAlgorithm.sha256
            ),
            None,
        )
        if sha256 is None:
            bb.fatal(f"No SHA256 found for {candidate.name}")
            continue

        dest.setdefault(sha256.hashValue, candidate)
315
316
def collect_dep_sources(dep_objsets, dest):
    """Index the build inputs of every non-native dependency by SHA256.

    For each object set, the targets of all "hasInput" relationships that
    originate from its root build element are passed to
    index_sources_by_hash(), which fills ``dest``.
    """
    for objset in dep_objsets:
        # Don't collect sources from native recipes as they
        # match non-native sources also.
        if objset.is_native():
            continue

        bb.debug(1, "Fetching Sources for dependency %s" % (objset.doc.name))

        root_build = objset.find_root(oe.spdx30.build_Build)
        if not root_build:
            bb.fatal("Unable to find a build")

        for relationship in objset.foreach_type(oe.spdx30.Relationship):
            # Only the inputs of the dependency's own build are of interest
            if (
                relationship.from_ is not root_build
                or relationship.relationshipType
                != oe.spdx30.RelationshipType.hasInput
            ):
                continue

            index_sources_by_hash(relationship.to, dest)
338
339
def add_download_files(d, objset):
    """Add an SPDX element for every SRC_URI entry and return them as a set.

    Local ("file" type) entries become file elements: one per contained
    file when the entry is a directory (symlinks are skipped), otherwise a
    single one. Every other entry becomes a software_Package with its
    download location and, where the fetcher supports checksums, the
    expected sha256/sha1 hashes. Entries recognised as patches get the
    "patch" primary purpose, all others "source".
    """
    collected = set()

    src_uris = d.getVar("SRC_URI").split()
    fetcher = bb.fetch2.Fetch(src_uris, d)

    for position, uri in enumerate(src_uris, start=1):
        # SPDX IDs use the 1-based position of the URI in SRC_URI
        download_no = str(position)
        fetch_data = fetcher.ud[uri]

        for name in fetch_data.names:
            local_name = os.path.basename(fetcher.localpath(uri))
            purpose = (
                oe.spdx30.software_SoftwarePurpose.patch
                if oe.patch.patch_path(uri, fetcher, "", expand=False)
                else oe.spdx30.software_SoftwarePurpose.source
            )

            if fetch_data.type != "file":
                package = objset.add(
                    oe.spdx30.software_Package(
                        _id=objset.new_spdxid("source", download_no),
                        creationInfo=objset.doc.creationInfo,
                        name=local_name,
                        software_primaryPurpose=purpose,
                        software_downloadLocation=oe.spdx_common.fetch_data_to_uri(
                            fetch_data, name
                        ),
                    )
                )

                if fetch_data.method.supports_checksum(fetch_data):
                    # TODO Need something better than hard coding this
                    for checksum_id in ("sha256", "sha1"):
                        expected = getattr(
                            fetch_data, "%s_expected" % checksum_id, None
                        )
                        if expected is not None:
                            package.verifiedUsing.append(
                                oe.spdx30.Hash(
                                    algorithm=getattr(
                                        oe.spdx30.HashAlgorithm, checksum_id
                                    ),
                                    hashValue=expected,
                                )
                            )

                collected.add(package)
                continue

            if not os.path.isdir(fetch_data.localpath):
                collected.add(
                    objset.new_file(
                        objset.new_spdxid("source", download_no),
                        local_name,
                        fetch_data.localpath,
                        purposes=[purpose],
                    )
                )
                continue

            # A local directory: one file element per contained file
            walk_no = 0
            for root, _subdirs, filenames in os.walk(fetch_data.localpath):
                for filename in filenames:
                    full_path = os.path.join(root, filename)
                    if os.path.islink(full_path):
                        # TODO: SPDX doesn't support symlinks yet
                        continue

                    walk_no += 1
                    collected.add(
                        objset.new_file(
                            objset.new_spdxid("source", download_no, str(walk_no)),
                            os.path.join(
                                local_name,
                                os.path.relpath(full_path, fetch_data.localpath),
                            ),
                            full_path,
                            purposes=[purpose],
                        )
                    )

    return collected
421
422
def set_purposes(d, element, *var_names, force_purposes=[]):
    """Set the primary and additional software purposes of ``element``.

    The purpose list starts with ``force_purposes`` and is extended with the
    whitespace-separated value of the first variable in ``var_names`` that
    is set to a non-empty value. The first purpose becomes
    ``software_primaryPurpose``, the rest ``software_additionalPurpose``.
    If no purpose is found a warning is emitted and ``element`` is left
    untouched.
    """
    purposes = force_purposes[:]

    # Variables are queried lazily, so lookup stops at the first one set
    first_value = next(
        (val for val in (d.getVar(var_name) for var_name in var_names) if val),
        None,
    )
    if first_value:
        purposes.extend(first_value.split())

    if not purposes:
        bb.warn("No SPDX purposes found in %s" % " ".join(var_names))
        return

    purpose_enum = oe.spdx30.software_SoftwarePurpose
    primary, *additional = purposes
    element.software_primaryPurpose = getattr(purpose_enum, primary)
    element.software_additionalPurpose = [getattr(purpose_enum, p) for p in additional]
442
443
def create_spdx(d):
    """Write the recipe SPDX document and the staged per-package documents.

    The recipe document ("recipe-<PN>") holds the build element, the
    downloaded sources, license expressions and CVE elements, plus (when
    SPDX_INCLUDE_SOURCES is "1") the patched source files and the sysroot
    files. For non-native recipes one document per packaged entry of
    PACKAGES is written to the "packages-staging" area; it references
    licenses and CVEs in the recipe document by ID.

    Calls bb.fatal() if SPDX_INCLUDE_VEX is not one of "none", "current"
    or "all", or if a CVE has an unknown status or "Ignored" detail.
    """

    def set_var_field(var, obj, name, package=None):
        # Prefer the package-specific value ("VAR:package"), fall back to
        # the recipe-wide one, and leave the attribute alone if neither is
        # set.
        val = None
        if package:
            val = d.getVar("%s:%s" % (var, package))

        if not val:
            val = d.getVar(var)

        if val:
            setattr(obj, name, val)

    license_data = oe.spdx_common.load_spdx_license_data(d)

    deploydir = Path(d.getVar("SPDXDEPLOY"))
    spdx_workdir = Path(d.getVar("SPDXWORK"))
    include_sources = d.getVar("SPDX_INCLUDE_SOURCES") == "1"
    is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class(
        "cross", d
    )
    include_vex = d.getVar("SPDX_INCLUDE_VEX")
    if include_vex not in ("none", "current", "all"):
        bb.fatal("SPDX_INCLUDE_VEX must be one of 'none', 'current', 'all'")

    build_objset = oe.sbom30.ObjectSet.new_objset(d, "recipe-" + d.getVar("PN"))

    build = build_objset.new_task_build("recipe", "recipe")
    build_objset.set_element_alias(build)

    build_objset.doc.rootElement.append(build)

    build_objset.set_is_native(is_native)

    for var in (d.getVar("SPDX_CUSTOM_ANNOTATION_VARS") or "").split():
        # NOTE(review): new_annotation is neither defined nor imported in
        # the part of this module reviewed here; confirm it exists,
        # otherwise this raises NameError as soon as
        # SPDX_CUSTOM_ANNOTATION_VARS is set.
        new_annotation(
            d,
            build_objset,
            build,
            "%s=%s" % (var, d.getVar(var)),
            oe.spdx30.AnnotationType.other,
        )

    build_inputs = set()

    # Add CVEs
    cve_by_status = {}
    if include_vex != "none":
        for cve in d.getVarFlags("CVE_STATUS") or {}:
            decoded_status = oe.cve_check.decode_cve_status(d, cve)

            # If this CVE is fixed upstream, skip it unless all CVEs are
            # specified.
            if (
                include_vex != "all"
                and "detail" in decoded_status
                and decoded_status["detail"]
                in (
                    "fixed-version",
                    "cpe-stable-backport",
                )
            ):
                bb.debug(1, "Skipping %s since it is already fixed upstream" % cve)
                continue

            spdx_cve = build_objset.new_cve_vuln(cve)
            build_objset.set_element_alias(spdx_cve)

            cve_by_status.setdefault(decoded_status["mapping"], {})[cve] = (
                spdx_cve,
                decoded_status["detail"],
                decoded_status["description"],
            )

    cpe_ids = oe.cve_check.get_cpe_ids(d.getVar("CVE_PRODUCT"), d.getVar("CVE_VERSION"))

    source_files = add_download_files(d, build_objset)
    build_inputs |= source_files

    recipe_spdx_license = add_license_expression(
        d, build_objset, d.getVar("LICENSE"), license_data
    )
    build_objset.new_relationship(
        source_files,
        oe.spdx30.RelationshipType.hasConcludedLicense,
        [oe.sbom30.get_element_link_id(recipe_spdx_license)],
    )

    # SHA256 -> source file element, used to resolve debug sources below
    dep_sources = {}
    if oe.spdx_common.process_sources(d) and include_sources:
        bb.debug(1, "Adding source files to SPDX")
        oe.spdx_common.get_patched_src(d)

        files = add_package_files(
            d,
            build_objset,
            spdx_workdir,
            lambda file_counter: build_objset.new_spdxid(
                "sourcefile", str(file_counter)
            ),
            lambda filepath: [oe.spdx30.software_SoftwarePurpose.source],
            license_data,
            ignore_dirs=[".git"],
            ignore_top_level_dirs=["temp"],
            archive=None,
        )
        build_inputs |= files
        index_sources_by_hash(files, dep_sources)

    dep_objsets, dep_builds = collect_dep_objsets(d, build)
    if dep_builds:
        build_objset.new_scoped_relationship(
            [build],
            oe.spdx30.RelationshipType.dependsOn,
            oe.spdx30.LifecycleScopeType.build,
            sorted(oe.sbom30.get_element_link_id(b) for b in dep_builds),
        )

    debug_source_ids = set()
    source_hash_cache = {}

    # Write out the package SPDX data now. It is not complete as we cannot
    # write the runtime data, so write it to a staging area and a later task
    # will write out the final collection

    # TODO: Handle native recipe output
    if not is_native:
        bb.debug(1, "Collecting Dependency sources files")
        collect_dep_sources(dep_objsets, dep_sources)

        bb.build.exec_func("read_subpackage_metadata", d)

        pkgdest = Path(d.getVar("PKGDEST"))
        for package in d.getVar("PACKAGES").split():
            if not oe.packagedata.packaged(package, d):
                continue

            pkg_name = d.getVar("PKG:%s" % package) or package

            bb.debug(1, "Creating SPDX for package %s" % pkg_name)

            pkg_objset = oe.sbom30.ObjectSet.new_objset(d, "package-" + pkg_name)

            spdx_package = pkg_objset.add_root(
                oe.spdx30.software_Package(
                    _id=pkg_objset.new_spdxid("package", pkg_name),
                    creationInfo=pkg_objset.doc.creationInfo,
                    name=pkg_name,
                    software_packageVersion=d.getVar("PV"),
                )
            )
            set_timestamp_now(d, spdx_package, "builtTime")

            set_purposes(
                d,
                spdx_package,
                "SPDX_PACKAGE_ADDITIONAL_PURPOSE:%s" % package,
                "SPDX_PACKAGE_ADDITIONAL_PURPOSE",
                force_purposes=["install"],
            )

            supplier = build_objset.new_agent("SPDX_PACKAGE_SUPPLIER")
            if supplier is not None:
                spdx_package.suppliedBy = (
                    supplier if isinstance(supplier, str) else supplier._id
                )

            set_var_field(
                "HOMEPAGE", spdx_package, "software_homePage", package=package
            )
            set_var_field("SUMMARY", spdx_package, "summary", package=package)
            set_var_field("DESCRIPTION", spdx_package, "description", package=package)

            pkg_objset.new_scoped_relationship(
                [oe.sbom30.get_element_link_id(build)],
                oe.spdx30.RelationshipType.hasOutput,
                oe.spdx30.LifecycleScopeType.build,
                [spdx_package],
            )

            for cpe_id in cpe_ids:
                spdx_package.externalIdentifier.append(
                    oe.spdx30.ExternalIdentifier(
                        externalIdentifierType=oe.spdx30.ExternalIdentifierType.cpe23,
                        identifier=cpe_id,
                    )
                )

            # TODO: Generate a file for each actual IPK/DEB/RPM/TGZ file
            # generated and link it to the package
            # spdx_package_file = pkg_objset.add(oe.spdx30.software_File(
            #    _id=pkg_objset.new_spdxid("distribution", pkg_name),
            #    creationInfo=pkg_objset.doc.creationInfo,
            #    name=pkg_name,
            #    software_primaryPurpose=spdx_package.software_primaryPurpose,
            #    software_additionalPurpose=spdx_package.software_additionalPurpose,
            # ))
            # set_timestamp_now(d, spdx_package_file, "builtTime")

            ## TODO add hashes
            # pkg_objset.new_relationship(
            #    [spdx_package],
            #    oe.spdx30.RelationshipType.hasDistributionArtifact,
            #    [spdx_package_file],
            # )

            # NOTE: licenses live in the recipe collection and are referenced
            # by ID in the package collection(s). This helps reduce duplication
            # (since a lot of packages will have the same license), and also
            # prevents duplicate license SPDX IDs in the packages
            package_license = d.getVar("LICENSE:%s" % package)
            if package_license and package_license != d.getVar("LICENSE"):
                package_spdx_license = add_license_expression(
                    d, build_objset, package_license, license_data
                )
            else:
                package_spdx_license = recipe_spdx_license

            pkg_objset.new_relationship(
                [spdx_package],
                oe.spdx30.RelationshipType.hasConcludedLicense,
                [oe.sbom30.get_element_link_id(package_spdx_license)],
            )

            # NOTE: CVE Elements live in the recipe collection
            all_cves = set()
            for status, cves in cve_by_status.items():
                for cve, items in cves.items():
                    spdx_cve, detail, description = items
                    spdx_cve_id = oe.sbom30.get_element_link_id(spdx_cve)

                    all_cves.add(spdx_cve_id)

                    if status == "Patched":
                        pkg_objset.new_vex_patched_relationship(
                            [spdx_cve_id], [spdx_package]
                        )
                    elif status == "Unpatched":
                        pkg_objset.new_vex_unpatched_relationship(
                            [spdx_cve_id], [spdx_package]
                        )
                    elif status == "Ignored":
                        spdx_vex = pkg_objset.new_vex_ignored_relationship(
                            [spdx_cve_id],
                            [spdx_package],
                            impact_statement=description,
                        )

                        if detail in (
                            "ignored",
                            "cpe-incorrect",
                            "disputed",
                            "upstream-wontfix",
                        ):
                            # VEX doesn't have justifications for this
                            pass
                        elif detail in (
                            "not-applicable-config",
                            "not-applicable-platform",
                        ):
                            for v in spdx_vex:
                                v.security_justificationType = (
                                    oe.spdx30.security_VexJustificationType.vulnerableCodeNotPresent
                                )
                        else:
                            bb.fatal(f"Unknown detail '{detail}' for ignored {cve}")
                    else:
                        bb.fatal(f"Unknown {cve} status '{status}'")

            if all_cves:
                pkg_objset.new_relationship(
                    [spdx_package],
                    oe.spdx30.RelationshipType.hasAssociatedVulnerability,
                    sorted(all_cves),
                )

            bb.debug(1, "Adding package files to SPDX for package %s" % pkg_name)
            package_files = add_package_files(
                d,
                pkg_objset,
                pkgdest / package,
                lambda file_counter: pkg_objset.new_spdxid(
                    "package", pkg_name, "file", str(file_counter)
                ),
                # TODO: Can we know the purpose here?
                lambda filepath: [],
                license_data,
                ignore_top_level_dirs=["CONTROL", "DEBIAN"],
                archive=None,
            )

            if package_files:
                pkg_objset.new_relationship(
                    [spdx_package],
                    oe.spdx30.RelationshipType.contains,
                    sorted(package_files),
                )

            if include_sources:
                debug_sources = get_package_sources_from_debug(
                    d, package, package_files, dep_sources, source_hash_cache
                )
                # The helper may return None (no extended package data);
                # treat that the same as "no debug sources" rather than
                # failing on iteration.
                if debug_sources:
                    debug_source_ids.update(
                        oe.sbom30.get_element_link_id(src) for src in debug_sources
                    )

            oe.sbom30.write_recipe_jsonld_doc(
                d, pkg_objset, "packages-staging", deploydir, create_spdx_id_links=False
            )

    if include_sources:
        bb.debug(1, "Adding sysroot files to SPDX")
        sysroot_files = add_package_files(
            d,
            build_objset,
            d.expand("${COMPONENTS_DIR}/${PACKAGE_ARCH}/${PN}"),
            lambda file_counter: build_objset.new_spdxid("sysroot", str(file_counter)),
            lambda filepath: [],
            license_data,
            archive=None,
        )

        if sysroot_files:
            build_objset.new_scoped_relationship(
                [build],
                oe.spdx30.RelationshipType.hasOutput,
                oe.spdx30.LifecycleScopeType.build,
                sorted(sysroot_files),
            )

    if build_inputs or debug_source_ids:
        build_objset.new_scoped_relationship(
            [build],
            oe.spdx30.RelationshipType.hasInput,
            oe.spdx30.LifecycleScopeType.build,
            sorted(build_inputs) + sorted(debug_source_ids),
        )

    oe.sbom30.write_recipe_jsonld_doc(d, build_objset, "recipes", deploydir)
784
785
def create_package_spdx(d):
    """Write the final per-package SPDX documents with runtime dependencies.

    For every packaged entry of PACKAGES the staged package document is
    loaded from "packages-staging", a runtime-scoped "dependsOn"
    relationship to the SPDX packages providing its RDEPENDS is added, and
    the result is written to the "packages" area. A "<PN>-package-common"
    document carrying the shared creation info is written last.

    Native and cross recipes are skipped entirely.
    """
    is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class(
        "cross", d
    )
    # Bail out before doing any of the (potentially expensive) lookups below
    if is_native:
        return

    deploydir = Path(d.getVar("SPDXRUNTIMEDEPLOY"))
    providers = oe.spdx_common.collect_package_providers(d)
    pkg_arch = d.getVar("SSTATE_PKGARCH")

    bb.build.exec_func("read_subpackage_metadata", d)

    # Provider package name -> SPDX package element, shared by all packages
    dep_package_cache = {}

    # Any element common to all packages that need to be referenced by ID
    # should be written into this objset set
    common_objset = oe.sbom30.ObjectSet.new_objset(
        d, "%s-package-common" % d.getVar("PN")
    )

    for package in d.getVar("PACKAGES").split():
        # Evaluate variables with the package name as an override so that
        # e.g. RDEPENDS resolves to the package-specific value
        localdata = bb.data.createCopy(d)
        pkg_name = d.getVar("PKG:%s" % package) or package
        localdata.setVar("PKG", pkg_name)
        localdata.setVar("OVERRIDES", d.getVar("OVERRIDES", False) + ":" + package)

        if not oe.packagedata.packaged(package, localdata):
            continue

        spdx_package, pkg_objset = oe.sbom30.load_obj_in_jsonld(
            d,
            pkg_arch,
            "packages-staging",
            "package-" + pkg_name,
            oe.spdx30.software_Package,
            software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install,
        )

        # We will write out a new collection, so link it to the new
        # creation info in the common package data. The old creation info
        # should still exist and be referenced by all the existing elements
        # in the package
        pkg_objset.creationInfo = pkg_objset.copy_creation_info(
            common_objset.doc.creationInfo
        )

        runtime_spdx_deps = set()

        deps = bb.utils.explode_dep_versions2(localdata.getVar("RDEPENDS") or "")
        seen_deps = set()
        for dep in deps:
            if dep in seen_deps:
                continue

            if dep not in providers:
                continue

            (dep, _) = providers[dep]

            if not oe.packagedata.packaged(dep, localdata):
                continue

            if dep in dep_package_cache:
                dep_spdx_package = dep_package_cache[dep]
            else:
                # The package data is only needed to locate the document,
                # so read it on a cache miss only
                dep_pkg_data = oe.packagedata.read_subpkgdata_dict(dep, d)
                dep_pkg = dep_pkg_data["PKG"]

                bb.debug(1, "Searching for %s" % dep_pkg)
                dep_spdx_package, _ = oe.sbom30.find_root_obj_in_jsonld(
                    d,
                    "packages-staging",
                    "package-" + dep_pkg,
                    oe.spdx30.software_Package,
                    software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install,
                )
                dep_package_cache[dep] = dep_spdx_package

            runtime_spdx_deps.add(dep_spdx_package)
            seen_deps.add(dep)

        if runtime_spdx_deps:
            pkg_objset.new_scoped_relationship(
                [spdx_package],
                oe.spdx30.RelationshipType.dependsOn,
                oe.spdx30.LifecycleScopeType.runtime,
                # Sorted so the output does not depend on set iteration order
                sorted(
                    oe.sbom30.get_element_link_id(dep) for dep in runtime_spdx_deps
                ),
            )

        oe.sbom30.write_recipe_jsonld_doc(d, pkg_objset, "packages", deploydir)

    oe.sbom30.write_recipe_jsonld_doc(d, common_objset, "common-package", deploydir)
882
883
def write_bitbake_spdx(d):
    """
    Write the "bitbake.spdx.json" document into DEPLOY_DIR_SPDX.

    When SPDX_INCLUDE_BITBAKE_PARENT_BUILD is "1", a uniquely identified
    Build element is added as a root of the document and, where available,
    related to the imported build host (SPDX_BUILD_HOST) and to the
    SPDX_INVOKED_BY / SPDX_ON_BEHALF_OF agents. Otherwise a warning is
    emitted for each of those three inputs that evaluates true, since they
    are not used in that case.

    Note that PN and BB_TASKHASH are overwritten in the datastore *d*.
    """
    # Set PN to "bitbake" so that SPDX IDs can be generated
    d.setVar("PN", "bitbake")
    d.setVar("BB_TASKHASH", "bitbake")
    oe.spdx_common.load_spdx_license_data(d)

    output_dir = Path(d.getVar("DEPLOY_DIR_SPDX"))

    objset = oe.sbom30.ObjectSet.new_objset(d, "bitbake", False)

    build_host = d.getVar("SPDX_BUILD_HOST")
    # The agents are created unconditionally but only added to the object
    # set if they end up being referenced by a relationship
    invoker = objset.new_agent("SPDX_INVOKED_BY", add=False)
    delegator = objset.new_agent("SPDX_ON_BEHALF_OF", add=False)

    if d.getVar("SPDX_INCLUDE_BITBAKE_PARENT_BUILD") != "1":
        # Without a parent build there is nothing to attach these to
        ignored = (
            (
                build_host,
                "SPDX_BUILD_HOST has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set",
            ),
            (
                invoker,
                "SPDX_INVOKED_BY has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set",
            ),
            (
                delegator,
                "SPDX_ON_BEHALF_OF has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set",
            ),
        )
        for value, message in ignored:
            if value:
                bb.warn(message)
    else:
        # Since the Build objects are unique, we may as well set the creation
        # time to the current time instead of the fallback SDE
        objset.doc.creationInfo.created = datetime.now(timezone.utc)

        # Each invocation of bitbake should have a unique ID since it is a
        # unique build
        nonce = os.urandom(16).hex()
        build_id = objset.new_spdxid(nonce, include_unihash=False)

        build = objset.add_root(
            oe.spdx30.build_Build(
                _id=build_id,
                creationInfo=objset.doc.creationInfo,
                build_buildType=oe.sbom30.SPDX_BUILD_TYPE,
            )
        )
        set_timestamp_now(d, build, "build_buildStartTime")

        if build_host:
            host_ref = objset.new_import(build_host)
            objset.new_scoped_relationship(
                [build],
                oe.spdx30.RelationshipType.hasHost,
                oe.spdx30.LifecycleScopeType.build,
                [host_ref],
            )

        if not invoker:
            if delegator:
                bb.warn(
                    "SPDX_ON_BEHALF_OF has no effect if SPDX_INVOKED_BY is not set"
                )
        else:
            objset.add(invoker)
            invoked_by_rels = objset.new_scoped_relationship(
                [build],
                oe.spdx30.RelationshipType.invokedBy,
                oe.spdx30.LifecycleScopeType.build,
                [invoker],
            )

            if delegator:
                objset.add(delegator)
                # The delegation targets the invokedBy relationship(s)
                # created above rather than the agent itself
                objset.new_scoped_relationship(
                    [delegator],
                    oe.spdx30.RelationshipType.delegatedTo,
                    oe.spdx30.LifecycleScopeType.build,
                    invoked_by_rels,
                )

    for element in objset.foreach_type(oe.spdx30.Element):
        element.extension.append(oe.sbom30.OEIdAliasExtension())

    oe.sbom30.write_jsonld_doc(d, objset, output_dir / "bitbake.spdx.json")
965
966
def collect_build_package_inputs(d, objset, build, packages):
    """
    Record the SPDX packages for *packages* as inputs of *build*.

    Every key of *packages* is looked up in the mapping returned by
    oe.spdx_common.collect_package_providers(). For each name that has a
    provider, the root install-purpose software_Package of the matching
    "packages" SPDX document is located and its link ID collected. The
    collected IDs are attached to *build* in *objset* with a single
    build-scoped hasInput relationship.

    Calls bb.fatal() listing every name for which no provider was found.
    """
    import oe.sbom30

    providers = oe.spdx_common.collect_package_providers(d)

    input_links = set()
    unresolved = set()

    for name in sorted(packages):
        if name in providers:
            # Only the providing package name is needed here
            pkg_name, _ = providers[name]

            # Copy all of the package SPDX files into the Sbom elements
            pkg_spdx, _ = oe.sbom30.find_root_obj_in_jsonld(
                d,
                "packages",
                "package-" + pkg_name,
                oe.spdx30.software_Package,
                software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install,
            )
            input_links.add(oe.sbom30.get_element_link_id(pkg_spdx))
        else:
            unresolved.add(name)

    # Report all missing providers at once instead of stopping at the first
    if unresolved:
        names = ", ".join(sorted(unresolved))
        bb.fatal(f"Unable to find SPDX provider(s) for: {names}")

    if not input_links:
        return

    objset.new_scoped_relationship(
        [build],
        oe.spdx30.RelationshipType.hasInput,
        oe.spdx30.LifecycleScopeType.build,
        sorted(input_links),
    )
1004
1005
def create_rootfs_spdx(d):
    """
    Write the "rootfs" SPDX document for the current image.

    The document contains an archive-purpose software_Package named after
    IMAGE_BASENAME and a "rootfs" task Build that has the package as its
    output. The packages listed in the JSON file named by
    SPDX_ROOTFS_PACKAGES are recorded as inputs of that build. The result is
    written below SPDXROOTFSDEPLOY.
    """
    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    out_dir = Path(d.getVar("SPDXROOTFSDEPLOY"))
    packages_json = Path(d.getVar("SPDX_ROOTFS_PACKAGES"))
    image_basename = d.getVar("IMAGE_BASENAME")
    machine = d.getVar("MACHINE")

    with packages_json.open("r") as stream:
        installed = json.load(stream)

    objset = oe.sbom30.ObjectSet.new_objset(d, f"{image_basename}-{machine}-rootfs")

    rootfs_id = objset.new_spdxid("rootfs", image_basename)
    rootfs_pkg = oe.spdx30.software_Package(
        _id=rootfs_id,
        creationInfo=objset.doc.creationInfo,
        name=image_basename,
        software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.archive,
    )
    rootfs = objset.add_root(rootfs_pkg)
    set_timestamp_now(d, rootfs, "builtTime")

    build_task = objset.new_task_build("rootfs", "rootfs")
    rootfs_build = objset.add_root(build_task)
    set_timestamp_now(d, rootfs_build, "build_buildEndTime")

    # The rootfs package is what the rootfs build produces ...
    objset.new_scoped_relationship(
        [rootfs_build],
        oe.spdx30.RelationshipType.hasOutput,
        oe.spdx30.LifecycleScopeType.build,
        [rootfs],
    )

    # ... and the installed packages are what it consumes
    collect_build_package_inputs(d, objset, rootfs_build, installed)

    oe.sbom30.write_recipe_jsonld_doc(d, objset, "rootfs", out_dir)
1043
1044
def create_image_spdx(d):
    """
    Write the "image" SPDX document describing the image creation tasks.

    The JSON manifest named by IMAGE_OUTPUT_MANIFEST is read; each entry is
    expected to provide "imagetype", "taskname" and a list of "images" with
    a "filename" each. For every entry a task Build is added as a root of
    the document and each image found below IMGDEPLOYDIR is recorded as an
    output of that build:

    * a directory is expanded into its files with add_package_files()
    * anything else becomes a single root software_File carrying the
      SHA-256 of the file

    If any build was created, the root package of the matching "rootfs"
    SPDX document is recorded as the input of all of them. The document is
    written below SPDXIMAGEWORK.
    """
    import oe.sbom30

    image_deploy_dir = Path(d.getVar("IMGDEPLOYDIR"))
    manifest_path = Path(d.getVar("IMAGE_OUTPUT_MANIFEST"))
    spdx_work_dir = Path(d.getVar("SPDXIMAGEWORK"))

    image_basename = d.getVar("IMAGE_BASENAME")
    machine = d.getVar("MACHINE")

    objset = oe.sbom30.ObjectSet.new_objset(
        d, "%s-%s-image" % (image_basename, machine)
    )

    with manifest_path.open("r") as f:
        manifest = json.load(f)

    builds = []
    for task in manifest:
        imagetype = task["imagetype"]
        taskname = task["taskname"]

        image_build = objset.add_root(
            objset.new_task_build(taskname, "image/%s" % imagetype)
        )
        set_timestamp_now(d, image_build, "build_buildEndTime")
        builds.append(image_build)

        artifacts = []

        for image in task["images"]:
            image_filename = image["filename"]
            image_path = image_deploy_dir / image_filename
            if os.path.isdir(image_path):
                # The image is a directory tree; describe each file in it
                a = add_package_files(
                    d,
                    objset,
                    image_path,
                    lambda file_counter: objset.new_spdxid(
                        "imagefile", str(file_counter)
                    ),
                    lambda filepath: [],
                    license_data=None,
                    ignore_dirs=[],
                    ignore_top_level_dirs=[],
                    archive=None,
                )
                artifacts.extend(a)
            else:
                a = objset.add_root(
                    oe.spdx30.software_File(
                        _id=objset.new_spdxid("image", image_filename),
                        creationInfo=objset.doc.creationInfo,
                        name=image_filename,
                        verifiedUsing=[
                            oe.spdx30.Hash(
                                algorithm=oe.spdx30.HashAlgorithm.sha256,
                                hashValue=bb.utils.sha256_file(image_path),
                            )
                        ],
                    )
                )

                artifacts.append(a)

        # Post-process every artifact exactly once, after all images of the
        # task have been collected. Doing this inside the loop above would
        # revisit the artifacts of earlier images again for each later one.
        for a in artifacts:
            set_purposes(
                d, a, "SPDX_IMAGE_PURPOSE:%s" % imagetype, "SPDX_IMAGE_PURPOSE"
            )

            set_timestamp_now(d, a, "builtTime")

        if artifacts:
            objset.new_scoped_relationship(
                [image_build],
                oe.spdx30.RelationshipType.hasOutput,
                oe.spdx30.LifecycleScopeType.build,
                artifacts,
            )

    if builds:
        rootfs_image, _ = oe.sbom30.find_root_obj_in_jsonld(
            d,
            "rootfs",
            "%s-%s-rootfs" % (image_basename, machine),
            oe.spdx30.software_Package,
            # TODO: Should use a purpose to filter here?
        )
        objset.new_scoped_relationship(
            builds,
            oe.spdx30.RelationshipType.hasInput,
            oe.spdx30.LifecycleScopeType.build,
            [oe.sbom30.get_element_link_id(rootfs_image)],
        )

    objset.add_aliases()
    objset.link()
    oe.sbom30.write_recipe_jsonld_doc(d, objset, "image", spdx_work_dir)
1144
1145
def create_image_sbom_spdx(d):
    """
    Write the image SBOM to SPDXIMAGEDEPLOYDIR/<IMAGE_NAME>.spdx.json.

    The SBOM roots are the root package of the "rootfs" SPDX document
    followed by every root software_File of the "image" SPDX document. If
    IMAGE_LINK_NAME is set and names a different file, a relative symlink
    <IMAGE_LINK_NAME>.spdx.json pointing at the SBOM is created next to it.
    """
    import oe.sbom30

    image_name = d.getVar("IMAGE_NAME")
    image_basename = d.getVar("IMAGE_BASENAME")
    image_link_name = d.getVar("IMAGE_LINK_NAME")
    imgdeploydir = Path(d.getVar("SPDXIMAGEDEPLOYDIR"))
    machine = d.getVar("MACHINE")

    sbom_suffix = ".spdx.json"
    spdx_path = imgdeploydir / (image_name + sbom_suffix)

    # TODO: Do we need to add the rootfs or are the image files sufficient?
    rootfs_image, _ = oe.sbom30.find_root_obj_in_jsonld(
        d,
        "rootfs",
        "%s-%s-rootfs" % (image_basename, machine),
        oe.spdx30.software_Package,
        # TODO: Should use a purpose here?
    )
    root_elements = [oe.sbom30.get_element_link_id(rootfs_image)]

    image_objset, _ = oe.sbom30.find_jsonld(
        d, "image", "%s-%s-image" % (image_basename, machine), required=True
    )
    root_elements.extend(
        oe.sbom30.get_element_link_id(image_file)
        for image_file in image_objset.foreach_root(oe.spdx30.software_File)
    )

    objset, _ = oe.sbom30.create_sbom(d, image_name, root_elements)

    oe.sbom30.write_jsonld_doc(d, objset, spdx_path)

    # Provide the SBOM under the image link name as well, unless that would
    # be the very same path
    if image_link_name:
        link = imgdeploydir / (image_link_name + sbom_suffix)
        if link != spdx_path:
            link.symlink_to(os.path.relpath(spdx_path, link.parent))
1186
1187
def sdk_create_spdx(d, sdk_type, spdx_work_dir, toolchain_outputname):
    """
    Write "sdk-rootfs.spdx.json" for one SDK rootfs into *spdx_work_dir*.

    The document is named "<toolchain_outputname>-<sdk_type>" and contains an
    archive-purpose software_Package for the SDK rootfs plus a "sdk-rootfs"
    task Build that outputs it. The packages reported by
    oe.sdk.sdk_list_installed_packages() are recorded as inputs of that
    build; the target package list is requested when *sdk_type* is "target".
    """
    sdk_name = toolchain_outputname + "-" + sdk_type
    is_target = sdk_type == "target"
    sdk_packages = oe.sdk.sdk_list_installed_packages(d, is_target)

    objset = oe.sbom30.ObjectSet.new_objset(d, sdk_name)

    rootfs_id = objset.new_spdxid("sdk-rootfs", sdk_name)
    rootfs_pkg = oe.spdx30.software_Package(
        _id=rootfs_id,
        creationInfo=objset.doc.creationInfo,
        name=sdk_name,
        software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.archive,
    )
    sdk_rootfs = objset.add_root(rootfs_pkg)
    set_timestamp_now(d, sdk_rootfs, "builtTime")

    build_task = objset.new_task_build("sdk-rootfs", "sdk-rootfs")
    sdk_build = objset.add_root(build_task)
    set_timestamp_now(d, sdk_build, "build_buildEndTime")

    # The build produces the SDK rootfs from the installed packages
    objset.new_scoped_relationship(
        [sdk_build],
        oe.spdx30.RelationshipType.hasOutput,
        oe.spdx30.LifecycleScopeType.build,
        [sdk_rootfs],
    )

    collect_build_package_inputs(d, objset, sdk_build, sdk_packages)

    objset.add_aliases()
    oe.sbom30.write_jsonld_doc(d, objset, spdx_work_dir / "sdk-rootfs.spdx.json")
1218
1219
def create_sdk_sbom(d, sdk_deploydir, spdx_work_dir, toolchain_outputname):
    """
    Write the SDK SBOM to <sdk_deploydir>/<toolchain_outputname>.spdx.json.

    The "sdk-rootfs.spdx.json" document in *spdx_work_dir* is loaded and
    extended with a "sdk-populate" task Build that takes the root
    software_Package of that document as input. Every regular, non-symlink
    file found below *sdk_deploydir* is described as a file element and
    recorded as an output of the build; these files are also the root
    elements of the SBOM.

    Calls bb.fatal() if the loaded document has no root software_Package,
    and warns if no output files were found.
    """
    # Load the document written earlier
    rootfs_objset = oe.sbom30.load_jsonld(
        d, spdx_work_dir / "sdk-rootfs.spdx.json", required=True
    )

    # Create a new build for the SDK installer
    sdk_build = rootfs_objset.new_task_build("sdk-populate", "sdk-populate")
    set_timestamp_now(d, sdk_build, "build_buildEndTime")

    rootfs = rootfs_objset.find_root(oe.spdx30.software_Package)
    if rootfs is None:
        bb.fatal("Unable to find rootfs artifact")

    rootfs_objset.new_scoped_relationship(
        [sdk_build],
        oe.spdx30.RelationshipType.hasInput,
        oe.spdx30.LifecycleScopeType.build,
        [rootfs],
    )

    files = set()
    # NOTE(review): root_files is filled below but never read in this
    # function; confirm whether it was meant to be passed to create_sbom()
    root_files = []

    # NOTE: os.walk() is used without followlinks, so symlinked directories
    # are not descended into; symlinked files are skipped explicitly below
    for dirpath, _dirnames, filenames in os.walk(sdk_deploydir):
        for fn in filenames:
            fpath = Path(dirpath) / fn
            if fpath.is_symlink() or not fpath.is_file():
                continue

            relpath = str(fpath.relative_to(sdk_deploydir))
            file_id = rootfs_objset.new_spdxid("sdk-installer", relpath)

            f = rootfs_objset.new_file(file_id, relpath, fpath)
            set_timestamp_now(d, f, "builtTime")

            # Manifests and test data get a fixed purpose; everything else
            # takes its purpose(s) from SPDX_SDK_PURPOSE
            if fn.endswith(".manifest"):
                f.software_primaryPurpose = oe.spdx30.software_SoftwarePurpose.manifest
            elif fn.endswith(".testdata.json"):
                f.software_primaryPurpose = (
                    oe.spdx30.software_SoftwarePurpose.configuration
                )
            else:
                set_purposes(d, f, "SPDX_SDK_PURPOSE")
                root_files.append(f)

            files.add(f)

    if not files:
        bb.warn(f"No SDK output files found in {sdk_deploydir}")
    else:
        rootfs_objset.new_scoped_relationship(
            [sdk_build],
            oe.spdx30.RelationshipType.hasOutput,
            oe.spdx30.LifecycleScopeType.build,
            files,
        )

    objset, _ = oe.sbom30.create_sbom(
        d, toolchain_outputname, sorted(files), [rootfs_objset]
    )

    sbom_path = sdk_deploydir / (toolchain_outputname + ".spdx.json")
    oe.sbom30.write_jsonld_doc(d, objset, sbom_path)
1289