xref: /openbmc/openbmc/poky/meta/lib/oe/spdx30_tasks.py (revision 8460358c)
1#
2# Copyright OpenEmbedded Contributors
3#
4# SPDX-License-Identifier: GPL-2.0-only
5#
6
7import json
8import oe.cve_check
9import oe.packagedata
10import oe.patch
11import oe.sbom30
12import oe.spdx30
13import oe.spdx_common
14import oe.sdk
15import os
16
17from contextlib import contextmanager
18from datetime import datetime, timezone
19from pathlib import Path
20
21
def set_timestamp_now(d, o, prop):
    """Set ``o.<prop>`` to the current UTC time, or remove the property.

    The timestamp is written only when SPDX_INCLUDE_TIMESTAMPS is "1";
    for any other value the property is deleted from ``o`` instead.
    """
    if d.getVar("SPDX_INCLUDE_TIMESTAMPS") != "1":
        # Deleting the property helps to validate that it actually exists,
        # and also that it is not mandatory
        delattr(o, prop)
        return

    setattr(o, prop, datetime.now(timezone.utc))
29
30
def add_license_expression(d, objset, license_expression, license_data):
    """Translate a LICENSE-style expression into an SPDX license expression.

    The expression is tokenised on whitespace, parentheses, "&" and "|".
    Operators are mapped to "AND"/"OR", "CLOSED" becomes "NONE", names
    found in ``license_data["licenses"]`` (after applying SPDXLICENSEMAP)
    are used as-is, and every other name becomes "LicenseRef-<name>" backed
    by a license text element in ``objset``.

    Returns the element created by ``objset.new_license_expression``, which
    is also registered through ``objset.set_element_alias``.
    """
    # Cache of license text elements already resolved during this call
    text_elements = {}
    # Maps each "LicenseRef-..." used in the expression to its text element link
    license_text_map = {}

    # Tokens whose translation never depends on the license data
    fixed_tokens = {
        "(": "(",
        ")": ")",
        "&": "AND",
        "|": "OR",
        "CLOSED": "NONE",
    }

    def add_license_text(name):
        try:
            return text_elements[name]
        except KeyError:
            pass

        # Reuse a text element that already lives in the object set
        existing = objset.find_filter(
            oe.spdx30.simplelicensing_SimpleLicensingText,
            name=name,
        )
        if existing is not None:
            text_elements[name] = existing
            return existing

        lic = objset.add(
            oe.spdx30.simplelicensing_SimpleLicensingText(
                _id=objset.new_spdxid("license-text", name),
                creationInfo=objset.doc.creationInfo,
                name=name,
            )
        )
        objset.set_element_alias(lic)
        text_elements[name] = lic

        if name == "PD":
            lic.simplelicensing_licenseText = "Software released to the public domain"
            return lic

        # Search for the license in COMMON_LICENSE_DIR and LICENSE_PATH
        search_dirs = [d.getVar("COMMON_LICENSE_DIR")]
        search_dirs += (d.getVar("LICENSE_PATH") or "").split()
        for directory in search_dirs:
            try:
                with (Path(directory) / name).open(errors="replace") as f:
                    lic.simplelicensing_licenseText = f.read()
                    return lic
            except FileNotFoundError:
                # Not in this directory; try the next one
                continue

        # If it's not SPDX or PD, then NO_GENERIC_LICENSE must be set
        filename = d.getVarFlag("NO_GENERIC_LICENSE", name)
        if filename:
            filename = d.expand("${S}/" + filename)
            with open(filename, errors="replace") as f:
                lic.simplelicensing_licenseText = f.read()
                return lic
        else:
            bb.fatal("Cannot find any text for license %s" % name)

    def convert(token):
        if token in fixed_tokens:
            return fixed_tokens[token]

        mapped = d.getVarFlag("SPDXLICENSEMAP", token) or token
        if mapped in license_data["licenses"]:
            return mapped

        # Unknown to the license data: reference a custom license text
        license_ref = "LicenseRef-" + token
        if license_ref not in license_text_map:
            license_text_map[license_ref] = oe.sbom30.get_element_link_id(
                add_license_text(token)
            )

        return license_ref

    # Pad every operator with spaces so that a plain split() tokenises
    spaced = license_expression
    for symbol in "()|&":
        spaced = spaced.replace(symbol, " %s " % symbol)

    converted = [convert(token) for token in spaced.split()]
    spdx_license_expression = " ".join(converted)

    o = objset.new_license_expression(
        spdx_license_expression, license_data, license_text_map
    )
    objset.set_element_alias(o)
    return o
130
131
def add_package_files(
    d,
    objset,
    topdir,
    get_spdxid,
    get_purposes,
    license_data,
    *,
    archive=None,
    ignore_dirs=[],
    ignore_top_level_dirs=[],
):
    """Create an SPDX file element for every regular file below ``topdir``.

    Symlinks and anything that is not a regular file are skipped. Files are
    numbered from 1 in traversal order and that number is handed to
    ``get_spdxid`` to build each element's ID. Directories and files are
    visited in sorted order so that the numbering (and the member order of
    ``archive``) does not depend on the order in which the filesystem lists
    directory entries.

    Args:
        d: Datastore; only SOURCE_DATE_EPOCH is read here.
        objset: Object set that receives the new file elements.
        topdir: Root directory to walk (str or Path).
        get_spdxid: Callable mapping the 1-based file counter to an SPDX ID.
        get_purposes: Callable mapping a file's Path to its purposes.
        license_data: Passed through to ``objset.scan_declared_licenses``
            for files whose purposes include "source".
        archive: Optional object providing ``gettarinfo()``/``addfile()``
            (e.g. a ``tarfile.TarFile``). When given, every file is also
            appended to it, owned by root and with its mtime clamped to
            SOURCE_DATE_EPOCH when that is set.
        ignore_dirs: Directory names pruned at every level (never mutated).
        ignore_top_level_dirs: Directory names pruned only directly below
            ``topdir`` (never mutated).

    Returns:
        The set of file elements returned by ``objset.new_file``.
    """
    # Treat an unset *or empty* SOURCE_DATE_EPOCH as "do not clamp". Leaving
    # an empty string in place would later be compared with an integer mtime.
    source_date_epoch = d.getVar("SOURCE_DATE_EPOCH")
    source_date_epoch = int(source_date_epoch) if source_date_epoch else None

    spdx_files = set()

    file_counter = 1
    for subdir, dirs, files in os.walk(topdir):
        # Prune in place so os.walk() does not descend into ignored dirs
        dirs[:] = [name for name in dirs if name not in ignore_dirs]
        if subdir == str(topdir):
            dirs[:] = [name for name in dirs if name not in ignore_top_level_dirs]

        # Directory listing order is arbitrary; sort so that file_counter
        # (and therefore the generated SPDX IDs) is stable between runs
        dirs.sort()
        files.sort()

        for file in files:
            filepath = Path(subdir) / file
            if filepath.is_symlink() or not filepath.is_file():
                continue

            filename = str(filepath.relative_to(topdir))
            file_purposes = get_purposes(filepath)

            spdx_file = objset.new_file(
                get_spdxid(file_counter),
                filename,
                filepath,
                purposes=file_purposes,
            )
            spdx_files.add(spdx_file)

            if oe.spdx30.software_SoftwarePurpose.source in file_purposes:
                objset.scan_declared_licenses(spdx_file, filepath, license_data)

            if archive is not None:
                with filepath.open("rb") as f:
                    info = archive.gettarinfo(fileobj=f)
                    # Normalise the metadata so the archive does not record
                    # the build user's identity or the absolute path
                    info.name = filename
                    info.uid = 0
                    info.gid = 0
                    info.uname = "root"
                    info.gname = "root"

                    if source_date_epoch is not None and info.mtime > source_date_epoch:
                        info.mtime = source_date_epoch

                    archive.addfile(info, f)

            file_counter += 1

    bb.debug(1, "Added %d files to %s" % (len(spdx_files), objset.doc._id))

    return spdx_files
194
195
def get_package_sources_from_debug(
    d, package, package_files, sources, source_hash_cache
):
    """Find the known source files that were compiled into ``package``.

    Reads the extended package data for ``package`` and, for every packaged
    file that lists "debugsrc" entries, looks each debug source up under a
    fixed list of search directories, hashes it and matches the SHA256
    against ``sources``.

    Args:
        d: Datastore used to resolve the search directories and pkgdata.
        package: Name of the package to inspect.
        package_files: SPDX file elements of the package; every file with
            debug sources must match one of them by name or by alias.
        sources: Mapping of SHA256 hex digest to source file element.
        source_hash_cache: Mapping of Path to SHA256 (or None for paths
            that are not regular files); updated in place so repeated
            calls do not hash the same file again.

    Returns:
        A set of the elements from ``sources`` that matched. The set is
        empty when no extended package data exists for ``package``.
    """

    def file_path_match(file_path, pkg_file):
        wanted = file_path.lstrip("/")
        if wanted == pkg_file.name.lstrip("/"):
            return True

        # The file may also be known under one of its recorded aliases
        for e in pkg_file.extension:
            if isinstance(e, oe.sbom30.OEFileNameAliasExtension):
                if any(wanted == a.lstrip("/") for a in e.aliases):
                    return True

        return False

    debug_search_paths = [
        Path(d.getVar("SPDXWORK")),
        Path(d.getVar("PKGD")),
        Path(d.getVar("STAGING_DIR_TARGET")),
        Path(d.getVar("STAGING_DIR_NATIVE")),
        Path(d.getVar("STAGING_KERNEL_DIR")),
    ]

    pkg_data = oe.packagedata.read_subpkgdata_extended(package, d)

    if pkg_data is None:
        # Callers iterate the result, so hand back an empty set rather
        # than None
        return set()

    dep_source_files = set()

    for file_path, file_data in pkg_data["files_info"].items():
        if "debugsrc" not in file_data:
            continue

        if not any(file_path_match(file_path, pkg_file) for pkg_file in package_files):
            bb.fatal(
                "No package file found for %s in %s; SPDX found: %s"
                % (str(file_path), package, " ".join(p.name for p in package_files))
            )
            continue

        for debugsrc in file_data["debugsrc"]:
            for search in debug_search_paths:
                if debugsrc.startswith("/usr/src/kernel"):
                    debugsrc_path = search / debugsrc.replace("/usr/src/kernel/", "")
                else:
                    debugsrc_path = search / debugsrc.lstrip("/")

                if debugsrc_path in source_hash_cache:
                    file_sha256 = source_hash_cache[debugsrc_path]
                    if file_sha256 is None:
                        # Already known not to be a regular file; try the
                        # next search directory
                        continue
                else:
                    # We can only hash files below, skip directories, links, etc.
                    if not debugsrc_path.is_file():
                        source_hash_cache[debugsrc_path] = None
                        continue

                    file_sha256 = bb.utils.sha256_file(debugsrc_path)
                    source_hash_cache[debugsrc_path] = file_sha256

                if file_sha256 in sources:
                    source_file = sources[file_sha256]
                    dep_source_files.add(source_file)
                else:
                    bb.debug(
                        1,
                        "Debug source %s with SHA256 %s not found in any dependency"
                        % (str(debugsrc_path), file_sha256),
                    )
                # The first search directory that holds the file wins
                break
            else:
                bb.debug(1, "Debug source %s not found" % debugsrc)

    return dep_source_files
271
272
def collect_dep_objsets(d, build):
    """Load the recipe SPDX build element of every SPDX dependency.

    Returns a ``(dep_objsets, dep_builds)`` tuple: the list of object sets
    of the dependencies that are part of the task hash, and the set of
    build elements of all dependencies. ``build`` is accepted but not used
    here.
    """
    linkable_objsets = []
    all_dep_builds = set()

    for dep in oe.spdx_common.get_spdx_deps(d):
        bb.debug(1, "Fetching SPDX for dependency %s" % (dep.pn))
        found_build, found_objset = oe.sbom30.find_root_obj_in_jsonld(
            d, "recipes", "recipe-" + dep.pn, oe.spdx30.build_Build
        )

        # The build _can_ be linked against (by alias)
        all_dep_builds.add(found_build)

        # If the dependency is part of the taskhash, return it to be linked
        # against. Otherwise, it cannot be linked against because this recipe
        # will not be rebuilt if the dependency changes
        if dep.in_taskhash:
            linkable_objsets.append(found_objset)

    return linkable_objsets, all_dep_builds
295
296
def index_sources_by_hash(sources, dest):
    """Record every source file element in ``dest`` under its SHA256 value.

    Elements that are not ``software_File`` or whose primary purpose is not
    "source" are ignored. An existing entry in ``dest`` is never replaced.
    A source file without a SHA256 hash is reported with ``bb.fatal``.
    """
    for candidate in sources:
        if not isinstance(candidate, oe.spdx30.software_File):
            continue

        if (
            candidate.software_primaryPurpose
            != oe.spdx30.software_SoftwarePurpose.source
        ):
            continue

        # Only the first SHA256 entry of the element is considered
        sha256 = next(
            (
                h
                for h in candidate.verifiedUsing
                if h.algorithm == oe.spdx30.HashAlgorithm.sha256
            ),
            None,
        )

        if sha256 is None:
            bb.fatal(f"No SHA256 found for {candidate.name}")
        elif sha256.hashValue not in dest:
            dest[sha256.hashValue] = candidate
312
313
def collect_dep_sources(dep_objsets, dest):
    """Index the build inputs of every non-native dependency by SHA256.

    For each object set, the ``hasInput`` relationships that start at its
    root build element are located and their targets are passed to
    ``index_sources_by_hash`` together with ``dest``.
    """
    for dep_objset in dep_objsets:
        # Don't collect sources from native recipes as they
        # match non-native sources also.
        if dep_objset.is_native():
            continue

        bb.debug(1, "Fetching Sources for dependency %s" % (dep_objset.doc.name))

        root_build = dep_objset.find_root(oe.spdx30.build_Build)
        if not root_build:
            bb.fatal("Unable to find a build")

        for rel in dep_objset.foreach_type(oe.spdx30.Relationship):
            is_build_input = (
                root_build is rel.from_
                and rel.relationshipType == oe.spdx30.RelationshipType.hasInput
            )
            if is_build_input:
                index_sources_by_hash(rel.to, dest)
335
336
def add_download_files(d, objset):
    """Create SPDX elements for everything listed in SRC_URI.

    Local ("file" type) entries become file elements: one element for a
    single file, or one per regular file when the entry is a directory.
    All other entries become a package element carrying the download
    location and, where the fetcher supports checksums, the expected
    SHA256/SHA1 values. Entries that ``oe.patch.patch_path`` recognises get
    the "patch" purpose; everything else gets "source".

    Returns:
        The set of created elements.
    """
    inputs = set()

    # SRC_URI may be unset; treat that like an empty list instead of
    # failing on None.split()
    urls = (d.getVar("SRC_URI") or "").split()
    fetch = bb.fetch2.Fetch(urls, d)

    for download_idx, src_uri in enumerate(urls):
        fd = fetch.ud[src_uri]

        for name in fd.names:
            file_name = os.path.basename(fetch.localpath(src_uri))
            if oe.patch.patch_path(src_uri, fetch, "", expand=False):
                primary_purpose = oe.spdx30.software_SoftwarePurpose.patch
            else:
                primary_purpose = oe.spdx30.software_SoftwarePurpose.source

            if fd.type == "file":
                if os.path.isdir(fd.localpath):
                    walk_idx = 1
                    for root, dirs, files in os.walk(fd.localpath):
                        # Directory listing order is arbitrary; sort so
                        # that walk_idx (part of the SPDX ID) is stable
                        # between runs
                        dirs.sort()
                        files.sort()

                        for f in files:
                            f_path = os.path.join(root, f)
                            if os.path.islink(f_path):
                                # TODO: SPDX doesn't support symlinks yet
                                continue

                            file = objset.new_file(
                                objset.new_spdxid(
                                    "source", str(download_idx + 1), str(walk_idx)
                                ),
                                os.path.join(
                                    file_name, os.path.relpath(f_path, fd.localpath)
                                ),
                                f_path,
                                purposes=[primary_purpose],
                            )

                            inputs.add(file)
                            walk_idx += 1

                else:
                    file = objset.new_file(
                        objset.new_spdxid("source", str(download_idx + 1)),
                        file_name,
                        fd.localpath,
                        purposes=[primary_purpose],
                    )
                    inputs.add(file)

            else:
                dl = objset.add(
                    oe.spdx30.software_Package(
                        _id=objset.new_spdxid("source", str(download_idx + 1)),
                        creationInfo=objset.doc.creationInfo,
                        name=file_name,
                        software_primaryPurpose=primary_purpose,
                        software_downloadLocation=oe.spdx_common.fetch_data_to_uri(
                            fd, name
                        ),
                    )
                )

                if fd.method.supports_checksum(fd):
                    # TODO Need something better than hard coding this
                    for checksum_id in ["sha256", "sha1"]:
                        expected_checksum = getattr(
                            fd, "%s_expected" % checksum_id, None
                        )
                        if expected_checksum is None:
                            continue

                        dl.verifiedUsing.append(
                            oe.spdx30.Hash(
                                algorithm=getattr(oe.spdx30.HashAlgorithm, checksum_id),
                                hashValue=expected_checksum,
                            )
                        )

                inputs.add(dl)

    return inputs
418
419
def set_purposes(d, element, *var_names, force_purposes=[]):
    """Set the primary and additional software purposes of ``element``.

    The purposes are ``force_purposes`` followed by the whitespace-separated
    value of the first variable in ``var_names`` that is set and non-empty.
    The first purpose becomes the primary one, the rest become additional
    purposes. When no purpose is found at all, a warning is emitted and
    ``element`` is left untouched.
    """
    purposes = force_purposes[:]

    # Only the first variable with a value contributes
    for value in map(d.getVar, var_names):
        if value:
            purposes += value.split()
            break

    if not purposes:
        bb.warn("No SPDX purposes found in %s" % " ".join(var_names))
        return

    purpose_type = oe.spdx30.software_SoftwarePurpose
    primary, *additional = purposes

    element.software_primaryPurpose = getattr(purpose_type, primary)
    element.software_additionalPurpose = [
        getattr(purpose_type, extra) for extra in additional
    ]
439
440
def create_spdx(d):
    """Generate the recipe SPDX document and the staged package documents.

    A "recipe-<PN>" object set is created holding the recipe build element,
    the download/source inputs, license expressions, CVE elements and (when
    SPDX_INCLUDE_SOURCES is "1") the patched source files and sysroot
    files. For non-native recipes, one "package-<name>" object set is
    created per packaged entry of PACKAGES and written to
    "packages-staging"; the recipe object set is written to "recipes".
    Both are written below SPDXDEPLOY.
    """

    def set_var_field(var, obj, name, package=None):
        # Prefer the package-specific value ("VAR:package"), fall back to
        # the recipe-wide one, and leave the field alone if neither is set
        val = None
        if package:
            val = d.getVar("%s:%s" % (var, package))

        if not val:
            val = d.getVar(var)

        if val:
            setattr(obj, name, val)

    license_data = oe.spdx_common.load_spdx_license_data(d)

    deploydir = Path(d.getVar("SPDXDEPLOY"))
    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    spdx_workdir = Path(d.getVar("SPDXWORK"))
    include_sources = d.getVar("SPDX_INCLUDE_SOURCES") == "1"
    pkg_arch = d.getVar("SSTATE_PKGARCH")
    is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class(
        "cross", d
    )
    include_vex = d.getVar("SPDX_INCLUDE_VEX")
    if include_vex not in ("none", "current", "all"):
        bb.fatal("SPDX_INCLUDE_VEX must be one of 'none', 'current', 'all'")

    build_objset = oe.sbom30.ObjectSet.new_objset(d, "recipe-" + d.getVar("PN"))

    build = build_objset.new_task_build("recipe", "recipe")
    build_objset.set_element_alias(build)

    build_objset.doc.rootElement.append(build)

    build_objset.set_is_native(is_native)

    for var in (d.getVar("SPDX_CUSTOM_ANNOTATION_VARS") or "").split():
        # NOTE(review): new_annotation is not defined in the part of this
        # file reviewed here - confirm it exists at module level
        new_annotation(
            d,
            build_objset,
            build,
            "%s=%s" % (var, d.getVar(var)),
            oe.spdx30.AnnotationType.other,
        )

    build_inputs = set()

    # Add CVEs
    # Maps status mapping -> {cve: (element, detail, description)}
    cve_by_status = {}
    if include_vex != "none":
        for cve in d.getVarFlags("CVE_STATUS") or {}:
            decoded_status = oe.cve_check.decode_cve_status(d, cve)

            # If this CVE is fixed upstream, skip it unless all CVEs are
            # specified.
            if (
                include_vex != "all"
                and "detail" in decoded_status
                and decoded_status["detail"]
                in (
                    "fixed-version",
                    "cpe-stable-backport",
                )
            ):
                bb.debug(1, "Skipping %s since it is already fixed upstream" % cve)
                continue

            spdx_cve = build_objset.new_cve_vuln(cve)
            build_objset.set_element_alias(spdx_cve)

            cve_by_status.setdefault(decoded_status["mapping"], {})[cve] = (
                spdx_cve,
                decoded_status["detail"],
                decoded_status["description"],
            )

    cpe_ids = oe.cve_check.get_cpe_ids(d.getVar("CVE_PRODUCT"), d.getVar("CVE_VERSION"))

    source_files = add_download_files(d, build_objset)
    build_inputs |= source_files

    recipe_spdx_license = add_license_expression(
        d, build_objset, d.getVar("LICENSE"), license_data
    )
    build_objset.new_relationship(
        source_files,
        oe.spdx30.RelationshipType.hasConcludedLicense,
        [recipe_spdx_license],
    )

    # SHA256 -> source file element, used to resolve debug sources below
    dep_sources = {}
    if oe.spdx_common.process_sources(d) and include_sources:
        bb.debug(1, "Adding source files to SPDX")
        oe.spdx_common.get_patched_src(d)

        files = add_package_files(
            d,
            build_objset,
            spdx_workdir,
            lambda file_counter: build_objset.new_spdxid(
                "sourcefile", str(file_counter)
            ),
            lambda filepath: [oe.spdx30.software_SoftwarePurpose.source],
            license_data,
            ignore_dirs=[".git"],
            ignore_top_level_dirs=["temp"],
            archive=None,
        )
        build_inputs |= files
        index_sources_by_hash(files, dep_sources)

    dep_objsets, dep_builds = collect_dep_objsets(d, build)
    if dep_builds:
        build_objset.new_scoped_relationship(
            [build],
            oe.spdx30.RelationshipType.dependsOn,
            oe.spdx30.LifecycleScopeType.build,
            sorted(oe.sbom30.get_element_link_id(b) for b in dep_builds),
        )

    debug_source_ids = set()
    source_hash_cache = {}

    # Write out the package SPDX data now. It is not complete as we cannot
    # write the runtime data, so write it to a staging area and a later task
    # will write out the final collection

    # TODO: Handle native recipe output
    if not is_native:
        bb.debug(1, "Collecting Dependency sources files")
        collect_dep_sources(dep_objsets, dep_sources)

        bb.build.exec_func("read_subpackage_metadata", d)

        pkgdest = Path(d.getVar("PKGDEST"))
        for package in d.getVar("PACKAGES").split():
            if not oe.packagedata.packaged(package, d):
                continue

            pkg_name = d.getVar("PKG:%s" % package) or package

            bb.debug(1, "Creating SPDX for package %s" % pkg_name)

            pkg_objset = oe.sbom30.ObjectSet.new_objset(d, "package-" + pkg_name)

            spdx_package = pkg_objset.add_root(
                oe.spdx30.software_Package(
                    _id=pkg_objset.new_spdxid("package", pkg_name),
                    creationInfo=pkg_objset.doc.creationInfo,
                    name=pkg_name,
                    software_packageVersion=d.getVar("PV"),
                )
            )
            set_timestamp_now(d, spdx_package, "builtTime")

            set_purposes(
                d,
                spdx_package,
                "SPDX_PACKAGE_ADDITIONAL_PURPOSE:%s" % package,
                "SPDX_PACKAGE_ADDITIONAL_PURPOSE",
                force_purposes=["install"],
            )

            supplier = build_objset.new_agent("SPDX_PACKAGE_SUPPLIER")
            if supplier is not None:
                spdx_package.suppliedBy = (
                    supplier if isinstance(supplier, str) else supplier._id
                )

            set_var_field(
                "HOMEPAGE", spdx_package, "software_homePage", package=package
            )
            set_var_field("SUMMARY", spdx_package, "summary", package=package)
            set_var_field("DESCRIPTION", spdx_package, "description", package=package)

            pkg_objset.new_scoped_relationship(
                [oe.sbom30.get_element_link_id(build)],
                oe.spdx30.RelationshipType.hasOutput,
                oe.spdx30.LifecycleScopeType.build,
                [spdx_package],
            )

            for cpe_id in cpe_ids:
                spdx_package.externalIdentifier.append(
                    oe.spdx30.ExternalIdentifier(
                        externalIdentifierType=oe.spdx30.ExternalIdentifierType.cpe23,
                        identifier=cpe_id,
                    )
                )

            # TODO: Generate a file for each actual IPK/DEB/RPM/TGZ file
            # generated and link it to the package
            # spdx_package_file = pkg_objset.add(oe.spdx30.software_File(
            #    _id=pkg_objset.new_spdxid("distribution", pkg_name),
            #    creationInfo=pkg_objset.doc.creationInfo,
            #    name=pkg_name,
            #    software_primaryPurpose=spdx_package.software_primaryPurpose,
            #    software_additionalPurpose=spdx_package.software_additionalPurpose,
            # ))
            # set_timestamp_now(d, spdx_package_file, "builtTime")

            ## TODO add hashes
            # pkg_objset.new_relationship(
            #    [spdx_package],
            #    oe.spdx30.RelationshipType.hasDistributionArtifact,
            #    [spdx_package_file],
            # )

            # NOTE: licenses live in the recipe collection and are referenced
            # by ID in the package collection(s). This helps reduce duplication
            # (since a lot of packages will have the same license), and also
            # prevents duplicate license SPDX IDs in the packages
            package_license = d.getVar("LICENSE:%s" % package)
            if package_license and package_license != d.getVar("LICENSE"):
                package_spdx_license = add_license_expression(
                    d, build_objset, package_license, license_data
                )
            else:
                package_spdx_license = recipe_spdx_license

            pkg_objset.new_relationship(
                [spdx_package],
                oe.spdx30.RelationshipType.hasConcludedLicense,
                [oe.sbom30.get_element_link_id(package_spdx_license)],
            )

            # NOTE: CVE Elements live in the recipe collection
            all_cves = set()
            for status, cves in cve_by_status.items():
                for cve, items in cves.items():
                    spdx_cve, detail, description = items
                    spdx_cve_id = oe.sbom30.get_element_link_id(spdx_cve)

                    all_cves.add(spdx_cve_id)

                    if status == "Patched":
                        pkg_objset.new_vex_patched_relationship(
                            [spdx_cve_id], [spdx_package]
                        )
                    elif status == "Unpatched":
                        pkg_objset.new_vex_unpatched_relationship(
                            [spdx_cve_id], [spdx_package]
                        )
                    elif status == "Ignored":
                        spdx_vex = pkg_objset.new_vex_ignored_relationship(
                            [spdx_cve_id],
                            [spdx_package],
                            impact_statement=description,
                        )

                        if detail in (
                            "ignored",
                            "cpe-incorrect",
                            "disputed",
                            "upstream-wontfix",
                        ):
                            # VEX doesn't have justifications for this
                            pass
                        elif detail in (
                            "not-applicable-config",
                            "not-applicable-platform",
                        ):
                            for v in spdx_vex:
                                v.security_justificationType = (
                                    oe.spdx30.security_VexJustificationType.vulnerableCodeNotPresent
                                )
                        else:
                            bb.fatal(f"Unknown detail '{detail}' for ignored {cve}")
                    else:
                        bb.fatal(f"Unknown {cve} status '{status}'")

            if all_cves:
                pkg_objset.new_relationship(
                    [spdx_package],
                    oe.spdx30.RelationshipType.hasAssociatedVulnerability,
                    sorted(all_cves),
                )

            bb.debug(1, "Adding package files to SPDX for package %s" % pkg_name)
            package_files = add_package_files(
                d,
                pkg_objset,
                pkgdest / package,
                lambda file_counter: pkg_objset.new_spdxid(
                    "package", pkg_name, "file", str(file_counter)
                ),
                # TODO: Can we know the purpose here?
                lambda filepath: [],
                license_data,
                ignore_top_level_dirs=["CONTROL", "DEBIAN"],
                archive=None,
            )

            if package_files:
                pkg_objset.new_relationship(
                    [spdx_package],
                    oe.spdx30.RelationshipType.contains,
                    sorted(package_files),
                )

            if include_sources:
                debug_sources = get_package_sources_from_debug(
                    d, package, package_files, dep_sources, source_hash_cache
                )
                # Tolerate a None result (nothing found for this package)
                # as well as a set of source file elements
                debug_source_ids.update(
                    oe.sbom30.get_element_link_id(src)
                    for src in (debug_sources or ())
                )

            oe.sbom30.write_recipe_jsonld_doc(
                d, pkg_objset, "packages-staging", deploydir, create_spdx_id_links=False
            )

    if include_sources:
        bb.debug(1, "Adding sysroot files to SPDX")
        sysroot_files = add_package_files(
            d,
            build_objset,
            d.expand("${COMPONENTS_DIR}/${PACKAGE_ARCH}/${PN}"),
            lambda file_counter: build_objset.new_spdxid("sysroot", str(file_counter)),
            lambda filepath: [],
            license_data,
            archive=None,
        )

        if sysroot_files:
            build_objset.new_scoped_relationship(
                [build],
                oe.spdx30.RelationshipType.hasOutput,
                oe.spdx30.LifecycleScopeType.build,
                sorted(sysroot_files),
            )

    if build_inputs or debug_source_ids:
        build_objset.new_scoped_relationship(
            [build],
            oe.spdx30.RelationshipType.hasInput,
            oe.spdx30.LifecycleScopeType.build,
            sorted(build_inputs) + sorted(debug_source_ids),
        )

    oe.sbom30.write_recipe_jsonld_doc(d, build_objset, "recipes", deploydir)
781
782
def create_package_spdx(d):
    """Write the final per-package SPDX documents with runtime dependencies.

    Nothing is written for native or cross recipes. For every packaged
    entry of PACKAGES, the staged "package-<name>" document is loaded from
    "packages-staging", a runtime-scoped ``dependsOn`` relationship to the
    packages providing its RDEPENDS is added, and the result is written to
    "packages" below SPDXRUNTIMEDEPLOY. A "<PN>-package-common" object set
    is written to "common-package" in the same location.
    """
    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    deploydir = Path(d.getVar("SPDXRUNTIMEDEPLOY"))
    is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class(
        "cross", d
    )

    providers = oe.spdx_common.collect_package_providers(d)
    pkg_arch = d.getVar("SSTATE_PKGARCH")

    if is_native:
        return

    bb.build.exec_func("read_subpackage_metadata", d)

    # Provider package name -> loaded SPDX package element, shared by all
    # packages of this recipe so each dependency is looked up only once
    dep_package_cache = {}

    # Any element common to all packages that need to be referenced by ID
    # should be written into this objset set
    common_objset = oe.sbom30.ObjectSet.new_objset(
        d, "%s-package-common" % d.getVar("PN")
    )

    pkgdest = Path(d.getVar("PKGDEST"))
    for package in d.getVar("PACKAGES").split():
        # Work on a copy with the package added to OVERRIDES so that
        # per-package values (e.g. RDEPENDS) are the ones seen below
        localdata = bb.data.createCopy(d)
        pkg_name = d.getVar("PKG:%s" % package) or package
        localdata.setVar("PKG", pkg_name)
        localdata.setVar("OVERRIDES", d.getVar("OVERRIDES", False) + ":" + package)

        if not oe.packagedata.packaged(package, localdata):
            continue

        spdx_package, pkg_objset = oe.sbom30.load_obj_in_jsonld(
            d,
            pkg_arch,
            "packages-staging",
            "package-" + pkg_name,
            oe.spdx30.software_Package,
            software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install,
        )

        # We will write out a new collection, so link it to the new
        # creation info in the common package data. The old creation info
        # should still exist and be referenced by all the existing elements
        # in the package
        pkg_objset.creationInfo = pkg_objset.copy_creation_info(
            common_objset.doc.creationInfo
        )

        runtime_spdx_deps = set()

        deps = bb.utils.explode_dep_versions2(localdata.getVar("RDEPENDS") or "")
        seen_deps = set()
        for dep in deps:
            if dep in seen_deps:
                continue

            if dep not in providers:
                continue

            # From here on "dep" is the name of the providing package
            (dep, _) = providers[dep]

            if not oe.packagedata.packaged(dep, localdata):
                continue

            dep_pkg_data = oe.packagedata.read_subpkgdata_dict(dep, d)
            dep_pkg = dep_pkg_data["PKG"]

            if dep in dep_package_cache:
                dep_spdx_package = dep_package_cache[dep]
            else:
                bb.debug(1, "Searching for %s" % dep_pkg)
                dep_spdx_package, _ = oe.sbom30.find_root_obj_in_jsonld(
                    d,
                    "packages-staging",
                    "package-" + dep_pkg,
                    oe.spdx30.software_Package,
                    software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install,
                )
                dep_package_cache[dep] = dep_spdx_package

            runtime_spdx_deps.add(dep_spdx_package)
            seen_deps.add(dep)

        if runtime_spdx_deps:
            # Set iteration order is not stable, so sort the link IDs to
            # keep the relationship targets in a deterministic order
            pkg_objset.new_scoped_relationship(
                [spdx_package],
                oe.spdx30.RelationshipType.dependsOn,
                oe.spdx30.LifecycleScopeType.runtime,
                sorted(
                    oe.sbom30.get_element_link_id(dep) for dep in runtime_spdx_deps
                ),
            )

        oe.sbom30.write_recipe_jsonld_doc(d, pkg_objset, "packages", deploydir)

    oe.sbom30.write_recipe_jsonld_doc(d, common_objset, "common-package", deploydir)
879
880
def write_bitbake_spdx(d):
    """
    Write the SPDX document describing the bitbake invocation itself.

    The document is written to "bitbake.spdx.json" inside DEPLOY_DIR_SPDX.

    When SPDX_INCLUDE_BITBAKE_PARENT_BUILD is "1", a Build element with a
    random, per-invocation ID is added as a root element, optionally related
    to a host (SPDX_BUILD_HOST), to an invoking agent (SPDX_INVOKED_BY) and to
    an agent on whose behalf the build was invoked (SPDX_ON_BEHALF_OF). In any
    other case, a warning is emitted for each of those settings that is set,
    since it would be ignored.

    Note that PN and BB_TASKHASH are overwritten in the datastore passed in.
    """
    # PN has to be set so that SPDX IDs can be generated
    d.setVar("PN", "bitbake")
    d.setVar("BB_TASKHASH", "bitbake")
    oe.spdx_common.load_spdx_license_data(d)

    output_path = Path(d.getVar("DEPLOY_DIR_SPDX")) / "bitbake.spdx.json"

    objset = oe.sbom30.ObjectSet.new_objset(d, "bitbake", False)

    host_import_key = d.getVar("SPDX_BUILD_HOST")
    # The agents are requested with add=False and are only explicitly added
    # to the object set below if they end up being referenced
    invoked_by = objset.new_agent("SPDX_INVOKED_BY", add=False)
    on_behalf_of = objset.new_agent("SPDX_ON_BEHALF_OF", add=False)

    if d.getVar("SPDX_INCLUDE_BITBAKE_PARENT_BUILD") != "1":
        # Without a parent build there is nothing to attach these to, so tell
        # the user about every setting that is being ignored
        ignored_settings = (
            (
                host_import_key,
                "SPDX_BUILD_HOST has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set",
            ),
            (
                invoked_by,
                "SPDX_INVOKED_BY has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set",
            ),
            (
                on_behalf_of,
                "SPDX_ON_BEHALF_OF has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set",
            ),
        )
        for value, message in ignored_settings:
            if value:
                bb.warn(message)
    else:
        # The Build objects are unique anyway, so the creation time may as
        # well be the current time instead of the fallback SDE
        objset.doc.creationInfo.created = datetime.now(timezone.utc)

        # Every bitbake invocation is a unique build, so give it a unique ID
        nonce = os.urandom(16).hex()

        build_element = oe.spdx30.build_Build(
            _id=objset.new_spdxid(nonce, include_unihash=False),
            creationInfo=objset.doc.creationInfo,
            build_buildType=oe.sbom30.SPDX_BUILD_TYPE,
        )
        build = objset.add_root(build_element)
        set_timestamp_now(d, build, "build_buildStartTime")

        if host_import_key:
            build_host = objset.new_import(host_import_key)
            objset.new_scoped_relationship(
                [build],
                oe.spdx30.RelationshipType.hasHost,
                oe.spdx30.LifecycleScopeType.build,
                [build_host],
            )

        if not invoked_by:
            if on_behalf_of:
                bb.warn("SPDX_ON_BEHALF_OF has no effect if SPDX_INVOKED_BY is not set")
        else:
            objset.add(invoked_by)
            invoked_by_spdx = objset.new_scoped_relationship(
                [build],
                oe.spdx30.RelationshipType.invokedBy,
                oe.spdx30.LifecycleScopeType.build,
                [invoked_by],
            )

            if on_behalf_of:
                # The delegation points at the invokedBy relationship(s)
                # created just above, not at the invoking agent
                objset.add(on_behalf_of)
                objset.new_scoped_relationship(
                    [on_behalf_of],
                    oe.spdx30.RelationshipType.delegatedTo,
                    oe.spdx30.LifecycleScopeType.build,
                    invoked_by_spdx,
                )

    # Attach an ID alias extension to every element in the document
    for element in objset.foreach_type(oe.spdx30.Element):
        element.extension.append(oe.sbom30.OEIdAliasExtension())

    oe.sbom30.write_jsonld_doc(d, objset, output_path)
962
963
def collect_build_package_inputs(d, objset, build, packages):
    """
    Record the SPDX packages named in `packages` as inputs of `build`.

    Each key of `packages` is resolved through the package providers
    collected from `d`; the corresponding "packages" SPDX document is looked
    up and a link to its root install package is collected. If any links were
    collected, a single build-scoped hasInput relationship from `build` to
    the sorted links is added to `objset`.

    Any name without a provider is reported, all together, through
    bb.fatal().
    """
    import oe.sbom30

    providers = oe.spdx_common.collect_package_providers(d)

    input_links = set()
    missing_providers = set()

    for name in sorted(packages.keys()):
        if name in providers:
            # Only the providing package name is needed; the second member of
            # the provider entry is not used here
            pkg_name, _ = providers[name]

            # Locate the root package element of the package SPDX document
            pkg_spdx, _ = oe.sbom30.find_root_obj_in_jsonld(
                d,
                "packages",
                "package-" + pkg_name,
                oe.spdx30.software_Package,
                software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install,
            )
            input_links.add(oe.sbom30.get_element_link_id(pkg_spdx))
        else:
            missing_providers.add(name)

    if missing_providers:
        bb.fatal(
            f"Unable to find SPDX provider(s) for: {', '.join(sorted(missing_providers))}"
        )

    if input_links:
        objset.new_scoped_relationship(
            [build],
            oe.spdx30.RelationshipType.hasInput,
            oe.spdx30.LifecycleScopeType.build,
            sorted(input_links),
        )
1001
1002
def create_rootfs_spdx(d):
    """
    Write the SPDX document describing the root filesystem of an image.

    The document contains an archive-purpose package named after
    IMAGE_BASENAME, a "rootfs" task build that has that package as its
    output, and the packages listed in the JSON file SPDX_ROOTFS_PACKAGES as
    the build inputs. It is written as a "rootfs" recipe document under
    SPDXROOTFSDEPLOY.
    """
    # NOTE(review): this value is not used anywhere in this function
    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    deploydir = Path(d.getVar("SPDXROOTFSDEPLOY"))
    root_packages_file = Path(d.getVar("SPDX_ROOTFS_PACKAGES"))
    image_basename = d.getVar("IMAGE_BASENAME")
    machine = d.getVar("MACHINE")

    packages = json.loads(root_packages_file.read_text())

    objset = oe.sbom30.ObjectSet.new_objset(
        d, "%s-%s-rootfs" % (image_basename, machine)
    )

    # The root filesystem itself, represented as a package
    rootfs_package = oe.spdx30.software_Package(
        _id=objset.new_spdxid("rootfs", image_basename),
        creationInfo=objset.doc.creationInfo,
        name=image_basename,
        software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.archive,
    )
    rootfs = objset.add_root(rootfs_package)
    set_timestamp_now(d, rootfs, "builtTime")

    # The build that produced the root filesystem
    rootfs_task = objset.new_task_build("rootfs", "rootfs")
    rootfs_build = objset.add_root(rootfs_task)
    set_timestamp_now(d, rootfs_build, "build_buildEndTime")

    objset.new_scoped_relationship(
        [rootfs_build],
        oe.spdx30.RelationshipType.hasOutput,
        oe.spdx30.LifecycleScopeType.build,
        [rootfs],
    )

    # Every listed package is an input of the rootfs build
    collect_build_package_inputs(d, objset, rootfs_build, packages)

    oe.sbom30.write_recipe_jsonld_doc(d, objset, "rootfs", deploydir)
1040
1041
def create_image_spdx(d):
    """
    Write the SPDX document describing the image files of an image recipe.

    IMAGE_OUTPUT_MANIFEST is read as JSON; each entry provides "taskname",
    "imagetype" and a list of "images" (each with a "filename" relative to
    IMGDEPLOYDIR). For every entry a task build is added as a root element,
    and every image file becomes a root file element carrying its SHA-256
    hash, which is recorded as an output of that build. If there is at least
    one build, all builds get the rootfs package of the image as their input.

    The result is written as an "image" recipe document under SPDXIMAGEWORK.
    """
    import oe.sbom30

    image_deploy_dir = Path(d.getVar("IMGDEPLOYDIR"))
    manifest_path = Path(d.getVar("IMAGE_OUTPUT_MANIFEST"))
    spdx_work_dir = Path(d.getVar("SPDXIMAGEWORK"))

    image_basename = d.getVar("IMAGE_BASENAME")
    machine = d.getVar("MACHINE")

    objset = oe.sbom30.ObjectSet.new_objset(
        d, "%s-%s-image" % (image_basename, machine)
    )

    manifest = json.loads(manifest_path.read_text())

    image_builds = []
    for task in manifest:
        imagetype = task["imagetype"]
        taskname = task["taskname"]

        task_build = objset.new_task_build(taskname, "image/%s" % imagetype)
        image_build = objset.add_root(task_build)
        set_timestamp_now(d, image_build, "build_buildEndTime")
        image_builds.append(image_build)

        # Describe every file produced by this image task
        outputs = []
        for image in task["images"]:
            filename = image["filename"]
            image_file = objset.add_root(
                oe.spdx30.software_File(
                    _id=objset.new_spdxid("image", filename),
                    creationInfo=objset.doc.creationInfo,
                    name=filename,
                    verifiedUsing=[
                        oe.spdx30.Hash(
                            algorithm=oe.spdx30.HashAlgorithm.sha256,
                            hashValue=bb.utils.sha256_file(
                                image_deploy_dir / filename
                            ),
                        )
                    ],
                )
            )
            # The image type specific purpose variable is listed before the
            # generic one
            set_purposes(
                d,
                image_file,
                "SPDX_IMAGE_PURPOSE:%s" % imagetype,
                "SPDX_IMAGE_PURPOSE",
            )
            set_timestamp_now(d, image_file, "builtTime")
            outputs.append(image_file)

        if outputs:
            objset.new_scoped_relationship(
                [image_build],
                oe.spdx30.RelationshipType.hasOutput,
                oe.spdx30.LifecycleScopeType.build,
                outputs,
            )

    if image_builds:
        # All of the image builds consume the root filesystem
        rootfs_image, _ = oe.sbom30.find_root_obj_in_jsonld(
            d,
            "rootfs",
            "%s-%s-rootfs" % (image_basename, machine),
            oe.spdx30.software_Package,
            # TODO: Should use a purpose to filter here?
        )
        rootfs_link = oe.sbom30.get_element_link_id(rootfs_image)
        objset.new_scoped_relationship(
            image_builds,
            oe.spdx30.RelationshipType.hasInput,
            oe.spdx30.LifecycleScopeType.build,
            [rootfs_link],
        )

    objset.add_aliases()
    objset.link()
    oe.sbom30.write_recipe_jsonld_doc(d, objset, "image", spdx_work_dir)
1121
1122
def create_image_sbom_spdx(d):
    """
    Create the SBoM for an image and write it to SPDXIMAGEDEPLOYDIR.

    The root elements of the SBoM are the rootfs package of the image plus
    every root file element found in the image SPDX document. The SBoM is
    written to "<IMAGE_NAME>.spdx.json"; if IMAGE_LINK_NAME is set and yields
    a different path, a relative symlink "<IMAGE_LINK_NAME>.spdx.json"
    pointing at the written file is created next to it.
    """
    import oe.sbom30

    image_name = d.getVar("IMAGE_NAME")
    image_basename = d.getVar("IMAGE_BASENAME")
    image_link_name = d.getVar("IMAGE_LINK_NAME")
    imgdeploydir = Path(d.getVar("SPDXIMAGEDEPLOYDIR"))
    machine = d.getVar("MACHINE")

    spdx_path = imgdeploydir / (image_name + ".spdx.json")

    # TODO: Do we need to add the rootfs or are the image files sufficient?
    rootfs_image, _ = oe.sbom30.find_root_obj_in_jsonld(
        d,
        "rootfs",
        "%s-%s-rootfs" % (image_basename, machine),
        oe.spdx30.software_Package,
        # TODO: Should use a purpose here?
    )
    root_elements = [oe.sbom30.get_element_link_id(rootfs_image)]

    # Every image file is also a root element of the SBoM
    image_objset, _ = oe.sbom30.find_jsonld(
        d, "image", "%s-%s-image" % (image_basename, machine), required=True
    )
    root_elements.extend(
        oe.sbom30.get_element_link_id(image_file)
        for image_file in image_objset.foreach_root(oe.spdx30.software_File)
    )

    objset, _sbom = oe.sbom30.create_sbom(d, image_name, root_elements)

    oe.sbom30.write_jsonld_doc(d, objset, spdx_path)

    # Provide the document under the image link name as well, unless that
    # would be the very same path
    if image_link_name:
        link = imgdeploydir / (image_link_name + ".spdx.json")
        if link != spdx_path:
            link.symlink_to(os.path.relpath(spdx_path, link.parent))
1163
1164
def sdk_create_spdx(d, sdk_type, spdx_work_dir, toolchain_outputname):
    """
    Write the SPDX document describing the root filesystem of an SDK.

    The document is named "<toolchain_outputname>-<sdk_type>" and contains an
    archive-purpose package for the SDK rootfs, the "sdk-rootfs" task build
    that outputs it, and the installed SDK packages as inputs of that build.
    The installed packages are listed for the target when sdk_type is
    "target". The document is written to "sdk-rootfs.spdx.json" in
    spdx_work_dir, which must support the "/" path operator.
    """
    sdk_name = toolchain_outputname + "-" + sdk_type
    is_target_sdk = sdk_type == "target"
    sdk_packages = oe.sdk.sdk_list_installed_packages(d, is_target_sdk)

    objset = oe.sbom30.ObjectSet.new_objset(d, sdk_name)

    # The SDK root filesystem, represented as a package
    rootfs_package = oe.spdx30.software_Package(
        _id=objset.new_spdxid("sdk-rootfs", sdk_name),
        creationInfo=objset.doc.creationInfo,
        name=sdk_name,
        software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.archive,
    )
    sdk_rootfs = objset.add_root(rootfs_package)
    set_timestamp_now(d, sdk_rootfs, "builtTime")

    # The build that produced the SDK root filesystem
    rootfs_task = objset.new_task_build("sdk-rootfs", "sdk-rootfs")
    sdk_build = objset.add_root(rootfs_task)
    set_timestamp_now(d, sdk_build, "build_buildEndTime")

    objset.new_scoped_relationship(
        [sdk_build],
        oe.spdx30.RelationshipType.hasOutput,
        oe.spdx30.LifecycleScopeType.build,
        [sdk_rootfs],
    )

    # Every installed package is an input of the SDK rootfs build
    collect_build_package_inputs(d, objset, sdk_build, sdk_packages)

    objset.add_aliases()
    output_path = spdx_work_dir / "sdk-rootfs.spdx.json"
    oe.sbom30.write_jsonld_doc(d, objset, output_path)
1195
1196
def create_sdk_sbom(d, sdk_deploydir, spdx_work_dir, toolchain_outputname):
    """
    Create the SBoM for an SDK and write it into sdk_deploydir.

    The "sdk-rootfs.spdx.json" document found in spdx_work_dir is loaded and
    extended with an "sdk-populate" task build that takes the SDK rootfs
    package as its input. Every regular, non-symlink file below sdk_deploydir
    is added as a file element and, if there are any, recorded as an output
    of that build; a warning is emitted when no such file is found. All of
    the collected files are passed to the SBoM as its root elements, and the
    result is written to "<toolchain_outputname>.spdx.json".

    Both directory arguments must support the "/" path operator.
    bb.fatal() is called if the loaded document has no root package.
    """
    # Load the document written earlier
    rootfs_objset = oe.sbom30.load_jsonld(
        d, spdx_work_dir / "sdk-rootfs.spdx.json", required=True
    )

    # Create a new build for the SDK installer
    sdk_build = rootfs_objset.new_task_build("sdk-populate", "sdk-populate")
    set_timestamp_now(d, sdk_build, "build_buildEndTime")

    rootfs = rootfs_objset.find_root(oe.spdx30.software_Package)
    if rootfs is None:
        bb.fatal("Unable to find rootfs artifact")

    rootfs_objset.new_scoped_relationship(
        [sdk_build],
        oe.spdx30.RelationshipType.hasInput,
        oe.spdx30.LifecycleScopeType.build,
        [rootfs],
    )

    files = set()
    # NOTE(review): root_files is filled in below but never read in this
    # function; the SBoM is given all of `files` as root elements instead.
    # Confirm whether that is intended.
    root_files = []

    for dirpath, _dirnames, filenames in os.walk(sdk_deploydir):
        for fn in filenames:
            fpath = Path(dirpath) / fn
            # Only regular files are described; symlinks are left out
            if fpath.is_file() and not fpath.is_symlink():
                relpath = str(fpath.relative_to(sdk_deploydir))

                sdk_file = rootfs_objset.new_file(
                    rootfs_objset.new_spdxid("sdk-installer", relpath),
                    relpath,
                    fpath,
                )
                set_timestamp_now(d, sdk_file, "builtTime")

                # Manifests and test data get a fixed purpose, anything else
                # gets its purpose from SPDX_SDK_PURPOSE
                if fn.endswith(".manifest"):
                    sdk_file.software_primaryPurpose = (
                        oe.spdx30.software_SoftwarePurpose.manifest
                    )
                elif fn.endswith(".testdata.json"):
                    sdk_file.software_primaryPurpose = (
                        oe.spdx30.software_SoftwarePurpose.configuration
                    )
                else:
                    set_purposes(d, sdk_file, "SPDX_SDK_PURPOSE")
                    root_files.append(sdk_file)

                files.add(sdk_file)

    if not files:
        bb.warn(f"No SDK output files found in {sdk_deploydir}")
    else:
        rootfs_objset.new_scoped_relationship(
            [sdk_build],
            oe.spdx30.RelationshipType.hasOutput,
            oe.spdx30.LifecycleScopeType.build,
            files,
        )

    objset, _sbom = oe.sbom30.create_sbom(
        d, toolchain_outputname, sorted(files), [rootfs_objset]
    )

    output_path = sdk_deploydir / (toolchain_outputname + ".spdx.json")
    oe.sbom30.write_jsonld_doc(d, objset, output_path)
1266