xref: /openbmc/openbmc/poky/meta/lib/oe/spdx30_tasks.py (revision c9537f57ab488bf5d90132917b0184e2527970a5)
1#
2# Copyright OpenEmbedded Contributors
3#
4# SPDX-License-Identifier: GPL-2.0-only
5#
6
7import json
8import oe.cve_check
9import oe.packagedata
10import oe.patch
11import oe.sbom30
12import oe.spdx30
13import oe.spdx_common
14import oe.sdk
15import os
16
17from contextlib import contextmanager
18from datetime import datetime, timezone
19from pathlib import Path
20
21
def walk_error(err):
    """Log an error hit while walking a directory tree.

    Passed as the ``onerror`` callback to ``os.walk``; *err* is the raised
    exception, whose ``filename`` attribute is included in the message.
    """
    message = f"ERROR walking {err.filename}: {err}"
    bb.error(message)
24
25
def set_timestamp_now(d, o, prop):
    """Set ``o.<prop>`` to the current UTC time, or remove the property.

    The timestamp is written only when ``SPDX_INCLUDE_TIMESTAMPS`` is "1";
    for any other value the attribute is deleted from *o*.
    """
    timestamps_enabled = d.getVar("SPDX_INCLUDE_TIMESTAMPS") == "1"

    if not timestamps_enabled:
        # Deleting (instead of just skipping) helps to validate that the
        # property actually exists, and also that it is not mandatory
        delattr(o, prop)
        return

    setattr(o, prop, datetime.now(timezone.utc))
33
34
def add_license_expression(d, objset, license_expression, license_data):
    """Translate an OE license expression to SPDX and register it in *objset*.

    Every token of *license_expression* is converted to its SPDX spelling:
    ``&`` and ``|`` become ``AND`` and ``OR``, ``CLOSED`` becomes ``NONE``,
    names present in ``license_data["licenses"]`` (after applying any
    ``SPDXLICENSEMAP`` flag) are used as-is, and every other name becomes a
    ``LicenseRef-<name>`` entry backed by a license text element.

    Returns the element produced by ``objset.new_license_expression``.
    """
    text_elements = {}
    license_text_map = {}
    fixed_tokens = {"(": "(", ")": ")", "&": "AND", "|": "OR", "CLOSED": "NONE"}

    def add_license_text(name):
        # Already created or looked up during this call
        try:
            return text_elements[name]
        except KeyError:
            pass

        # Reuse a matching element that already lives in the object set
        existing = objset.find_filter(
            oe.spdx30.simplelicensing_SimpleLicensingText,
            name=name,
        )
        if existing is not None:
            text_elements[name] = existing
            return existing

        lic = objset.add(
            oe.spdx30.simplelicensing_SimpleLicensingText(
                _id=objset.new_spdxid("license-text", name),
                creationInfo=objset.doc.creationInfo,
                name=name,
            )
        )
        objset.set_element_alias(lic)
        text_elements[name] = lic

        if name == "PD":
            lic.simplelicensing_licenseText = "Software released to the public domain"
            return lic

        # Search for the license in COMMON_LICENSE_DIR and LICENSE_PATH
        search_dirs = [d.getVar("COMMON_LICENSE_DIR")]
        search_dirs += (d.getVar("LICENSE_PATH") or "").split()
        for directory in search_dirs:
            try:
                candidate = Path(directory) / name
                with candidate.open(errors="replace") as f:
                    lic.simplelicensing_licenseText = f.read()
            except FileNotFoundError:
                continue
            return lic

        # If it's not SPDX or PD, then NO_GENERIC_LICENSE must be set
        filename = d.getVarFlag("NO_GENERIC_LICENSE", name)
        if filename:
            with open(d.expand("${S}/" + filename), errors="replace") as f:
                lic.simplelicensing_licenseText = f.read()
            return lic

        bb.fatal("Cannot find any text for license %s" % name)

    def convert(token):
        if token in fixed_tokens:
            return fixed_tokens[token]

        mapped = d.getVarFlag("SPDXLICENSEMAP", token) or token
        if mapped in license_data["licenses"]:
            return mapped

        # Not a known SPDX license: reference a custom license text instead
        ref = "LicenseRef-" + token
        if ref not in license_text_map:
            license_text_map[ref] = oe.sbom30.get_element_link_id(
                add_license_text(token)
            )
        return ref

    # Pad the operators with spaces so a plain split() yields the tokens
    spaced = license_expression
    for symbol in "()|&":
        spaced = spaced.replace(symbol, f" {symbol} ")

    converted = [convert(token) for token in spaced.split()]
    spdx_license_expression = " ".join(converted)

    expression = objset.new_license_expression(
        spdx_license_expression, license_data, license_text_map
    )
    objset.set_element_alias(expression)
    return expression
134
135
def add_package_files(
    d,
    objset,
    topdir,
    get_spdxid,
    get_purposes,
    license_data=None,
    *,
    archive=None,
    ignore_dirs=(),
    ignore_top_level_dirs=(),
):
    """Add an SPDX file element to *objset* for every regular file in *topdir*.

    Args:
        d: datastore; only ``SOURCE_DATE_EPOCH`` is read from it.
        objset: object set the file elements are created in.
        topdir: directory to walk. If it does not exist, a note is logged and
            an empty set is returned.
        get_spdxid: callable given the 1-based file counter, returning the
            SPDX ID for that file.
        get_purposes: callable given the file path, returning its purposes.
        license_data: when not None, files with the ``source`` purpose are
            scanned for declared licenses.
        archive: optional archive object (used through ``gettarinfo`` and
            ``addfile``) that every file is also added to.
        ignore_dirs: directory names skipped at any depth.
        ignore_top_level_dirs: directory names skipped directly below
            *topdir* only.

    Returns:
        The set of created file elements.
    """
    # Normalise to an int or None. An empty value used to stay as "" and
    # later broke the integer comparison against the archive member mtime.
    raw_epoch = d.getVar("SOURCE_DATE_EPOCH")
    source_date_epoch = int(raw_epoch) if raw_epoch else None

    spdx_files = set()

    if not os.path.exists(topdir):
        bb.note(f"Skip {topdir}")
        return spdx_files

    file_counter = 1
    for subdir, dirs, files in os.walk(topdir, onerror=walk_error):
        # Prune in place so os.walk does not descend into ignored directories
        dirs[:] = [name for name in dirs if name not in ignore_dirs]
        if subdir == str(topdir):
            dirs[:] = [name for name in dirs if name not in ignore_top_level_dirs]

        # Sort so that the file counter (and thus the SPDX IDs) is stable
        dirs.sort()
        files.sort()
        for file in files:
            filepath = Path(subdir) / file
            if filepath.is_symlink() or not filepath.is_file():
                continue

            filename = str(filepath.relative_to(topdir))
            file_purposes = get_purposes(filepath)

            spdx_file = objset.new_file(
                get_spdxid(file_counter),
                filename,
                filepath,
                purposes=file_purposes,
            )
            spdx_files.add(spdx_file)

            if (
                oe.spdx30.software_SoftwarePurpose.source in file_purposes
                and license_data is not None
            ):
                objset.scan_declared_licenses(spdx_file, filepath, license_data)

            if archive is not None:
                with filepath.open("rb") as f:
                    info = archive.gettarinfo(fileobj=f)
                    # Strip host-specific ownership from the archive member
                    info.name = filename
                    info.uid = 0
                    info.gid = 0
                    info.uname = "root"
                    info.gname = "root"

                    # Clamp timestamps newer than SOURCE_DATE_EPOCH
                    if source_date_epoch is not None and info.mtime > source_date_epoch:
                        info.mtime = source_date_epoch

                    archive.addfile(info, f)

            file_counter += 1

    bb.debug(1, "Added %d files to %s" % (len(spdx_files), objset.doc._id))

    return spdx_files
207
208
def get_package_sources_from_debug(
    d, package, package_files, sources, source_hash_cache
):
    """Find the known source files referenced by the debug info of *package*.

    The extended pkgdata of *package* lists, per packaged file, the
    ``debugsrc`` paths recorded for it. Each of those paths is looked up in a
    fixed list of search directories, hashed with SHA256, and matched against
    *sources*.

    Args:
        d: datastore.
        package: package name whose extended pkgdata is read.
        package_files: SPDX file elements of the package; every pkgdata file
            carrying ``debugsrc`` must match one of them by name or alias.
        sources: mapping of SHA256 hex digest to source file element.
        source_hash_cache: mapping of path to SHA256 (or None for paths that
            are not regular files); read and updated to avoid re-hashing.

    Returns:
        The set of matching elements from *sources*. The set is empty when
        no extended pkgdata exists for *package*.
    """

    def file_path_match(file_path, pkg_file):
        # Compare without leading slashes against the name and any aliases
        wanted = file_path.lstrip("/")
        if wanted == pkg_file.name.lstrip("/"):
            return True

        return any(
            wanted == alias.lstrip("/")
            for ext in pkg_file.extension
            if isinstance(ext, oe.sbom30.OEFileNameAliasExtension)
            for alias in ext.aliases
        )

    debug_search_paths = [
        Path(d.getVar("SPDXWORK")),
        Path(d.getVar("PKGD")),
        Path(d.getVar("STAGING_DIR_TARGET")),
        Path(d.getVar("STAGING_DIR_NATIVE")),
        Path(d.getVar("STAGING_KERNEL_DIR")),
    ]

    pkg_data = oe.packagedata.read_subpkgdata_extended(package, d)

    if pkg_data is None:
        # Return an empty set (not None) so callers can always iterate the
        # result
        return set()

    dep_source_files = set()

    for file_path, file_data in pkg_data["files_info"].items():
        if "debugsrc" not in file_data:
            continue

        if not any(file_path_match(file_path, pkg_file) for pkg_file in package_files):
            bb.fatal(
                "No package file found for %s in %s; SPDX found: %s"
                % (str(file_path), package, " ".join(p.name for p in package_files))
            )
            continue

        for debugsrc in file_data["debugsrc"]:
            # The path relative to a search directory does not depend on the
            # directory, so compute it once per debug source
            if debugsrc.startswith("/usr/src/kernel"):
                relative_src = debugsrc.replace("/usr/src/kernel/", "")
            else:
                relative_src = debugsrc.lstrip("/")

            for search in debug_search_paths:
                debugsrc_path = search / relative_src

                if debugsrc_path in source_hash_cache:
                    file_sha256 = source_hash_cache[debugsrc_path]
                    if file_sha256 is None:
                        # Cached as "not a regular file"; try the next directory
                        continue
                else:
                    # We can only hash files below, skip directories, links, etc.
                    if not debugsrc_path.is_file():
                        source_hash_cache[debugsrc_path] = None
                        continue

                    file_sha256 = bb.utils.sha256_file(debugsrc_path)
                    source_hash_cache[debugsrc_path] = file_sha256

                if file_sha256 in sources:
                    dep_source_files.add(sources[file_sha256])
                else:
                    bb.debug(
                        1,
                        "Debug source %s with SHA256 %s not found in any dependency"
                        % (str(debugsrc_path), file_sha256),
                    )
                # The first directory containing the file wins
                break
            else:
                bb.debug(1, "Debug source %s not found" % debugsrc)

    return dep_source_files
284
285
def collect_dep_objsets(d, build):
    """Load the recipe SPDX data of every SPDX dependency of this recipe.

    Returns a ``(dep_objsets, dep_builds)`` tuple: the list of object sets of
    the dependencies that are part of the taskhash, and the set of build
    elements of all dependencies. *build* is accepted but not used here.
    """
    dep_objsets = []
    dep_builds = set()

    for dep in oe.spdx_common.get_spdx_deps(d):
        bb.debug(1, "Fetching SPDX for dependency %s" % (dep.pn))
        dep_build, dep_objset = oe.sbom30.find_root_obj_in_jsonld(
            d, "recipes", "recipe-" + dep.pn, oe.spdx30.build_Build
        )

        # The build _can_ always be linked against (by alias)
        dep_builds.add(dep_build)

        # The object set is only returned for linking when the dependency is
        # part of the taskhash. Otherwise this recipe would not be rebuilt
        # when the dependency changes, so it cannot be linked against.
        if dep.in_taskhash:
            dep_objsets.append(dep_objset)

    return dep_objsets, dep_builds
308
309
def index_sources_by_hash(sources, dest):
    """Index the source file elements in *sources* by SHA256 into *dest*.

    Only ``software_File`` elements whose primary purpose is ``source`` are
    considered. The first element seen for a given hash is kept. An element
    without a SHA256 entry in ``verifiedUsing`` is reported via ``bb.fatal``.
    """
    for element in sources:
        if (
            not isinstance(element, oe.spdx30.software_File)
            or element.software_primaryPurpose
            != oe.spdx30.software_SoftwarePurpose.source
        ):
            continue

        sha256 = next(
            (
                v
                for v in element.verifiedUsing
                if v.algorithm == oe.spdx30.HashAlgorithm.sha256
            ),
            None,
        )
        if sha256 is None:
            bb.fatal(f"No SHA256 found for {element.name}")
            continue

        # Keep an existing entry if this hash was already indexed
        dest.setdefault(sha256.hashValue, element)
325
326
def collect_dep_sources(dep_objsets, dest):
    """Index the source inputs of every non-native dependency into *dest*.

    For each object set, the targets of the ``hasInput`` relationships that
    start at its root build element are passed to ``index_sources_by_hash``.
    """
    for dep_objset in dep_objsets:
        # Don't collect sources from native recipes as they
        # match non-native sources also.
        if dep_objset.is_native():
            continue

        bb.debug(1, "Fetching Sources for dependency %s" % (dep_objset.doc.name))

        dep_build = dep_objset.find_root(oe.spdx30.build_Build)
        if not dep_build:
            bb.fatal("Unable to find a build")

        for rel in dep_objset.foreach_type(oe.spdx30.Relationship):
            # Only relationships of the form "<this build> hasInput ..."
            if (
                dep_build is not rel.from_
                or rel.relationshipType != oe.spdx30.RelationshipType.hasInput
            ):
                continue

            index_sources_by_hash(rel.to, dest)
348
349
def add_download_files(d, objset):
    """Add an SPDX element to *objset* for every entry of ``SRC_URI``.

    Local ``file`` entries become file elements (one per contained file when
    the entry is a directory); every other entry becomes a package element
    carrying its download location and, where available, its expected
    checksums. Returns the set of created elements.
    """
    inputs = set()

    urls = d.getVar("SRC_URI").split()
    fetch = bb.fetch2.Fetch(urls, d)

    for download_no, src_uri in enumerate(urls, start=1):
        fd = fetch.ud[src_uri]

        file_name = os.path.basename(fetch.localpath(src_uri))
        is_patch = oe.patch.patch_path(src_uri, fetch, "", expand=False)
        primary_purpose = (
            oe.spdx30.software_SoftwarePurpose.patch
            if is_patch
            else oe.spdx30.software_SoftwarePurpose.source
        )

        if fd.type != "file":
            # Remote download: describe it as a package with a location
            dl = objset.add(
                oe.spdx30.software_Package(
                    _id=objset.new_spdxid("source", str(download_no)),
                    creationInfo=objset.doc.creationInfo,
                    name=file_name,
                    software_primaryPurpose=primary_purpose,
                    software_downloadLocation=oe.spdx_common.fetch_data_to_uri(
                        fd, fd.name
                    ),
                )
            )

            if fd.method.supports_checksum(fd):
                # TODO Need something better than hard coding this
                for checksum_id in ("sha256", "sha1"):
                    expected_checksum = getattr(
                        fd, "%s_expected" % checksum_id, None
                    )
                    if expected_checksum is not None:
                        dl.verifiedUsing.append(
                            oe.spdx30.Hash(
                                algorithm=getattr(
                                    oe.spdx30.HashAlgorithm, checksum_id
                                ),
                                hashValue=expected_checksum,
                            )
                        )

            inputs.add(dl)
            continue

        if not os.path.isdir(fd.localpath):
            # Single local file
            inputs.add(
                objset.new_file(
                    objset.new_spdxid("source", str(download_no)),
                    file_name,
                    fd.localpath,
                    purposes=[primary_purpose],
                )
            )
            continue

        # Local directory: one file element per contained file, numbered in
        # sorted walk order
        walk_idx = 1
        for root, dirs, files in os.walk(fd.localpath, onerror=walk_error):
            dirs.sort()
            files.sort()
            for entry in files:
                f_path = os.path.join(root, entry)
                if os.path.islink(f_path):
                    # TODO: SPDX doesn't support symlinks yet
                    continue

                spdxid = objset.new_spdxid(
                    "source", str(download_no), str(walk_idx)
                )
                display_name = os.path.join(
                    file_name, os.path.relpath(f_path, fd.localpath)
                )
                inputs.add(
                    objset.new_file(
                        spdxid,
                        display_name,
                        f_path,
                        purposes=[primary_purpose],
                    )
                )
                walk_idx += 1

    return inputs
432
433
def set_purposes(d, element, *var_names, force_purposes=[]):
    """Set the primary and additional software purposes of *element*.

    The purpose list starts with *force_purposes* and is extended with the
    whitespace-separated value of the first variable in *var_names* that has
    a non-empty value. The first purpose becomes the primary one, the rest
    are additional. A warning is logged and *element* is left untouched when
    no purposes are found.
    """
    purposes = force_purposes[:]

    # Only the first variable with a value contributes
    first_value = next(
        (val for val in (d.getVar(name) for name in var_names) if val), None
    )
    if first_value:
        purposes += first_value.split()

    if not purposes:
        bb.warn("No SPDX purposes found in %s" % " ".join(var_names))
        return

    primary, *additional = purposes
    element.software_primaryPurpose = getattr(
        oe.spdx30.software_SoftwarePurpose, primary
    )
    element.software_additionalPurpose = [
        getattr(oe.spdx30.software_SoftwarePurpose, name) for name in additional
    ]
453
454
def create_spdx(d):
    """Generate the build-time SPDX documents for the current recipe.

    A ``recipe-<PN>`` object set is created holding the recipe build element,
    its custom annotations, CVE elements, download and source inputs, license
    expressions and build dependencies. For recipes that are neither native
    nor cross, one ``package-<PKG>`` object set per packaged package is
    written to the ``packages-staging`` area of ``SPDXDEPLOY``. Finally the
    recipe object set itself is written under ``recipes``.
    """

    def set_var_field(var, obj, name, package=None):
        # Prefer the package-specific override of the variable, fall back to
        # the recipe-wide value, and leave the field alone when neither is set
        val = None
        if package:
            val = d.getVar("%s:%s" % (var, package))

        if not val:
            val = d.getVar(var)

        if val:
            setattr(obj, name, val)

    license_data = oe.spdx_common.load_spdx_license_data(d)

    deploydir = Path(d.getVar("SPDXDEPLOY"))
    spdx_workdir = Path(d.getVar("SPDXWORK"))
    include_sources = d.getVar("SPDX_INCLUDE_SOURCES") == "1"
    is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class(
        "cross", d
    )
    include_vex = d.getVar("SPDX_INCLUDE_VEX")
    if include_vex not in ("none", "current", "all"):
        bb.fatal("SPDX_INCLUDE_VEX must be one of 'none', 'current', 'all'")

    build_objset = oe.sbom30.ObjectSet.new_objset(d, "recipe-" + d.getVar("PN"))

    build = build_objset.new_task_build("recipe", "recipe")
    build_objset.set_element_alias(build)

    build_objset.doc.rootElement.append(build)

    build_objset.set_is_native(is_native)

    for var in (d.getVar("SPDX_CUSTOM_ANNOTATION_VARS") or "").split():
        new_annotation(
            d,
            build_objset,
            build,
            "%s=%s" % (var, d.getVar(var)),
            oe.spdx30.AnnotationType.other,
        )

    build_inputs = set()

    # Add CVEs
    # Maps status -> {cve: (spdx element, detail, description)}
    cve_by_status = {}
    if include_vex != "none":
        patched_cves = oe.cve_check.get_patched_cves(d)
        for cve, patched_cve in patched_cves.items():
            decoded_status = {
                "mapping": patched_cve["abbrev-status"],
                "detail": patched_cve["status"],
                "description": patched_cve.get("justification", None),
            }

            # If this CVE is fixed upstream, skip it unless all CVEs are
            # specified.
            if include_vex != "all" and decoded_status["detail"] in (
                "fixed-version",
                "cpe-stable-backport",
            ):
                bb.debug(1, "Skipping %s since it is already fixed upstream" % cve)
                continue

            spdx_cve = build_objset.new_cve_vuln(cve)
            build_objset.set_element_alias(spdx_cve)

            cve_by_status.setdefault(decoded_status["mapping"], {})[cve] = (
                spdx_cve,
                decoded_status["detail"],
                decoded_status["description"],
            )

    cpe_ids = oe.cve_check.get_cpe_ids(d.getVar("CVE_PRODUCT"), d.getVar("CVE_VERSION"))

    source_files = add_download_files(d, build_objset)
    build_inputs |= source_files

    recipe_spdx_license = add_license_expression(
        d, build_objset, d.getVar("LICENSE"), license_data
    )
    build_objset.new_relationship(
        source_files,
        oe.spdx30.RelationshipType.hasConcludedLicense,
        [oe.sbom30.get_element_link_id(recipe_spdx_license)],
    )

    # Maps SHA256 -> source file element, used to resolve debug sources
    dep_sources = {}
    if oe.spdx_common.process_sources(d) and include_sources:
        bb.debug(1, "Adding source files to SPDX")
        oe.spdx_common.get_patched_src(d)

        files = add_package_files(
            d,
            build_objset,
            spdx_workdir,
            lambda file_counter: build_objset.new_spdxid(
                "sourcefile", str(file_counter)
            ),
            lambda filepath: [oe.spdx30.software_SoftwarePurpose.source],
            license_data,
            ignore_dirs=[".git"],
            ignore_top_level_dirs=["temp"],
            archive=None,
        )
        build_inputs |= files
        index_sources_by_hash(files, dep_sources)

    dep_objsets, dep_builds = collect_dep_objsets(d, build)
    if dep_builds:
        build_objset.new_scoped_relationship(
            [build],
            oe.spdx30.RelationshipType.dependsOn,
            oe.spdx30.LifecycleScopeType.build,
            sorted(oe.sbom30.get_element_link_id(b) for b in dep_builds),
        )

    debug_source_ids = set()
    source_hash_cache = {}

    # Write out the package SPDX data now. It is not complete as we cannot
    # write the runtime data, so write it to a staging area and a later task
    # will write out the final collection

    # TODO: Handle native recipe output
    if not is_native:
        bb.debug(1, "Collecting Dependency sources files")
        collect_dep_sources(dep_objsets, dep_sources)

        bb.build.exec_func("read_subpackage_metadata", d)

        pkgdest = Path(d.getVar("PKGDEST"))
        for package in d.getVar("PACKAGES").split():
            if not oe.packagedata.packaged(package, d):
                continue

            pkg_name = d.getVar("PKG:%s" % package) or package

            bb.debug(1, "Creating SPDX for package %s" % pkg_name)

            pkg_objset = oe.sbom30.ObjectSet.new_objset(d, "package-" + pkg_name)

            spdx_package = pkg_objset.add_root(
                oe.spdx30.software_Package(
                    _id=pkg_objset.new_spdxid("package", pkg_name),
                    creationInfo=pkg_objset.doc.creationInfo,
                    name=pkg_name,
                    software_packageVersion=d.getVar("SPDX_PACKAGE_VERSION"),
                )
            )
            set_timestamp_now(d, spdx_package, "builtTime")

            set_purposes(
                d,
                spdx_package,
                "SPDX_PACKAGE_ADDITIONAL_PURPOSE:%s" % package,
                "SPDX_PACKAGE_ADDITIONAL_PURPOSE",
                force_purposes=["install"],
            )

            supplier = build_objset.new_agent("SPDX_PACKAGE_SUPPLIER")
            if supplier is not None:
                spdx_package.suppliedBy = (
                    supplier if isinstance(supplier, str) else supplier._id
                )

            set_var_field(
                "HOMEPAGE", spdx_package, "software_homePage", package=package
            )
            set_var_field("SUMMARY", spdx_package, "summary", package=package)
            set_var_field("DESCRIPTION", spdx_package, "description", package=package)
            # set_var_field() already skips the assignment when neither the
            # package-specific nor the recipe-wide variable is set
            set_var_field(
                "SPDX_PACKAGE_URL",
                spdx_package,
                "software_packageUrl",
                package=package,
            )

            pkg_objset.new_scoped_relationship(
                [oe.sbom30.get_element_link_id(build)],
                oe.spdx30.RelationshipType.hasOutput,
                oe.spdx30.LifecycleScopeType.build,
                [spdx_package],
            )

            for cpe_id in cpe_ids:
                spdx_package.externalIdentifier.append(
                    oe.spdx30.ExternalIdentifier(
                        externalIdentifierType=oe.spdx30.ExternalIdentifierType.cpe23,
                        identifier=cpe_id,
                    )
                )

            # TODO: Generate a file for each actual IPK/DEB/RPM/TGZ file
            # generated and link it to the package
            # spdx_package_file = pkg_objset.add(oe.spdx30.software_File(
            #    _id=pkg_objset.new_spdxid("distribution", pkg_name),
            #    creationInfo=pkg_objset.doc.creationInfo,
            #    name=pkg_name,
            #    software_primaryPurpose=spdx_package.software_primaryPurpose,
            #    software_additionalPurpose=spdx_package.software_additionalPurpose,
            # ))
            # set_timestamp_now(d, spdx_package_file, "builtTime")

            ## TODO add hashes
            # pkg_objset.new_relationship(
            #    [spdx_package],
            #    oe.spdx30.RelationshipType.hasDistributionArtifact,
            #    [spdx_package_file],
            # )

            # NOTE: licenses live in the recipe collection and are referenced
            # by ID in the package collection(s). This helps reduce duplication
            # (since a lot of packages will have the same license), and also
            # prevents duplicate license SPDX IDs in the packages
            package_license = d.getVar("LICENSE:%s" % package)
            if package_license and package_license != d.getVar("LICENSE"):
                package_spdx_license = add_license_expression(
                    d, build_objset, package_license, license_data
                )
            else:
                package_spdx_license = recipe_spdx_license

            pkg_objset.new_relationship(
                [spdx_package],
                oe.spdx30.RelationshipType.hasConcludedLicense,
                [oe.sbom30.get_element_link_id(package_spdx_license)],
            )

            # NOTE: CVE Elements live in the recipe collection
            all_cves = set()
            for status, cves in cve_by_status.items():
                for cve, items in cves.items():
                    spdx_cve, detail, description = items
                    spdx_cve_id = oe.sbom30.get_element_link_id(spdx_cve)

                    all_cves.add(spdx_cve_id)

                    if status == "Patched":
                        pkg_objset.new_vex_patched_relationship(
                            [spdx_cve_id], [spdx_package]
                        )
                    elif status == "Unpatched":
                        pkg_objset.new_vex_unpatched_relationship(
                            [spdx_cve_id], [spdx_package]
                        )
                    elif status == "Ignored":
                        spdx_vex = pkg_objset.new_vex_ignored_relationship(
                            [spdx_cve_id],
                            [spdx_package],
                            impact_statement=description,
                        )

                        if detail in (
                            "ignored",
                            "cpe-incorrect",
                            "disputed",
                            "upstream-wontfix",
                        ):
                            # VEX doesn't have justifications for this
                            pass
                        elif detail in (
                            "not-applicable-config",
                            "not-applicable-platform",
                        ):
                            for v in spdx_vex:
                                v.security_justificationType = (
                                    oe.spdx30.security_VexJustificationType.vulnerableCodeNotPresent
                                )
                        else:
                            bb.fatal(f"Unknown detail '{detail}' for ignored {cve}")
                    elif status == "Unknown":
                        bb.note(f"Skipping {cve} with status 'Unknown'")
                    else:
                        bb.fatal(f"Unknown {cve} status '{status}'")

            if all_cves:
                pkg_objset.new_relationship(
                    [spdx_package],
                    oe.spdx30.RelationshipType.hasAssociatedVulnerability,
                    sorted(all_cves),
                )

            bb.debug(1, "Adding package files to SPDX for package %s" % pkg_name)
            package_files = add_package_files(
                d,
                pkg_objset,
                pkgdest / package,
                lambda file_counter: pkg_objset.new_spdxid(
                    "package", pkg_name, "file", str(file_counter)
                ),
                # TODO: Can we know the purpose here?
                lambda filepath: [],
                license_data,
                ignore_top_level_dirs=["CONTROL", "DEBIAN"],
                archive=None,
            )

            if package_files:
                pkg_objset.new_relationship(
                    [spdx_package],
                    oe.spdx30.RelationshipType.contains,
                    sorted(package_files),
                )

            if include_sources:
                debug_sources = get_package_sources_from_debug(
                    d, package, package_files, dep_sources, source_hash_cache
                )
                # Guard against a None result (no extended pkgdata for the
                # package), which previously raised TypeError on iteration
                debug_source_ids |= {
                    oe.sbom30.get_element_link_id(src) for src in debug_sources or ()
                }

            oe.sbom30.write_recipe_jsonld_doc(
                d, pkg_objset, "packages-staging", deploydir, create_spdx_id_links=False
            )

    if include_sources:
        bb.debug(1, "Adding sysroot files to SPDX")
        sysroot_files = add_package_files(
            d,
            build_objset,
            d.expand("${COMPONENTS_DIR}/${PACKAGE_ARCH}/${PN}"),
            lambda file_counter: build_objset.new_spdxid("sysroot", str(file_counter)),
            lambda filepath: [],
            license_data,
            archive=None,
        )

        if sysroot_files:
            build_objset.new_scoped_relationship(
                [build],
                oe.spdx30.RelationshipType.hasOutput,
                oe.spdx30.LifecycleScopeType.build,
                sorted(sysroot_files),
            )

    if build_inputs or debug_source_ids:
        build_objset.new_scoped_relationship(
            [build],
            oe.spdx30.RelationshipType.hasInput,
            oe.spdx30.LifecycleScopeType.build,
            sorted(build_inputs) + sorted(debug_source_ids),
        )

    oe.sbom30.write_recipe_jsonld_doc(d, build_objset, "recipes", deploydir)
810
811
def create_package_spdx(d):
    """Write the final per-package SPDX documents including runtime deps.

    For every packaged package of a recipe that is neither native nor cross,
    the staged ``package-<PKG>`` document is loaded from ``packages-staging``,
    a runtime-scoped ``dependsOn`` relationship to the SPDX packages providing
    its ``RDEPENDS`` is added, and the result is written under ``packages`` in
    ``SPDXRUNTIMEDEPLOY``. A ``<PN>-package-common`` object set is written
    last under ``common-package``.
    """
    deploydir = Path(d.getVar("SPDXRUNTIMEDEPLOY"))
    is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class(
        "cross", d
    )

    providers = oe.spdx_common.collect_package_providers(d)
    pkg_arch = d.getVar("SSTATE_PKGARCH")

    if is_native:
        return

    bb.build.exec_func("read_subpackage_metadata", d)

    # Maps providing package name -> its SPDX package element, shared across
    # all packages of this recipe
    dep_package_cache = {}

    # Any element common to all packages that need to be referenced by ID
    # should be written into this objset set
    common_objset = oe.sbom30.ObjectSet.new_objset(
        d, "%s-package-common" % d.getVar("PN")
    )

    for package in d.getVar("PACKAGES").split():
        # Datastore copy with the package name appended to OVERRIDES, so that
        # getVar("RDEPENDS") below yields the package-specific value
        localdata = bb.data.createCopy(d)
        pkg_name = d.getVar("PKG:%s" % package) or package
        localdata.setVar("PKG", pkg_name)
        localdata.setVar("OVERRIDES", d.getVar("OVERRIDES", False) + ":" + package)

        if not oe.packagedata.packaged(package, localdata):
            continue

        spdx_package, pkg_objset = oe.sbom30.load_obj_in_jsonld(
            d,
            pkg_arch,
            "packages-staging",
            "package-" + pkg_name,
            oe.spdx30.software_Package,
            software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install,
        )

        # We will write out a new collection, so link it to the new
        # creation info in the common package data. The old creation info
        # should still exist and be referenced by all the existing elements
        # in the package
        pkg_objset.creationInfo = pkg_objset.copy_creation_info(
            common_objset.doc.creationInfo
        )

        runtime_spdx_deps = set()

        deps = bb.utils.explode_dep_versions2(localdata.getVar("RDEPENDS") or "")
        seen_deps = set()
        for rdep in deps:
            if rdep not in providers:
                continue

            (dep, _) = providers[rdep]

            # De-duplicate on the providing package, which is what gets
            # recorded below; several RDEPENDS names may resolve to it
            if dep in seen_deps:
                continue

            if not oe.packagedata.packaged(dep, localdata):
                continue

            if dep in dep_package_cache:
                dep_spdx_package = dep_package_cache[dep]
            else:
                # The pkgdata is only needed to locate the SPDX document, so
                # read it on a cache miss only
                dep_pkg_data = oe.packagedata.read_subpkgdata_dict(dep, d)
                dep_pkg = dep_pkg_data["PKG"]

                bb.debug(1, "Searching for %s" % dep_pkg)
                dep_spdx_package, _ = oe.sbom30.find_root_obj_in_jsonld(
                    d,
                    "packages-staging",
                    "package-" + dep_pkg,
                    oe.spdx30.software_Package,
                    software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install,
                )
                dep_package_cache[dep] = dep_spdx_package

            runtime_spdx_deps.add(dep_spdx_package)
            seen_deps.add(dep)

        if runtime_spdx_deps:
            # Sort the link IDs so the relationship does not depend on set
            # iteration order
            pkg_objset.new_scoped_relationship(
                [spdx_package],
                oe.spdx30.RelationshipType.dependsOn,
                oe.spdx30.LifecycleScopeType.runtime,
                sorted(
                    oe.sbom30.get_element_link_id(dep) for dep in runtime_spdx_deps
                ),
            )

        oe.sbom30.write_recipe_jsonld_doc(d, pkg_objset, "packages", deploydir)

    oe.sbom30.write_recipe_jsonld_doc(d, common_objset, "common-package", deploydir)
908
909
def write_bitbake_spdx(d):
    """
    Write the "bitbake.spdx.json" document into DEPLOY_DIR_SPDX.

    When SPDX_INCLUDE_BITBAKE_PARENT_BUILD is "1" the document gets a root
    Build element with a random, per-invocation ID. That build is related to
    the SPDX_BUILD_HOST import, the SPDX_INVOKED_BY agent and the
    SPDX_ON_BEHALF_OF agent, for whichever of those are set. Otherwise those
    three settings are unused and a warning is issued for each one that is set.

    PN and BB_TASKHASH are overwritten in the datastore that is passed in.
    """
    # SPDX IDs cannot be generated without PN, so give it (and the task
    # hash) a fixed value
    for var in ("PN", "BB_TASKHASH"):
        d.setVar(var, "bitbake")
    oe.spdx_common.load_spdx_license_data(d)

    out_path = Path(d.getVar("DEPLOY_DIR_SPDX")) / "bitbake.spdx.json"

    objset = oe.sbom30.ObjectSet.new_objset(d, "bitbake", False)

    build_host = d.getVar("SPDX_BUILD_HOST")
    invoker = objset.new_agent("SPDX_INVOKED_BY", add=False)
    delegator = objset.new_agent("SPDX_ON_BEHALF_OF", add=False)

    if d.getVar("SPDX_INCLUDE_BITBAKE_PARENT_BUILD") != "1":
        # No Build element is written, so none of these can be attached
        ignored_settings = (
            (
                build_host,
                "SPDX_BUILD_HOST has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set",
            ),
            (
                invoker,
                "SPDX_INVOKED_BY has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set",
            ),
            (
                delegator,
                "SPDX_ON_BEHALF_OF has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set",
            ),
        )
        for value, message in ignored_settings:
            if value:
                bb.warn(message)
    else:
        # Build objects are unique anyway, so use the real current time for
        # the creation info rather than the fallback SDE
        objset.doc.creationInfo.created = datetime.now(timezone.utc)

        # A fresh random ID per bitbake invocation, since every invocation
        # is its own build
        nonce = os.urandom(16).hex()

        parent_build = oe.spdx30.build_Build(
            _id=objset.new_spdxid(nonce, include_unihash=False),
            creationInfo=objset.doc.creationInfo,
            build_buildType=oe.sbom30.SPDX_BUILD_TYPE,
        )
        build = objset.add_root(parent_build)
        set_timestamp_now(d, build, "build_buildStartTime")

        if build_host:
            objset.new_scoped_relationship(
                [build],
                oe.spdx30.RelationshipType.hasHost,
                oe.spdx30.LifecycleScopeType.build,
                [objset.new_import(build_host)],
            )

        if not invoker:
            if delegator:
                bb.warn("SPDX_ON_BEHALF_OF has no effect if SPDX_INVOKED_BY is not set")
        else:
            objset.add(invoker)
            invoked_by_rel = objset.new_scoped_relationship(
                [build],
                oe.spdx30.RelationshipType.invokedBy,
                oe.spdx30.LifecycleScopeType.build,
                [invoker],
            )

            if delegator:
                # The delegation points at the invokedBy relationship(s)
                # created just above
                objset.add(delegator)
                objset.new_scoped_relationship(
                    [delegator],
                    oe.spdx30.RelationshipType.delegatedTo,
                    oe.spdx30.LifecycleScopeType.build,
                    invoked_by_rel,
                )

    for element in objset.foreach_type(oe.spdx30.Element):
        element.extension.append(oe.sbom30.OEIdAliasExtension())

    oe.sbom30.write_jsonld_doc(d, objset, out_path)
991
992
def collect_build_package_inputs(d, objset, build, packages, files_by_hash=None):
    """
    Record the SPDX packages for `packages` as build-scoped inputs of `build`.

    Each key of `packages` is resolved through the package provider map and
    its SPDX package is looked up under "packages". The link IDs of all
    packages found are attached to `build` in `objset` with a single
    ``hasInput`` relationship.

    If `files_by_hash` is a dict, it is updated in place: for every SHA-256
    key in a package object set's `by_sha256_hash`, the associated entries
    are merged into the set stored under that key.

    Calls bb.fatal(), naming every unresolved package, if any key of
    `packages` has no provider.
    """
    import oe.sbom30

    provider_map = oe.spdx_common.collect_package_providers(d)

    input_link_ids = set()
    unresolved = set()

    for requested in sorted(packages.keys()):
        if requested in provider_map:
            pkg_name, _hashfn = provider_map[requested]

            # Copy all of the package SPDX files into the Sbom elements
            pkg_spdx, pkg_objset = oe.sbom30.find_root_obj_in_jsonld(
                d,
                "packages",
                "package-" + pkg_name,
                oe.spdx30.software_Package,
                software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install,
            )
            input_link_ids.add(oe.sbom30.get_element_link_id(pkg_spdx))

            if files_by_hash is not None:
                for digest, entries in pkg_objset.by_sha256_hash.items():
                    files_by_hash.setdefault(digest, set()).update(entries)
        else:
            # Keep going so that every missing provider is reported at once
            unresolved.add(requested)

    if unresolved:
        bb.fatal(
            f"Unable to find SPDX provider(s) for: {', '.join(sorted(unresolved))}"
        )

    if input_link_ids:
        objset.new_scoped_relationship(
            [build],
            oe.spdx30.RelationshipType.hasInput,
            oe.spdx30.LifecycleScopeType.build,
            sorted(input_link_ids),
        )
1034
1035
def create_rootfs_spdx(d):
    """
    Write the SPDX document describing the image root filesystem.

    The document has two roots: a Package named after IMAGE_BASENAME with the
    "archive" purpose, and the "rootfs" task Build that outputs it. The
    packages listed in the SPDX_ROOTFS_PACKAGES JSON file become inputs of
    that build. Every regular, non-symlink file under IMAGE_ROOTFS is then
    related to the rootfs package with ``contains``: a file whose SHA-256 and
    relative path match a file from one of the input packages is referenced
    by that file's link ID, any other file gets a new File element.

    The result is written to SPDXROOTFSDEPLOY under "rootfs".
    """
    # Not used below; the lookup is kept as-is from the original code
    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    deploydir = Path(d.getVar("SPDXROOTFSDEPLOY"))
    root_packages_file = Path(d.getVar("SPDX_ROOTFS_PACKAGES"))
    image_basename = d.getVar("IMAGE_BASENAME")
    image_rootfs = d.getVar("IMAGE_ROOTFS")
    machine = d.getVar("MACHINE")

    with root_packages_file.open("r") as f:
        packages = json.load(f)

    objset = oe.sbom30.ObjectSet.new_objset(
        d, "%s-%s-rootfs" % (image_basename, machine)
    )

    rootfs_package = oe.spdx30.software_Package(
        _id=objset.new_spdxid("rootfs", image_basename),
        creationInfo=objset.doc.creationInfo,
        name=image_basename,
        software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.archive,
    )
    rootfs = objset.add_root(rootfs_package)
    set_timestamp_now(d, rootfs, "builtTime")

    rootfs_build = objset.add_root(objset.new_task_build("rootfs", "rootfs"))
    set_timestamp_now(d, rootfs_build, "build_buildEndTime")

    objset.new_scoped_relationship(
        [rootfs_build],
        oe.spdx30.RelationshipType.hasOutput,
        oe.spdx30.LifecycleScopeType.build,
        [rootfs],
    )

    # Filled in by collect_build_package_inputs() with the files of every
    # input package, keyed by SHA-256
    files_by_hash = {}
    collect_build_package_inputs(d, objset, rootfs_build, packages, files_by_hash)

    contained = set()
    for dirpath, dirnames, filenames in os.walk(image_rootfs, onerror=walk_error):
        # Sorting in place fixes the order in which the tree is visited
        dirnames.sort()
        filenames.sort()
        for fn in filenames:
            fpath = Path(dirpath) / fn
            if fpath.is_symlink() or not fpath.is_file():
                continue

            relpath = str(fpath.relative_to(image_rootfs))
            digest = bb.utils.sha256_file(fpath)

            # Prefer a file already described by an input package: it must
            # have the same content hash and the same relative path
            packaged_file = next(
                (
                    candidate
                    for candidate in files_by_hash.get(digest, ())
                    if isinstance(candidate, oe.spdx30.software_File)
                    and candidate.name == relpath
                ),
                None,
            )

            if packaged_file is not None:
                contained.add(oe.sbom30.get_element_link_id(packaged_file))
            else:
                contained.add(
                    objset.new_file(
                        objset.new_spdxid("rootfs-file", relpath),
                        relpath,
                        fpath,
                    )
                )

    if contained:
        objset.new_relationship(
            [rootfs],
            oe.spdx30.RelationshipType.contains,
            sorted(contained),
        )

    oe.sbom30.write_recipe_jsonld_doc(d, objset, "rootfs", deploydir)
1111
1112
def create_image_spdx(d):
    """
    Write the SPDX document describing the image files built from the rootfs.

    IMAGE_OUTPUT_MANIFEST is a JSON list of tasks, each with "imagetype",
    "taskname" and an "images" list of {"filename": ...} entries relative to
    IMGDEPLOYDIR. For every task a root Build element is created. Each image
    that is a directory is added via add_package_files(); any other image
    becomes a root File element carrying its SHA-256. All artifacts of a task
    get their purposes from SPDX_IMAGE_PURPOSE:<imagetype> (falling back to
    SPDX_IMAGE_PURPOSE), a built time, and are related to the task's build
    with ``hasOutput``.

    If the manifest had any task, all builds are related with ``hasInput`` to
    the rootfs package found under "rootfs". The document is written to
    SPDXIMAGEWORK under "image".
    """
    import oe.sbom30

    image_deploy_dir = Path(d.getVar("IMGDEPLOYDIR"))
    manifest_path = Path(d.getVar("IMAGE_OUTPUT_MANIFEST"))
    spdx_work_dir = Path(d.getVar("SPDXIMAGEWORK"))

    image_basename = d.getVar("IMAGE_BASENAME")
    machine = d.getVar("MACHINE")

    objset = oe.sbom30.ObjectSet.new_objset(
        d, "%s-%s-image" % (image_basename, machine)
    )

    with manifest_path.open("r") as f:
        manifest = json.load(f)

    builds = []
    for task in manifest:
        imagetype = task["imagetype"]
        taskname = task["taskname"]

        image_build = objset.add_root(
            objset.new_task_build(taskname, "image/%s" % imagetype)
        )
        set_timestamp_now(d, image_build, "build_buildEndTime")
        builds.append(image_build)

        artifacts = []

        for image in task["images"]:
            image_filename = image["filename"]
            image_path = image_deploy_dir / image_filename
            if os.path.isdir(image_path):
                # A directory image contributes one artifact per file in it
                artifacts.extend(
                    add_package_files(
                        d,
                        objset,
                        image_path,
                        lambda file_counter: objset.new_spdxid(
                            "imagefile", str(file_counter)
                        ),
                        lambda filepath: [],
                        license_data=None,
                        ignore_dirs=[],
                        ignore_top_level_dirs=[],
                        archive=None,
                    )
                )
            else:
                artifacts.append(
                    objset.add_root(
                        oe.spdx30.software_File(
                            _id=objset.new_spdxid("image", image_filename),
                            creationInfo=objset.doc.creationInfo,
                            name=image_filename,
                            verifiedUsing=[
                                oe.spdx30.Hash(
                                    algorithm=oe.spdx30.HashAlgorithm.sha256,
                                    hashValue=bb.utils.sha256_file(image_path),
                                )
                            ],
                        )
                    )
                )

        # Tag every artifact exactly once, after all of the task's images
        # have been collected. Doing this inside the loop above would revisit
        # all earlier artifacts again for each additional image.
        for a in artifacts:
            set_purposes(
                d, a, "SPDX_IMAGE_PURPOSE:%s" % imagetype, "SPDX_IMAGE_PURPOSE"
            )
            set_timestamp_now(d, a, "builtTime")

        if artifacts:
            objset.new_scoped_relationship(
                [image_build],
                oe.spdx30.RelationshipType.hasOutput,
                oe.spdx30.LifecycleScopeType.build,
                artifacts,
            )

    if builds:
        rootfs_image, _ = oe.sbom30.find_root_obj_in_jsonld(
            d,
            "rootfs",
            "%s-%s-rootfs" % (image_basename, machine),
            oe.spdx30.software_Package,
            # TODO: Should use a purpose to filter here?
        )
        objset.new_scoped_relationship(
            builds,
            oe.spdx30.RelationshipType.hasInput,
            oe.spdx30.LifecycleScopeType.build,
            [oe.sbom30.get_element_link_id(rootfs_image)],
        )

    objset.add_aliases()
    objset.link()
    oe.sbom30.write_recipe_jsonld_doc(d, objset, "image", spdx_work_dir)
1212
1213
def create_image_sbom_spdx(d):
    """
    Write the image SBoM to "<IMAGE_NAME>.spdx.json" in SPDXIMAGEDEPLOYDIR.

    The SBoM roots are the link ID of the rootfs package found under
    "rootfs", followed by the link IDs of every root File in the image
    document found under "image". If IMAGE_LINK_NAME is set and names a
    different path, "<IMAGE_LINK_NAME>.spdx.json" is created as a relative
    symlink to the SBoM.
    """
    import oe.sbom30

    image_name = d.getVar("IMAGE_NAME")
    image_basename = d.getVar("IMAGE_BASENAME")
    image_link_name = d.getVar("IMAGE_LINK_NAME")
    imgdeploydir = Path(d.getVar("SPDXIMAGEDEPLOYDIR"))
    machine = d.getVar("MACHINE")

    sbom_suffix = ".spdx.json"
    spdx_path = imgdeploydir / (image_name + sbom_suffix)

    # TODO: Do we need to add the rootfs or are the image files sufficient?
    rootfs_image, _ = oe.sbom30.find_root_obj_in_jsonld(
        d,
        "rootfs",
        "%s-%s-rootfs" % (image_basename, machine),
        oe.spdx30.software_Package,
        # TODO: Should use a purpose here?
    )
    root_elements = [oe.sbom30.get_element_link_id(rootfs_image)]

    image_objset, _ = oe.sbom30.find_jsonld(
        d, "image", "%s-%s-image" % (image_basename, machine), required=True
    )
    root_elements.extend(
        oe.sbom30.get_element_link_id(image_file)
        for image_file in image_objset.foreach_root(oe.spdx30.software_File)
    )

    objset, _ = oe.sbom30.create_sbom(d, image_name, root_elements)

    oe.sbom30.write_jsonld_doc(d, objset, spdx_path)

    if image_link_name:
        link = imgdeploydir / (image_link_name + sbom_suffix)
        # Nothing to link when the link name is the SBoM file itself
        if link != spdx_path:
            link.symlink_to(os.path.relpath(spdx_path, link.parent))
1254
1255
def sdk_create_spdx(d, sdk_type, spdx_work_dir, toolchain_outputname):
    """
    Write "sdk-rootfs.spdx.json" into `spdx_work_dir` for one SDK rootfs.

    The document is named "<toolchain_outputname>-<sdk_type>" and has two
    roots: a Package of that name with the "archive" purpose, and the
    "sdk-rootfs" task Build that outputs it. The installed packages reported
    by oe.sdk.sdk_list_installed_packages() are recorded as inputs of that
    build; the target package list is requested when `sdk_type` is "target".
    """
    sdk_name = toolchain_outputname + "-" + sdk_type
    sdk_packages = oe.sdk.sdk_list_installed_packages(d, sdk_type == "target")

    objset = oe.sbom30.ObjectSet.new_objset(d, sdk_name)

    rootfs_package = oe.spdx30.software_Package(
        _id=objset.new_spdxid("sdk-rootfs", sdk_name),
        creationInfo=objset.doc.creationInfo,
        name=sdk_name,
        software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.archive,
    )
    sdk_rootfs = objset.add_root(rootfs_package)
    set_timestamp_now(d, sdk_rootfs, "builtTime")

    rootfs_task = objset.new_task_build("sdk-rootfs", "sdk-rootfs")
    sdk_build = objset.add_root(rootfs_task)
    set_timestamp_now(d, sdk_build, "build_buildEndTime")

    # The build produces the rootfs package...
    objset.new_scoped_relationship(
        [sdk_build],
        oe.spdx30.RelationshipType.hasOutput,
        oe.spdx30.LifecycleScopeType.build,
        [sdk_rootfs],
    )

    # ...and consumes every installed package
    collect_build_package_inputs(d, objset, sdk_build, sdk_packages)

    objset.add_aliases()
    oe.sbom30.write_jsonld_doc(d, objset, spdx_work_dir / "sdk-rootfs.spdx.json")
1286
1287
def create_sdk_sbom(d, sdk_deploydir, spdx_work_dir, toolchain_outputname):
    """
    Write the SDK SBoM to "<toolchain_outputname>.spdx.json" in `sdk_deploydir`.

    The "sdk-rootfs.spdx.json" document from `spdx_work_dir` is loaded and
    extended with an "sdk-populate" task Build that takes the rootfs package
    as input. Every regular, non-symlink file under `sdk_deploydir` becomes a
    File element output by that build: "*.manifest" files get the "manifest"
    purpose, "*.testdata.json" files the "configuration" purpose, and all
    other files take their purposes from SPDX_SDK_PURPOSE. The SBoM is then
    created with all of those files as its root elements.

    Calls bb.fatal() if the loaded document has no root Package, and warns
    if no output files are found.
    """
    # Load the document written earlier
    rootfs_objset = oe.sbom30.load_jsonld(
        d, spdx_work_dir / "sdk-rootfs.spdx.json", required=True
    )

    # Create a new build for the SDK installer
    sdk_build = rootfs_objset.new_task_build("sdk-populate", "sdk-populate")
    set_timestamp_now(d, sdk_build, "build_buildEndTime")

    rootfs = rootfs_objset.find_root(oe.spdx30.software_Package)
    if rootfs is None:
        bb.fatal("Unable to find rootfs artifact")

    rootfs_objset.new_scoped_relationship(
        [sdk_build],
        oe.spdx30.RelationshipType.hasInput,
        oe.spdx30.LifecycleScopeType.build,
        [rootfs],
    )

    files = set()
    # NOTE(review): filled in below but never read; create_sbom() is given
    # every file as a root element. Confirm whether only these were meant to
    # be the SBoM roots.
    root_files = []

    # os.walk() does not descend into symlinked directories by default, but
    # symlinks to files can still show up in filenames, so those are filtered
    # out explicitly below
    for dirpath, dirnames, filenames in os.walk(sdk_deploydir, onerror=walk_error):
        # Sorting in place fixes the order in which the tree is visited
        dirnames.sort()
        filenames.sort()
        for fn in filenames:
            fpath = Path(dirpath) / fn
            if not fpath.is_file() or fpath.is_symlink():
                continue

            relpath = str(fpath.relative_to(sdk_deploydir))
            file_id = rootfs_objset.new_spdxid("sdk-installer", relpath)

            sdk_file = rootfs_objset.new_file(file_id, relpath, fpath)
            set_timestamp_now(d, sdk_file, "builtTime")

            if fn.endswith(".manifest"):
                sdk_file.software_primaryPurpose = (
                    oe.spdx30.software_SoftwarePurpose.manifest
                )
            elif fn.endswith(".testdata.json"):
                sdk_file.software_primaryPurpose = (
                    oe.spdx30.software_SoftwarePurpose.configuration
                )
            else:
                set_purposes(d, sdk_file, "SPDX_SDK_PURPOSE")
                root_files.append(sdk_file)

            files.add(sdk_file)

    if not files:
        bb.warn(f"No SDK output files found in {sdk_deploydir}")
    else:
        rootfs_objset.new_scoped_relationship(
            [sdk_build],
            oe.spdx30.RelationshipType.hasOutput,
            oe.spdx30.LifecycleScopeType.build,
            files,
        )

    objset, _ = oe.sbom30.create_sbom(
        d, toolchain_outputname, sorted(files), [rootfs_objset]
    )

    sbom_path = sdk_deploydir / (toolchain_outputname + ".spdx.json")
    oe.sbom30.write_jsonld_doc(d, objset, sbom_path)
1359