1# 2# Copyright OpenEmbedded Contributors 3# 4# SPDX-License-Identifier: GPL-2.0-only 5# 6 7import json 8import oe.cve_check 9import oe.packagedata 10import oe.patch 11import oe.sbom30 12import oe.spdx30 13import oe.spdx_common 14import oe.sdk 15import os 16 17from contextlib import contextmanager 18from datetime import datetime, timezone 19from pathlib import Path 20 21 22def set_timestamp_now(d, o, prop): 23 if d.getVar("SPDX_INCLUDE_TIMESTAMPS") == "1": 24 setattr(o, prop, datetime.now(timezone.utc)) 25 else: 26 # Doing this helps to validated that the property actually exists, and 27 # also that it is not mandatory 28 delattr(o, prop) 29 30 31def add_license_expression(d, objset, license_expression, license_data): 32 simple_license_text = {} 33 license_text_map = {} 34 license_ref_idx = 0 35 36 def add_license_text(name): 37 nonlocal objset 38 nonlocal simple_license_text 39 40 if name in simple_license_text: 41 return simple_license_text[name] 42 43 lic = objset.find_filter( 44 oe.spdx30.simplelicensing_SimpleLicensingText, 45 name=name, 46 ) 47 48 if lic is not None: 49 simple_license_text[name] = lic 50 return lic 51 52 lic = objset.add( 53 oe.spdx30.simplelicensing_SimpleLicensingText( 54 _id=objset.new_spdxid("license-text", name), 55 creationInfo=objset.doc.creationInfo, 56 name=name, 57 ) 58 ) 59 objset.set_element_alias(lic) 60 simple_license_text[name] = lic 61 62 if name == "PD": 63 lic.simplelicensing_licenseText = "Software released to the public domain" 64 return lic 65 66 # Seach for the license in COMMON_LICENSE_DIR and LICENSE_PATH 67 for directory in [d.getVar("COMMON_LICENSE_DIR")] + ( 68 d.getVar("LICENSE_PATH") or "" 69 ).split(): 70 try: 71 with (Path(directory) / name).open(errors="replace") as f: 72 lic.simplelicensing_licenseText = f.read() 73 return lic 74 75 except FileNotFoundError: 76 pass 77 78 # If it's not SPDX or PD, then NO_GENERIC_LICENSE must be set 79 filename = d.getVarFlag("NO_GENERIC_LICENSE", name) 80 if filename: 81 filename = d.expand("${S}/" + filename) 82 with open(filename, errors="replace") as f: 83 lic.simplelicensing_licenseText = f.read() 84 return lic 85 else: 86 bb.fatal("Cannot find any text for license %s" % name) 87 88 def convert(l): 89 nonlocal license_text_map 90 nonlocal license_ref_idx 91 92 if l == "(" or l == ")": 93 return l 94 95 if l == "&": 96 return "AND" 97 98 if l == "|": 99 return "OR" 100 101 if l == "CLOSED": 102 return "NONE" 103 104 spdx_license = d.getVarFlag("SPDXLICENSEMAP", l) or l 105 if spdx_license in license_data["licenses"]: 106 return spdx_license 107 108 spdx_license = "LicenseRef-" + l 109 if spdx_license not in license_text_map: 110 license_text_map[spdx_license] = oe.sbom30.get_element_link_id( 111 add_license_text(l) 112 ) 113 114 return spdx_license 115 116 lic_split = ( 117 license_expression.replace("(", " ( ") 118 .replace(")", " ) ") 119 .replace("|", " | ") 120 .replace("&", " & ") 121 .split() 122 ) 123 spdx_license_expression = " ".join(convert(l) for l in lic_split) 124 125 o = objset.new_license_expression( 126 spdx_license_expression, license_data, license_text_map 127 ) 128 objset.set_element_alias(o) 129 return o 130 131 132def add_package_files( 133 d, 134 objset, 135 topdir, 136 get_spdxid, 137 get_purposes, 138 license_data=None, 139 *, 140 archive=None, 141 ignore_dirs=[], 142 ignore_top_level_dirs=[], 143): 144 source_date_epoch = d.getVar("SOURCE_DATE_EPOCH") 145 if source_date_epoch: 146 source_date_epoch = int(source_date_epoch) 147 148 spdx_files = set() 149 150 file_counter = 1 151 for subdir, dirs, files in os.walk(topdir): 152 dirs[:] = [d for d in dirs if d not in ignore_dirs] 153 if subdir == str(topdir): 154 dirs[:] = [d for d in dirs if d not in ignore_top_level_dirs] 155 156 for file in files: 157 filepath = Path(subdir) / file 158 if filepath.is_symlink() or not filepath.is_file(): 159 continue 160 161 filename = str(filepath.relative_to(topdir)) 162 file_purposes = get_purposes(filepath) 163 164 spdx_file = objset.new_file( 165 get_spdxid(file_counter), 166 filename, 167 filepath, 168 purposes=file_purposes, 169 ) 170 spdx_files.add(spdx_file) 171 172 if ( 173 oe.spdx30.software_SoftwarePurpose.source in file_purposes 174 and license_data is not None 175 ): 176 objset.scan_declared_licenses(spdx_file, filepath, license_data) 177 178 if archive is not None: 179 with filepath.open("rb") as f: 180 info = archive.gettarinfo(fileobj=f) 181 info.name = filename 182 info.uid = 0 183 info.gid = 0 184 info.uname = "root" 185 info.gname = "root" 186 187 if source_date_epoch is not None and info.mtime > source_date_epoch: 188 info.mtime = source_date_epoch 189 190 archive.addfile(info, f) 191 192 file_counter += 1 193 194 bb.debug(1, "Added %d files to %s" % (len(spdx_files), objset.doc._id)) 195 196 return spdx_files 197 198 199def get_package_sources_from_debug( 200 d, package, package_files, sources, source_hash_cache 201): 202 def file_path_match(file_path, pkg_file): 203 if file_path.lstrip("/") == pkg_file.name.lstrip("/"): 204 return True 205 206 for e in pkg_file.extension: 207 if isinstance(e, oe.sbom30.OEFileNameAliasExtension): 208 for a in e.aliases: 209 if file_path.lstrip("/") == a.lstrip("/"): 210 return True 211 212 return False 213 214 debug_search_paths = [ 215 Path(d.getVar("SPDXWORK")), 216 Path(d.getVar("PKGD")), 217 Path(d.getVar("STAGING_DIR_TARGET")), 218 Path(d.getVar("STAGING_DIR_NATIVE")), 219 Path(d.getVar("STAGING_KERNEL_DIR")), 220 ] 221 222 pkg_data = oe.packagedata.read_subpkgdata_extended(package, d) 223 224 if pkg_data is None: 225 return 226 227 dep_source_files = set() 228 229 for file_path, file_data in pkg_data["files_info"].items(): 230 if not "debugsrc" in file_data: 231 continue 232 233 if not any(file_path_match(file_path, pkg_file) for pkg_file in package_files): 234 bb.fatal( 235 "No package file found for %s in %s; SPDX found: %s" 236 % (str(file_path), package, " ".join(p.name for p in package_files)) 237 ) 238 continue 239 240 for debugsrc in file_data["debugsrc"]: 241 for search in debug_search_paths: 242 if debugsrc.startswith("/usr/src/kernel"): 243 debugsrc_path = search / debugsrc.replace("/usr/src/kernel/", "") 244 else: 245 debugsrc_path = search / debugsrc.lstrip("/") 246 247 if debugsrc_path in source_hash_cache: 248 file_sha256 = source_hash_cache[debugsrc_path] 249 if file_sha256 is None: 250 continue 251 else: 252 # We can only hash files below, skip directories, links, etc. 253 if not debugsrc_path.is_file(): 254 source_hash_cache[debugsrc_path] = None 255 continue 256 257 file_sha256 = bb.utils.sha256_file(debugsrc_path) 258 source_hash_cache[debugsrc_path] = file_sha256 259 260 if file_sha256 in sources: 261 source_file = sources[file_sha256] 262 dep_source_files.add(source_file) 263 else: 264 bb.debug( 265 1, 266 "Debug source %s with SHA256 %s not found in any dependency" 267 % (str(debugsrc_path), file_sha256), 268 ) 269 break 270 else: 271 bb.debug(1, "Debug source %s not found" % debugsrc) 272 273 return dep_source_files 274 275 276def collect_dep_objsets(d, build): 277 deps = oe.spdx_common.get_spdx_deps(d) 278 279 dep_objsets = [] 280 dep_builds = set() 281 282 dep_build_spdxids = set() 283 for dep in deps: 284 bb.debug(1, "Fetching SPDX for dependency %s" % (dep.pn)) 285 dep_build, dep_objset = oe.sbom30.find_root_obj_in_jsonld( 286 d, "recipes", "recipe-" + dep.pn, oe.spdx30.build_Build 287 ) 288 # If the dependency is part of the taskhash, return it to be linked 289 # against. Otherwise, it cannot be linked against because this recipe 290 # will not rebuilt if dependency changes 291 if dep.in_taskhash: 292 dep_objsets.append(dep_objset) 293 294 # The build _can_ be linked against (by alias) 295 dep_builds.add(dep_build) 296 297 return dep_objsets, dep_builds 298 299 300def index_sources_by_hash(sources, dest): 301 for s in sources: 302 if not isinstance(s, oe.spdx30.software_File): 303 continue 304 305 if s.software_primaryPurpose != oe.spdx30.software_SoftwarePurpose.source: 306 continue 307 308 for v in s.verifiedUsing: 309 if v.algorithm == oe.spdx30.HashAlgorithm.sha256: 310 if not v.hashValue in dest: 311 dest[v.hashValue] = s 312 break 313 else: 314 bb.fatal(f"No SHA256 found for {s.name}") 315 316 317def collect_dep_sources(dep_objsets, dest): 318 for objset in dep_objsets: 319 # Don't collect sources from native recipes as they 320 # match non-native sources also. 321 if objset.is_native(): 322 continue 323 324 bb.debug(1, "Fetching Sources for dependency %s" % (objset.doc.name)) 325 326 dep_build = objset.find_root(oe.spdx30.build_Build) 327 if not dep_build: 328 bb.fatal("Unable to find a build") 329 330 for e in objset.foreach_type(oe.spdx30.Relationship): 331 if dep_build is not e.from_: 332 continue 333 334 if e.relationshipType != oe.spdx30.RelationshipType.hasInput: 335 continue 336 337 index_sources_by_hash(e.to, dest) 338 339 340def add_download_files(d, objset): 341 inputs = set() 342 343 urls = d.getVar("SRC_URI").split() 344 fetch = bb.fetch2.Fetch(urls, d) 345 346 for download_idx, src_uri in enumerate(urls): 347 fd = fetch.ud[src_uri] 348 349 for name in fd.names: 350 file_name = os.path.basename(fetch.localpath(src_uri)) 351 if oe.patch.patch_path(src_uri, fetch, "", expand=False): 352 primary_purpose = oe.spdx30.software_SoftwarePurpose.patch 353 else: 354 primary_purpose = oe.spdx30.software_SoftwarePurpose.source 355 356 if fd.type == "file": 357 if os.path.isdir(fd.localpath): 358 walk_idx = 1 359 for root, dirs, files in os.walk(fd.localpath): 360 for f in files: 361 f_path = os.path.join(root, f) 362 if os.path.islink(f_path): 363 # TODO: SPDX doesn't support symlinks yet 364 continue 365 366 file = objset.new_file( 367 objset.new_spdxid( 368 "source", str(download_idx + 1), str(walk_idx) 369 ), 370 os.path.join( 371 file_name, os.path.relpath(f_path, fd.localpath) 372 ), 373 f_path, 374 purposes=[primary_purpose], 375 ) 376 377 inputs.add(file) 378 walk_idx += 1 379 380 else: 381 file = objset.new_file( 382 objset.new_spdxid("source", str(download_idx + 1)), 383 file_name, 384 fd.localpath, 385 purposes=[primary_purpose], 386 ) 387 inputs.add(file) 388 389 else: 390 dl = objset.add( 391 oe.spdx30.software_Package( 392 _id=objset.new_spdxid("source", str(download_idx + 1)), 393 creationInfo=objset.doc.creationInfo, 394 name=file_name, 395 software_primaryPurpose=primary_purpose, 396 software_downloadLocation=oe.spdx_common.fetch_data_to_uri( 397 fd, name 398 ), 399 ) 400 ) 401 402 if fd.method.supports_checksum(fd): 403 # TODO Need something better than hard coding this 404 for checksum_id in ["sha256", "sha1"]: 405 expected_checksum = getattr( 406 fd, "%s_expected" % checksum_id, None 407 ) 408 if expected_checksum is None: 409 continue 410 411 dl.verifiedUsing.append( 412 oe.spdx30.Hash( 413 algorithm=getattr(oe.spdx30.HashAlgorithm, checksum_id), 414 hashValue=expected_checksum, 415 ) 416 ) 417 418 inputs.add(dl) 419 420 return inputs 421 422 423def set_purposes(d, element, *var_names, force_purposes=[]): 424 purposes = force_purposes[:] 425 426 for var_name in var_names: 427 val = d.getVar(var_name) 428 if val: 429 purposes.extend(val.split()) 430 break 431 432 if not purposes: 433 bb.warn("No SPDX purposes found in %s" % " ".join(var_names)) 434 return 435 436 element.software_primaryPurpose = getattr( 437 oe.spdx30.software_SoftwarePurpose, purposes[0] 438 ) 439 element.software_additionalPurpose = [ 440 getattr(oe.spdx30.software_SoftwarePurpose, p) for p in purposes[1:] 441 ] 442 443 444def create_spdx(d): 445 def set_var_field(var, obj, name, package=None): 446 val = None 447 if package: 448 val = d.getVar("%s:%s" % (var, package)) 449 450 if not val: 451 val = d.getVar(var) 452 453 if val: 454 setattr(obj, name, val) 455 456 license_data = oe.spdx_common.load_spdx_license_data(d) 457 458 deploydir = Path(d.getVar("SPDXDEPLOY")) 459 deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX")) 460 spdx_workdir = Path(d.getVar("SPDXWORK")) 461 include_sources = d.getVar("SPDX_INCLUDE_SOURCES") == "1" 462 pkg_arch = d.getVar("SSTATE_PKGARCH") 463 is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class( 464 "cross", d 465 ) 466 include_vex = d.getVar("SPDX_INCLUDE_VEX") 467 if not include_vex in ("none", "current", "all"): 468 bb.fatal("SPDX_INCLUDE_VEX must be one of 'none', 'current', 'all'") 469 470 build_objset = oe.sbom30.ObjectSet.new_objset(d, "recipe-" + d.getVar("PN")) 471 472 build = build_objset.new_task_build("recipe", "recipe") 473 build_objset.set_element_alias(build) 474 475 build_objset.doc.rootElement.append(build) 476 477 build_objset.set_is_native(is_native) 478 479 for var in (d.getVar("SPDX_CUSTOM_ANNOTATION_VARS") or "").split(): 480 new_annotation( 481 d, 482 build_objset, 483 build, 484 "%s=%s" % (var, d.getVar(var)), 485 oe.spdx30.AnnotationType.other, 486 ) 487 488 build_inputs = set() 489 490 # Add CVEs 491 cve_by_status = {} 492 if include_vex != "none": 493 for cve in d.getVarFlags("CVE_STATUS") or {}: 494 decoded_status = oe.cve_check.decode_cve_status(d, cve) 495 496 # If this CVE is fixed upstream, skip it unless all CVEs are 497 # specified. 498 if ( 499 include_vex != "all" 500 and "detail" in decoded_status 501 and decoded_status["detail"] 502 in ( 503 "fixed-version", 504 "cpe-stable-backport", 505 ) 506 ): 507 bb.debug(1, "Skipping %s since it is already fixed upstream" % cve) 508 continue 509 510 spdx_cve = build_objset.new_cve_vuln(cve) 511 build_objset.set_element_alias(spdx_cve) 512 513 cve_by_status.setdefault(decoded_status["mapping"], {})[cve] = ( 514 spdx_cve, 515 decoded_status["detail"], 516 decoded_status["description"], 517 ) 518 519 cpe_ids = oe.cve_check.get_cpe_ids(d.getVar("CVE_PRODUCT"), d.getVar("CVE_VERSION")) 520 521 source_files = add_download_files(d, build_objset) 522 build_inputs |= source_files 523 524 recipe_spdx_license = add_license_expression( 525 d, build_objset, d.getVar("LICENSE"), license_data 526 ) 527 build_objset.new_relationship( 528 source_files, 529 oe.spdx30.RelationshipType.hasConcludedLicense, 530 [oe.sbom30.get_element_link_id(recipe_spdx_license)], 531 ) 532 533 dep_sources = {} 534 if oe.spdx_common.process_sources(d) and include_sources: 535 bb.debug(1, "Adding source files to SPDX") 536 oe.spdx_common.get_patched_src(d) 537 538 files = add_package_files( 539 d, 540 build_objset, 541 spdx_workdir, 542 lambda file_counter: build_objset.new_spdxid( 543 "sourcefile", str(file_counter) 544 ), 545 lambda filepath: [oe.spdx30.software_SoftwarePurpose.source], 546 license_data, 547 ignore_dirs=[".git"], 548 ignore_top_level_dirs=["temp"], 549 archive=None, 550 ) 551 build_inputs |= files 552 index_sources_by_hash(files, dep_sources) 553 554 dep_objsets, dep_builds = collect_dep_objsets(d, build) 555 if dep_builds: 556 build_objset.new_scoped_relationship( 557 [build], 558 oe.spdx30.RelationshipType.dependsOn, 559 oe.spdx30.LifecycleScopeType.build, 560 sorted(oe.sbom30.get_element_link_id(b) for b in dep_builds), 561 ) 562 563 debug_source_ids = set() 564 source_hash_cache = {} 565 566 # Write out the package SPDX data now. It is not complete as we cannot 567 # write the runtime data, so write it to a staging area and a later task 568 # will write out the final collection 569 570 # TODO: Handle native recipe output 571 if not is_native: 572 bb.debug(1, "Collecting Dependency sources files") 573 collect_dep_sources(dep_objsets, dep_sources) 574 575 bb.build.exec_func("read_subpackage_metadata", d) 576 577 pkgdest = Path(d.getVar("PKGDEST")) 578 for package in d.getVar("PACKAGES").split(): 579 if not oe.packagedata.packaged(package, d): 580 continue 581 582 pkg_name = d.getVar("PKG:%s" % package) or package 583 584 bb.debug(1, "Creating SPDX for package %s" % pkg_name) 585 586 pkg_objset = oe.sbom30.ObjectSet.new_objset(d, "package-" + pkg_name) 587 588 spdx_package = pkg_objset.add_root( 589 oe.spdx30.software_Package( 590 _id=pkg_objset.new_spdxid("package", pkg_name), 591 creationInfo=pkg_objset.doc.creationInfo, 592 name=pkg_name, 593 software_packageVersion=d.getVar("PV"), 594 ) 595 ) 596 set_timestamp_now(d, spdx_package, "builtTime") 597 598 set_purposes( 599 d, 600 spdx_package, 601 "SPDX_PACKAGE_ADDITIONAL_PURPOSE:%s" % package, 602 "SPDX_PACKAGE_ADDITIONAL_PURPOSE", 603 force_purposes=["install"], 604 ) 605 606 supplier = build_objset.new_agent("SPDX_PACKAGE_SUPPLIER") 607 if supplier is not None: 608 spdx_package.suppliedBy = ( 609 supplier if isinstance(supplier, str) else supplier._id 610 ) 611 612 set_var_field( 613 "HOMEPAGE", spdx_package, "software_homePage", package=package 614 ) 615 set_var_field("SUMMARY", spdx_package, "summary", package=package) 616 set_var_field("DESCRIPTION", spdx_package, "description", package=package) 617 618 pkg_objset.new_scoped_relationship( 619 [oe.sbom30.get_element_link_id(build)], 620 oe.spdx30.RelationshipType.hasOutput, 621 oe.spdx30.LifecycleScopeType.build, 622 [spdx_package], 623 ) 624 625 for cpe_id in cpe_ids: 626 spdx_package.externalIdentifier.append( 627 oe.spdx30.ExternalIdentifier( 628 externalIdentifierType=oe.spdx30.ExternalIdentifierType.cpe23, 629 identifier=cpe_id, 630 ) 631 ) 632 633 # TODO: Generate a file for each actual IPK/DEB/RPM/TGZ file 634 # generated and link it to the package 635 # spdx_package_file = pkg_objset.add(oe.spdx30.software_File( 636 # _id=pkg_objset.new_spdxid("distribution", pkg_name), 637 # creationInfo=pkg_objset.doc.creationInfo, 638 # name=pkg_name, 639 # software_primaryPurpose=spdx_package.software_primaryPurpose, 640 # software_additionalPurpose=spdx_package.software_additionalPurpose, 641 # )) 642 # set_timestamp_now(d, spdx_package_file, "builtTime") 643 644 ## TODO add hashes 645 # pkg_objset.new_relationship( 646 # [spdx_package], 647 # oe.spdx30.RelationshipType.hasDistributionArtifact, 648 # [spdx_package_file], 649 # ) 650 651 # NOTE: licenses live in the recipe collection and are referenced 652 # by ID in the package collection(s). This helps reduce duplication 653 # (since a lot of packages will have the same license), and also 654 # prevents duplicate license SPDX IDs in the packages 655 package_license = d.getVar("LICENSE:%s" % package) 656 if package_license and package_license != d.getVar("LICENSE"): 657 package_spdx_license = add_license_expression( 658 d, build_objset, package_license, license_data 659 ) 660 else: 661 package_spdx_license = recipe_spdx_license 662 663 pkg_objset.new_relationship( 664 [spdx_package], 665 oe.spdx30.RelationshipType.hasConcludedLicense, 666 [oe.sbom30.get_element_link_id(package_spdx_license)], 667 ) 668 669 # NOTE: CVE Elements live in the recipe collection 670 all_cves = set() 671 for status, cves in cve_by_status.items(): 672 for cve, items in cves.items(): 673 spdx_cve, detail, description = items 674 spdx_cve_id = oe.sbom30.get_element_link_id(spdx_cve) 675 676 all_cves.add(spdx_cve_id) 677 678 if status == "Patched": 679 pkg_objset.new_vex_patched_relationship( 680 [spdx_cve_id], [spdx_package] 681 ) 682 elif status == "Unpatched": 683 pkg_objset.new_vex_unpatched_relationship( 684 [spdx_cve_id], [spdx_package] 685 ) 686 elif status == "Ignored": 687 spdx_vex = pkg_objset.new_vex_ignored_relationship( 688 [spdx_cve_id], 689 [spdx_package], 690 impact_statement=description, 691 ) 692 693 if detail in ( 694 "ignored", 695 "cpe-incorrect", 696 "disputed", 697 "upstream-wontfix", 698 ): 699 # VEX doesn't have justifications for this 700 pass 701 elif detail in ( 702 "not-applicable-config", 703 "not-applicable-platform", 704 ): 705 for v in spdx_vex: 706 v.security_justificationType = ( 707 oe.spdx30.security_VexJustificationType.vulnerableCodeNotPresent 708 ) 709 else: 710 bb.fatal(f"Unknown detail '{detail}' for ignored {cve}") 711 else: 712 bb.fatal(f"Unknown {cve} status '{status}'") 713 714 if all_cves: 715 pkg_objset.new_relationship( 716 [spdx_package], 717 oe.spdx30.RelationshipType.hasAssociatedVulnerability, 718 sorted(list(all_cves)), 719 ) 720 721 bb.debug(1, "Adding package files to SPDX for package %s" % pkg_name) 722 package_files = add_package_files( 723 d, 724 pkg_objset, 725 pkgdest / package, 726 lambda file_counter: pkg_objset.new_spdxid( 727 "package", pkg_name, "file", str(file_counter) 728 ), 729 # TODO: Can we know the purpose here? 730 lambda filepath: [], 731 license_data, 732 ignore_top_level_dirs=["CONTROL", "DEBIAN"], 733 archive=None, 734 ) 735 736 if package_files: 737 pkg_objset.new_relationship( 738 [spdx_package], 739 oe.spdx30.RelationshipType.contains, 740 sorted(list(package_files)), 741 ) 742 743 if include_sources: 744 debug_sources = get_package_sources_from_debug( 745 d, package, package_files, dep_sources, source_hash_cache 746 ) 747 debug_source_ids |= set( 748 oe.sbom30.get_element_link_id(d) for d in debug_sources 749 ) 750 751 oe.sbom30.write_recipe_jsonld_doc( 752 d, pkg_objset, "packages-staging", deploydir, create_spdx_id_links=False 753 ) 754 755 if include_sources: 756 bb.debug(1, "Adding sysroot files to SPDX") 757 sysroot_files = add_package_files( 758 d, 759 build_objset, 760 d.expand("${COMPONENTS_DIR}/${PACKAGE_ARCH}/${PN}"), 761 lambda file_counter: build_objset.new_spdxid("sysroot", str(file_counter)), 762 lambda filepath: [], 763 license_data, 764 archive=None, 765 ) 766 767 if sysroot_files: 768 build_objset.new_scoped_relationship( 769 [build], 770 oe.spdx30.RelationshipType.hasOutput, 771 oe.spdx30.LifecycleScopeType.build, 772 sorted(list(sysroot_files)), 773 ) 774 775 if build_inputs or debug_source_ids: 776 build_objset.new_scoped_relationship( 777 [build], 778 oe.spdx30.RelationshipType.hasInput, 779 oe.spdx30.LifecycleScopeType.build, 780 sorted(list(build_inputs)) + sorted(list(debug_source_ids)), 781 ) 782 783 oe.sbom30.write_recipe_jsonld_doc(d, build_objset, "recipes", deploydir) 784 785 786def create_package_spdx(d): 787 deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX")) 788 deploydir = Path(d.getVar("SPDXRUNTIMEDEPLOY")) 789 is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class( 790 "cross", d 791 ) 792 793 providers = oe.spdx_common.collect_package_providers(d) 794 pkg_arch = d.getVar("SSTATE_PKGARCH") 795 796 if is_native: 797 return 798 799 bb.build.exec_func("read_subpackage_metadata", d) 800 801 dep_package_cache = {} 802 803 # Any element common to all packages that need to be referenced by ID 804 # should be written into this objset set 805 common_objset = oe.sbom30.ObjectSet.new_objset( 806 d, "%s-package-common" % d.getVar("PN") 807 ) 808 809 pkgdest = Path(d.getVar("PKGDEST")) 810 for package in d.getVar("PACKAGES").split(): 811 localdata = bb.data.createCopy(d) 812 pkg_name = d.getVar("PKG:%s" % package) or package 813 localdata.setVar("PKG", pkg_name) 814 localdata.setVar("OVERRIDES", d.getVar("OVERRIDES", False) + ":" + package) 815 816 if not oe.packagedata.packaged(package, localdata): 817 continue 818 819 spdx_package, pkg_objset = oe.sbom30.load_obj_in_jsonld( 820 d, 821 pkg_arch, 822 "packages-staging", 823 "package-" + pkg_name, 824 oe.spdx30.software_Package, 825 software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install, 826 ) 827 828 # We will write out a new collection, so link it to the new 829 # creation info in the common package data. The old creation info 830 # should still exist and be referenced by all the existing elements 831 # in the package 832 pkg_objset.creationInfo = pkg_objset.copy_creation_info( 833 common_objset.doc.creationInfo 834 ) 835 836 runtime_spdx_deps = set() 837 838 deps = bb.utils.explode_dep_versions2(localdata.getVar("RDEPENDS") or "") 839 seen_deps = set() 840 for dep, _ in deps.items(): 841 if dep in seen_deps: 842 continue 843 844 if dep not in providers: 845 continue 846 847 (dep, _) = providers[dep] 848 849 if not oe.packagedata.packaged(dep, localdata): 850 continue 851 852 dep_pkg_data = oe.packagedata.read_subpkgdata_dict(dep, d) 853 dep_pkg = dep_pkg_data["PKG"] 854 855 if dep in dep_package_cache: 856 dep_spdx_package = dep_package_cache[dep] 857 else: 858 bb.debug(1, "Searching for %s" % dep_pkg) 859 dep_spdx_package, _ = oe.sbom30.find_root_obj_in_jsonld( 860 d, 861 "packages-staging", 862 "package-" + dep_pkg, 863 oe.spdx30.software_Package, 864 software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install, 865 ) 866 dep_package_cache[dep] = dep_spdx_package 867 868 runtime_spdx_deps.add(dep_spdx_package) 869 seen_deps.add(dep) 870 871 if runtime_spdx_deps: 872 pkg_objset.new_scoped_relationship( 873 [spdx_package], 874 oe.spdx30.RelationshipType.dependsOn, 875 oe.spdx30.LifecycleScopeType.runtime, 876 [oe.sbom30.get_element_link_id(dep) for dep in runtime_spdx_deps], 877 ) 878 879 oe.sbom30.write_recipe_jsonld_doc(d, pkg_objset, "packages", deploydir) 880 881 oe.sbom30.write_recipe_jsonld_doc(d, common_objset, "common-package", deploydir) 882 883 884def write_bitbake_spdx(d): 885 # Set PN to "bitbake" so that SPDX IDs can be generated 886 d.setVar("PN", "bitbake") 887 d.setVar("BB_TASKHASH", "bitbake") 888 oe.spdx_common.load_spdx_license_data(d) 889 890 deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX")) 891 892 objset = oe.sbom30.ObjectSet.new_objset(d, "bitbake", False) 893 894 host_import_key = d.getVar("SPDX_BUILD_HOST") 895 invoked_by = objset.new_agent("SPDX_INVOKED_BY", add=False) 896 on_behalf_of = objset.new_agent("SPDX_ON_BEHALF_OF", add=False) 897 898 if d.getVar("SPDX_INCLUDE_BITBAKE_PARENT_BUILD") == "1": 899 # Since the Build objects are unique, we may as well set the creation 900 # time to the current time instead of the fallback SDE 901 objset.doc.creationInfo.created = datetime.now(timezone.utc) 902 903 # Each invocation of bitbake should have a unique ID since it is a 904 # unique build 905 nonce = os.urandom(16).hex() 906 907 build = objset.add_root( 908 oe.spdx30.build_Build( 909 _id=objset.new_spdxid(nonce, include_unihash=False), 910 creationInfo=objset.doc.creationInfo, 911 build_buildType=oe.sbom30.SPDX_BUILD_TYPE, 912 ) 913 ) 914 set_timestamp_now(d, build, "build_buildStartTime") 915 916 if host_import_key: 917 objset.new_scoped_relationship( 918 [build], 919 oe.spdx30.RelationshipType.hasHost, 920 oe.spdx30.LifecycleScopeType.build, 921 [objset.new_import(host_import_key)], 922 ) 923 924 if invoked_by: 925 objset.add(invoked_by) 926 invoked_by_spdx = objset.new_scoped_relationship( 927 [build], 928 oe.spdx30.RelationshipType.invokedBy, 929 oe.spdx30.LifecycleScopeType.build, 930 [invoked_by], 931 ) 932 933 if on_behalf_of: 934 objset.add(on_behalf_of) 935 objset.new_scoped_relationship( 936 [on_behalf_of], 937 oe.spdx30.RelationshipType.delegatedTo, 938 oe.spdx30.LifecycleScopeType.build, 939 invoked_by_spdx, 940 ) 941 942 elif on_behalf_of: 943 bb.warn("SPDX_ON_BEHALF_OF has no effect if SPDX_INVOKED_BY is not set") 944 945 else: 946 if host_import_key: 947 bb.warn( 948 "SPDX_BUILD_HOST has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set" 949 ) 950 951 if invoked_by: 952 bb.warn( 953 "SPDX_INVOKED_BY has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set" 954 ) 955 956 if on_behalf_of: 957 bb.warn( 958 "SPDX_ON_BEHALF_OF has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set" 959 ) 960 961 for obj in objset.foreach_type(oe.spdx30.Element): 962 obj.extension.append(oe.sbom30.OEIdAliasExtension()) 963 964 oe.sbom30.write_jsonld_doc(d, objset, deploy_dir_spdx / "bitbake.spdx.json") 965 966 967def collect_build_package_inputs(d, objset, build, packages): 968 import oe.sbom30 969 970 providers = oe.spdx_common.collect_package_providers(d) 971 972 build_deps = set() 973 missing_providers = set() 974 975 for name in sorted(packages.keys()): 976 if name not in providers: 977 missing_providers.add(name) 978 continue 979 980 pkg_name, pkg_hashfn = providers[name] 981 982 # Copy all of the package SPDX files into the Sbom elements 983 pkg_spdx, _ = oe.sbom30.find_root_obj_in_jsonld( 984 d, 985 "packages", 986 "package-" + pkg_name, 987 oe.spdx30.software_Package, 988 software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install, 989 ) 990 build_deps.add(oe.sbom30.get_element_link_id(pkg_spdx)) 991 992 if missing_providers: 993 bb.fatal( 994 f"Unable to find SPDX provider(s) for: {', '.join(sorted(missing_providers))}" 995 ) 996 997 if build_deps: 998 objset.new_scoped_relationship( 999 [build], 1000 oe.spdx30.RelationshipType.hasInput, 1001 oe.spdx30.LifecycleScopeType.build, 1002 sorted(list(build_deps)), 1003 ) 1004 1005 1006def create_rootfs_spdx(d): 1007 deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX")) 1008 deploydir = Path(d.getVar("SPDXROOTFSDEPLOY")) 1009 root_packages_file = Path(d.getVar("SPDX_ROOTFS_PACKAGES")) 1010 image_basename = d.getVar("IMAGE_BASENAME") 1011 machine = d.getVar("MACHINE") 1012 1013 with root_packages_file.open("r") as f: 1014 packages = json.load(f) 1015 1016 objset = oe.sbom30.ObjectSet.new_objset( 1017 d, "%s-%s-rootfs" % (image_basename, machine) 1018 ) 1019 1020 rootfs = objset.add_root( 1021 oe.spdx30.software_Package( 1022 _id=objset.new_spdxid("rootfs", image_basename), 1023 creationInfo=objset.doc.creationInfo, 1024 name=image_basename, 1025 software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.archive, 1026 ) 1027 ) 1028 set_timestamp_now(d, rootfs, "builtTime") 1029 1030 rootfs_build = objset.add_root(objset.new_task_build("rootfs", "rootfs")) 1031 set_timestamp_now(d, rootfs_build, "build_buildEndTime") 1032 1033 objset.new_scoped_relationship( 1034 [rootfs_build], 1035 oe.spdx30.RelationshipType.hasOutput, 1036 oe.spdx30.LifecycleScopeType.build, 1037 [rootfs], 1038 ) 1039 1040 collect_build_package_inputs(d, objset, rootfs_build, packages) 1041 1042 oe.sbom30.write_recipe_jsonld_doc(d, objset, "rootfs", deploydir) 1043 1044 1045def create_image_spdx(d): 1046 import oe.sbom30 1047 1048 image_deploy_dir = Path(d.getVar("IMGDEPLOYDIR")) 1049 manifest_path = Path(d.getVar("IMAGE_OUTPUT_MANIFEST")) 1050 spdx_work_dir = Path(d.getVar("SPDXIMAGEWORK")) 1051 1052 image_basename = d.getVar("IMAGE_BASENAME") 1053 machine = d.getVar("MACHINE") 1054 1055 objset = oe.sbom30.ObjectSet.new_objset( 1056 d, "%s-%s-image" % (image_basename, machine) 1057 ) 1058 1059 with manifest_path.open("r") as f: 1060 manifest = json.load(f) 1061 1062 builds = [] 1063 for task in manifest: 1064 imagetype = task["imagetype"] 1065 taskname = task["taskname"] 1066 1067 image_build = objset.add_root( 1068 objset.new_task_build(taskname, "image/%s" % imagetype) 1069 ) 1070 set_timestamp_now(d, image_build, "build_buildEndTime") 1071 builds.append(image_build) 1072 1073 artifacts = [] 1074 1075 for image in task["images"]: 1076 image_filename = image["filename"] 1077 image_path = image_deploy_dir / image_filename 1078 if os.path.isdir(image_path): 1079 a = add_package_files( 1080 d, 1081 objset, 1082 image_path, 1083 lambda file_counter: objset.new_spdxid( 1084 "imagefile", str(file_counter) 1085 ), 1086 lambda filepath: [], 1087 license_data=None, 1088 ignore_dirs=[], 1089 ignore_top_level_dirs=[], 1090 archive=None, 1091 ) 1092 artifacts.extend(a) 1093 else: 1094 a = objset.add_root( 1095 oe.spdx30.software_File( 1096 _id=objset.new_spdxid("image", image_filename), 1097 creationInfo=objset.doc.creationInfo, 1098 name=image_filename, 1099 verifiedUsing=[ 1100 oe.spdx30.Hash( 1101 algorithm=oe.spdx30.HashAlgorithm.sha256, 1102 hashValue=bb.utils.sha256_file(image_path), 1103 ) 1104 ], 1105 ) 1106 ) 1107 1108 artifacts.append(a) 1109 1110 for a in artifacts: 1111 set_purposes( 1112 d, a, "SPDX_IMAGE_PURPOSE:%s" % imagetype, "SPDX_IMAGE_PURPOSE" 1113 ) 1114 1115 set_timestamp_now(d, a, "builtTime") 1116 1117 1118 if artifacts: 1119 objset.new_scoped_relationship( 1120 [image_build], 1121 oe.spdx30.RelationshipType.hasOutput, 1122 oe.spdx30.LifecycleScopeType.build, 1123 artifacts, 1124 ) 1125 1126 if builds: 1127 rootfs_image, _ = oe.sbom30.find_root_obj_in_jsonld( 1128 d, 1129 "rootfs", 1130 "%s-%s-rootfs" % (image_basename, machine), 1131 oe.spdx30.software_Package, 1132 # TODO: Should use a purpose to filter here? 1133 ) 1134 objset.new_scoped_relationship( 1135 builds, 1136 oe.spdx30.RelationshipType.hasInput, 1137 oe.spdx30.LifecycleScopeType.build, 1138 [oe.sbom30.get_element_link_id(rootfs_image)], 1139 ) 1140 1141 objset.add_aliases() 1142 objset.link() 1143 oe.sbom30.write_recipe_jsonld_doc(d, objset, "image", spdx_work_dir) 1144 1145 1146def create_image_sbom_spdx(d): 1147 import oe.sbom30 1148 1149 image_name = d.getVar("IMAGE_NAME") 1150 image_basename = d.getVar("IMAGE_BASENAME") 1151 image_link_name = d.getVar("IMAGE_LINK_NAME") 1152 imgdeploydir = Path(d.getVar("SPDXIMAGEDEPLOYDIR")) 1153 machine = d.getVar("MACHINE") 1154 1155 spdx_path = imgdeploydir / (image_name + ".spdx.json") 1156 1157 root_elements = [] 1158 1159 # TODO: Do we need to add the rootfs or are the image files sufficient? 1160 rootfs_image, _ = oe.sbom30.find_root_obj_in_jsonld( 1161 d, 1162 "rootfs", 1163 "%s-%s-rootfs" % (image_basename, machine), 1164 oe.spdx30.software_Package, 1165 # TODO: Should use a purpose here? 1166 ) 1167 root_elements.append(oe.sbom30.get_element_link_id(rootfs_image)) 1168 1169 image_objset, _ = oe.sbom30.find_jsonld( 1170 d, "image", "%s-%s-image" % (image_basename, machine), required=True 1171 ) 1172 for o in image_objset.foreach_root(oe.spdx30.software_File): 1173 root_elements.append(oe.sbom30.get_element_link_id(o)) 1174 1175 objset, sbom = oe.sbom30.create_sbom(d, image_name, root_elements) 1176 1177 oe.sbom30.write_jsonld_doc(d, objset, spdx_path) 1178 1179 def make_image_link(target_path, suffix): 1180 if image_link_name: 1181 link = imgdeploydir / (image_link_name + suffix) 1182 if link != target_path: 1183 link.symlink_to(os.path.relpath(target_path, link.parent)) 1184 1185 make_image_link(spdx_path, ".spdx.json") 1186 1187 1188def sdk_create_spdx(d, sdk_type, spdx_work_dir, toolchain_outputname): 1189 sdk_name = toolchain_outputname + "-" + sdk_type 1190 sdk_packages = oe.sdk.sdk_list_installed_packages(d, sdk_type == "target") 1191 1192 objset = oe.sbom30.ObjectSet.new_objset(d, sdk_name) 1193 1194 sdk_rootfs = objset.add_root( 1195 oe.spdx30.software_Package( 1196 _id=objset.new_spdxid("sdk-rootfs", sdk_name), 1197 creationInfo=objset.doc.creationInfo, 1198 name=sdk_name, 1199 software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.archive, 1200 ) 1201 ) 1202 set_timestamp_now(d, sdk_rootfs, "builtTime") 1203 1204 sdk_build = objset.add_root(objset.new_task_build("sdk-rootfs", "sdk-rootfs")) 1205 set_timestamp_now(d, sdk_build, "build_buildEndTime") 1206 1207 objset.new_scoped_relationship( 1208 [sdk_build], 1209 oe.spdx30.RelationshipType.hasOutput, 1210 oe.spdx30.LifecycleScopeType.build, 1211 [sdk_rootfs], 1212 ) 1213 1214 collect_build_package_inputs(d, objset, sdk_build, sdk_packages) 1215 1216 objset.add_aliases() 1217 oe.sbom30.write_jsonld_doc(d, objset, spdx_work_dir / "sdk-rootfs.spdx.json") 1218 1219 1220def create_sdk_sbom(d, sdk_deploydir, spdx_work_dir, toolchain_outputname): 1221 # Load the document written earlier 1222 rootfs_objset = oe.sbom30.load_jsonld( 1223 d, spdx_work_dir / "sdk-rootfs.spdx.json", required=True 1224 ) 1225 1226 # Create a new build for the SDK installer 1227 sdk_build = rootfs_objset.new_task_build("sdk-populate", "sdk-populate") 1228 set_timestamp_now(d, sdk_build, "build_buildEndTime") 1229 1230 rootfs = rootfs_objset.find_root(oe.spdx30.software_Package) 1231 if rootfs is None: 1232 bb.fatal("Unable to find rootfs artifact") 1233 1234 rootfs_objset.new_scoped_relationship( 1235 [sdk_build], 1236 oe.spdx30.RelationshipType.hasInput, 1237 oe.spdx30.LifecycleScopeType.build, 1238 [rootfs], 1239 ) 1240 1241 files = set() 1242 root_files = [] 1243 1244 # NOTE: os.walk() doesn't return symlinks 1245 for dirpath, dirnames, filenames in os.walk(sdk_deploydir): 1246 for fn in filenames: 1247 fpath = Path(dirpath) / fn 1248 if not fpath.is_file() or fpath.is_symlink(): 1249 continue 1250 1251 relpath = str(fpath.relative_to(sdk_deploydir)) 1252 1253 f = rootfs_objset.new_file( 1254 rootfs_objset.new_spdxid("sdk-installer", relpath), 1255 relpath, 1256 fpath, 1257 ) 1258 set_timestamp_now(d, f, "builtTime") 1259 1260 if fn.endswith(".manifest"): 1261 f.software_primaryPurpose = oe.spdx30.software_SoftwarePurpose.manifest 1262 elif fn.endswith(".testdata.json"): 1263 f.software_primaryPurpose = ( 1264 oe.spdx30.software_SoftwarePurpose.configuration 1265 ) 1266 else: 1267 set_purposes(d, f, "SPDX_SDK_PURPOSE") 1268 root_files.append(f) 1269 1270 files.add(f) 1271 1272 if files: 1273 rootfs_objset.new_scoped_relationship( 1274 [sdk_build], 1275 oe.spdx30.RelationshipType.hasOutput, 1276 oe.spdx30.LifecycleScopeType.build, 1277 files, 1278 ) 1279 else: 1280 bb.warn(f"No SDK output files found in {sdk_deploydir}") 1281 1282 objset, sbom = oe.sbom30.create_sbom( 1283 d, toolchain_outputname, sorted(list(files)), [rootfs_objset] 1284 ) 1285 1286 oe.sbom30.write_jsonld_doc( 1287 d, objset, sdk_deploydir / (toolchain_outputname + ".spdx.json") 1288 ) 1289