#
# Copyright OpenEmbedded Contributors
#
# SPDX-License-Identifier: GPL-2.0-only
#

import json
import oe.cve_check
import oe.packagedata
import oe.patch
import oe.sbom30
import oe.spdx30
import oe.spdx_common
import oe.sdk
import os

from contextlib import contextmanager
from datetime import datetime, timezone
from pathlib import Path


def set_timestamp_now(d, o, prop):
    """Set attribute `prop` of `o` to the current UTC time, or remove it.

    The timestamp is only written when SPDX_INCLUDE_TIMESTAMPS is "1";
    otherwise the attribute is deleted from the object.
    """
    if d.getVar("SPDX_INCLUDE_TIMESTAMPS") == "1":
        setattr(o, prop, datetime.now(timezone.utc))
    else:
        # Doing this helps to validate that the property actually exists, and
        # also that it is not mandatory
        delattr(o, prop)


def add_license_expression(d, objset, license_expression, license_data):
    """Convert an OE LICENSE string to an SPDX license expression element.

    The expression is tokenised on parentheses, "&" and "|". Operators are
    mapped to "AND"/"OR", "CLOSED" to "NONE", and license names are mapped
    through the SPDXLICENSEMAP flags. Names that are not present in
    license_data["licenses"] become "LicenseRef-<name>" entries backed by a
    SimpleLicensingText element added to `objset`.

    Returns the element created by objset.new_license_expression().
    """
    # Cache of license name -> SimpleLicensingText created/found so far
    simple_license_text = {}
    # Map of "LicenseRef-*" identifier -> link ID of the license text element
    license_text_map = {}

    def add_license_text(name):
        """Find or create the SimpleLicensingText element for `name`."""
        if name in simple_license_text:
            return simple_license_text[name]

        lic = objset.find_filter(
            oe.spdx30.simplelicensing_SimpleLicensingText,
            name=name,
        )

        if lic is not None:
            simple_license_text[name] = lic
            return lic

        lic = objset.add(
            oe.spdx30.simplelicensing_SimpleLicensingText(
                _id=objset.new_spdxid("license-text", name),
                creationInfo=objset.doc.creationInfo,
                name=name,
            )
        )
        objset.set_element_alias(lic)
        simple_license_text[name] = lic

        if name == "PD":
            lic.simplelicensing_licenseText = "Software released to the public domain"
            return lic

        # Search for the license in COMMON_LICENSE_DIR and LICENSE_PATH
        for directory in [d.getVar("COMMON_LICENSE_DIR")] + (
            d.getVar("LICENSE_PATH") or ""
        ).split():
            try:
                with (Path(directory) / name).open(errors="replace") as f:
                    lic.simplelicensing_licenseText = f.read()
                    return lic

            except FileNotFoundError:
                pass

        # If it's not SPDX or PD, then NO_GENERIC_LICENSE must be set
        filename = d.getVarFlag("NO_GENERIC_LICENSE", name)
        if filename:
            filename = d.expand("${S}/" + filename)
            with open(filename, errors="replace") as f:
                lic.simplelicensing_licenseText = f.read()
                return lic
        else:
            bb.fatal("Cannot find any text for license %s" % name)

    def convert(l):
        """Translate one token of the OE expression to its SPDX form."""
        if l == "(" or l == ")":
            return l

        if l == "&":
            return "AND"

        if l == "|":
            return "OR"

        if l == "CLOSED":
            return "NONE"

        spdx_license = d.getVarFlag("SPDXLICENSEMAP", l) or l
        if spdx_license in license_data["licenses"]:
            return spdx_license

        spdx_license = "LicenseRef-" + l
        if spdx_license not in license_text_map:
            license_text_map[spdx_license] = oe.sbom30.get_element_link_id(
                add_license_text(l)
            )

        return spdx_license

    # Pad the operators with spaces so that a plain split() tokenises them
    lic_split = (
        license_expression.replace("(", " ( ")
        .replace(")", " ) ")
        .replace("|", " | ")
        .replace("&", " & ")
        .split()
    )
    spdx_license_expression = " ".join(convert(l) for l in lic_split)

    o = objset.new_license_expression(
        spdx_license_expression, license_data, license_text_map
    )
    objset.set_element_alias(o)
    return o


def add_package_files(
    d,
    objset,
    topdir,
    get_spdxid,
    get_purposes,
    license_data,
    *,
    archive=None,
    ignore_dirs=(),
    ignore_top_level_dirs=(),
):
    """Add every regular file below `topdir` to `objset` as an SPDX file.

    Parameters:
        get_spdxid: callable taking the running file counter and returning
            the SPDX ID for that file.
        get_purposes: callable taking the file path and returning the list
            of purposes for that file.
        archive: optional tarfile object; when given, each file is also
            added to it with uid/gid 0, owner "root" and an mtime clamped to
            SOURCE_DATE_EPOCH (when that variable is set).
        ignore_dirs: directory names that are pruned at any depth.
        ignore_top_level_dirs: directory names pruned only directly below
            `topdir`.

    Symlinks and non-regular files are skipped. Returns the set of SPDX file
    elements created.
    """
    # Normalise an unset *or empty* SOURCE_DATE_EPOCH to None so the mtime
    # comparison below never compares an int against a string
    source_date_epoch = d.getVar("SOURCE_DATE_EPOCH")
    source_date_epoch = int(source_date_epoch) if source_date_epoch else None

    spdx_files = set()

    file_counter = 1
    for subdir, dirs, files in os.walk(topdir):
        # Prune in place so os.walk() does not descend into ignored dirs
        dirs[:] = [name for name in dirs if name not in ignore_dirs]
        if subdir == str(topdir):
            dirs[:] = [name for name in dirs if name not in ignore_top_level_dirs]

        for file in files:
            filepath = Path(subdir) / file
            if filepath.is_symlink() or not filepath.is_file():
                continue

            filename = str(filepath.relative_to(topdir))
            file_purposes = get_purposes(filepath)

            spdx_file = objset.new_file(
                get_spdxid(file_counter),
                filename,
                filepath,
                purposes=file_purposes,
            )
            spdx_files.add(spdx_file)

            if oe.spdx30.software_SoftwarePurpose.source in file_purposes:
                objset.scan_declared_licenses(spdx_file, filepath, license_data)

            if archive is not None:
                with filepath.open("rb") as f:
                    info = archive.gettarinfo(fileobj=f)
                    info.name = filename
                    info.uid = 0
                    info.gid = 0
                    info.uname = "root"
                    info.gname = "root"

                    if source_date_epoch is not None and info.mtime > source_date_epoch:
                        info.mtime = source_date_epoch

                    archive.addfile(info, f)

            file_counter += 1

    bb.debug(1, "Added %d files to %s" % (len(spdx_files), objset.doc._id))

    return spdx_files


def get_package_sources_from_debug(
    d, package, package_files, sources, source_hash_cache
):
    """Find the SPDX source files referenced by a package's debug info.

    For every file of `package` that has "debugsrc" entries in its extended
    package data, each debug source path is looked up below a list of search
    directories, hashed with SHA256, and matched against `sources` (a
    mapping of SHA256 -> SPDX file).

    `source_hash_cache` maps path -> SHA256 (or None for paths that are not
    regular files) and is updated in place.

    Returns the set of matching SPDX source files; the set is empty when the
    package has no extended package data.
    """

    def file_path_match(file_path, pkg_file):
        """True if `file_path` is `pkg_file`'s name or one of its aliases."""
        if file_path.lstrip("/") == pkg_file.name.lstrip("/"):
            return True

        for e in pkg_file.extension:
            if isinstance(e, oe.sbom30.OEFileNameAliasExtension):
                for a in e.aliases:
                    if file_path.lstrip("/") == a.lstrip("/"):
                        return True

        return False

    debug_search_paths = [
        Path(d.getVar("SPDXWORK")),
        Path(d.getVar("PKGD")),
        Path(d.getVar("STAGING_DIR_TARGET")),
        Path(d.getVar("STAGING_DIR_NATIVE")),
        Path(d.getVar("STAGING_KERNEL_DIR")),
    ]

    pkg_data = oe.packagedata.read_subpkgdata_extended(package, d)

    if pkg_data is None:
        # Callers iterate over the result, so return an empty set, not None
        return set()

    dep_source_files = set()

    for file_path, file_data in pkg_data["files_info"].items():
        if "debugsrc" not in file_data:
            continue

        if not any(file_path_match(file_path, pkg_file) for pkg_file in package_files):
            bb.fatal(
                "No package file found for %s in %s; SPDX found: %s"
                % (str(file_path), package, " ".join(p.name for p in package_files))
            )
            continue

        for debugsrc in file_data["debugsrc"]:
            for search in debug_search_paths:
                if debugsrc.startswith("/usr/src/kernel"):
                    debugsrc_path = search / debugsrc.replace("/usr/src/kernel/", "")
                else:
                    debugsrc_path = search / debugsrc.lstrip("/")

                if debugsrc_path in source_hash_cache:
                    file_sha256 = source_hash_cache[debugsrc_path]
                    if file_sha256 is None:
                        continue
                else:
                    # We can only hash files below, skip directories, links, etc.
                    if not debugsrc_path.is_file():
                        source_hash_cache[debugsrc_path] = None
                        continue

                    file_sha256 = bb.utils.sha256_file(debugsrc_path)
                    source_hash_cache[debugsrc_path] = file_sha256

                if file_sha256 in sources:
                    source_file = sources[file_sha256]
                    dep_source_files.add(source_file)
                else:
                    bb.debug(
                        1,
                        "Debug source %s with SHA256 %s not found in any dependency"
                        % (str(debugsrc_path), file_sha256),
                    )
                # The first search path that yields a hashable file wins
                break
            else:
                bb.debug(1, "Debug source %s not found" % debugsrc)

    return dep_source_files


def collect_dep_objsets(d, build):
    """Load the recipe SPDX documents of this recipe's dependencies.

    Returns a tuple (dep_objsets, dep_builds): the object sets of the
    dependencies that are part of the taskhash, and the set of build
    elements of all dependencies. `build` is currently unused.
    """
    deps = oe.spdx_common.get_spdx_deps(d)

    dep_objsets = []
    dep_builds = set()

    for dep in deps:
        bb.debug(1, "Fetching SPDX for dependency %s" % (dep.pn))
        dep_build, dep_objset = oe.sbom30.find_root_obj_in_jsonld(
            d, "recipes", "recipe-" + dep.pn, oe.spdx30.build_Build
        )
        # If the dependency is part of the taskhash, return it to be linked
        # against. Otherwise, it cannot be linked against because this recipe
        # will not be rebuilt if the dependency changes
        if dep.in_taskhash:
            dep_objsets.append(dep_objset)

        # The build _can_ be linked against (by alias)
        dep_builds.add(dep_build)

    return dep_objsets, dep_builds


def index_sources_by_hash(sources, dest):
    """Index source files by their SHA256 into the `dest` mapping.

    Only software_File elements whose primary purpose is "source" are
    considered. An existing entry in `dest` for the same hash is kept. It is
    a fatal error for such a file to have no SHA256 hash.
    """
    for s in sources:
        if not isinstance(s, oe.spdx30.software_File):
            continue

        if s.software_primaryPurpose != oe.spdx30.software_SoftwarePurpose.source:
            continue

        for v in s.verifiedUsing:
            if v.algorithm == oe.spdx30.HashAlgorithm.sha256:
                if v.hashValue not in dest:
                    dest[v.hashValue] = s
                break
        else:
            bb.fatal(f"No SHA256 found for {s.name}")


def collect_dep_sources(dep_objsets, dest):
    """Index the build input sources of each dependency into `dest`.

    For every non-native object set, the targets of the hasInput
    relationships originating from its root build are passed to
    index_sources_by_hash().
    """
    for objset in dep_objsets:
        # Don't collect sources from native recipes as they
        # match non-native sources also.
        if objset.is_native():
            continue

        bb.debug(1, "Fetching Sources for dependency %s" % (objset.doc.name))

        dep_build = objset.find_root(oe.spdx30.build_Build)
        if not dep_build:
            bb.fatal("Unable to find a build")

        for e in objset.foreach_type(oe.spdx30.Relationship):
            if dep_build is not e.from_:
                continue

            if e.relationshipType != oe.spdx30.RelationshipType.hasInput:
                continue

            index_sources_by_hash(e.to, dest)


def add_download_files(d, objset):
    """Add SPDX elements for every SRC_URI entry to `objset`.

    Local "file" entries become SPDX files (directories are walked and each
    non-symlink file is added); every other entry becomes a software_Package
    with a download location and, where the fetcher supports it, the
    expected sha256/sha1 checksums.

    Returns the set of elements created.
    """
    inputs = set()

    # SRC_URI may be unset for recipes that fetch nothing
    urls = (d.getVar("SRC_URI") or "").split()
    fetch = bb.fetch2.Fetch(urls, d)

    for download_idx, src_uri in enumerate(urls):
        fd = fetch.ud[src_uri]

        for name in fd.names:
            file_name = os.path.basename(fetch.localpath(src_uri))
            if oe.patch.patch_path(src_uri, fetch, "", expand=False):
                primary_purpose = oe.spdx30.software_SoftwarePurpose.patch
            else:
                primary_purpose = oe.spdx30.software_SoftwarePurpose.source

            if fd.type == "file":
                if os.path.isdir(fd.localpath):
                    walk_idx = 1
                    for root, dirs, files in os.walk(fd.localpath):
                        for f in files:
                            f_path = os.path.join(root, f)
                            if os.path.islink(f_path):
                                # TODO: SPDX doesn't support symlinks yet
                                continue

                            file = objset.new_file(
                                objset.new_spdxid(
                                    "source", str(download_idx + 1), str(walk_idx)
                                ),
                                os.path.join(
                                    file_name, os.path.relpath(f_path, fd.localpath)
                                ),
                                f_path,
                                purposes=[primary_purpose],
                            )

                            inputs.add(file)
                            walk_idx += 1

                else:
                    file = objset.new_file(
                        objset.new_spdxid("source", str(download_idx + 1)),
                        file_name,
                        fd.localpath,
                        purposes=[primary_purpose],
                    )
                    inputs.add(file)

            else:
                dl = objset.add(
                    oe.spdx30.software_Package(
                        _id=objset.new_spdxid("source", str(download_idx + 1)),
                        creationInfo=objset.doc.creationInfo,
                        name=file_name,
                        software_primaryPurpose=primary_purpose,
                        software_downloadLocation=oe.spdx_common.fetch_data_to_uri(
                            fd, name
                        ),
                    )
                )

                if fd.method.supports_checksum(fd):
                    # TODO Need something better than hard coding this
                    for checksum_id in ["sha256", "sha1"]:
                        expected_checksum = getattr(
                            fd, "%s_expected" % checksum_id, None
                        )
                        if expected_checksum is None:
                            continue

                        dl.verifiedUsing.append(
                            oe.spdx30.Hash(
                                algorithm=getattr(oe.spdx30.HashAlgorithm, checksum_id),
                                hashValue=expected_checksum,
                            )
                        )

                inputs.add(dl)

    return inputs


def set_purposes(d, element, *var_names, force_purposes=()):
    """Set the primary and additional software purposes of `element`.

    The purposes are `force_purposes` followed by the whitespace separated
    value of the first variable in `var_names` that has a non-empty value.
    The first purpose becomes the primary purpose, the rest the additional
    purposes. If no purpose is found, a warning is issued and `element` is
    left untouched.
    """
    # Copy so that the caller's sequence is never modified
    purposes = list(force_purposes)

    for var_name in var_names:
        val = d.getVar(var_name)
        if val:
            purposes.extend(val.split())
            break

    if not purposes:
        bb.warn("No SPDX purposes found in %s" % " ".join(var_names))
        return

    element.software_primaryPurpose = getattr(
        oe.spdx30.software_SoftwarePurpose, purposes[0]
    )
    element.software_additionalPurpose = [
        getattr(oe.spdx30.software_SoftwarePurpose, p) for p in purposes[1:]
    ]


def create_spdx(d):
    """Write the recipe SPDX document and the staged package documents.

    The recipe document ("recipes") contains the build, its downloaded and
    (optionally) patched sources, licenses, CVE elements and sysroot files.
    For non-native recipes one document per package is written to
    "packages-staging"; it is completed later by create_package_spdx().
    """

    def set_var_field(var, obj, name, package=None):
        """Set obj.<name> from "<var>:<package>", falling back to <var>."""
        val = None
        if package:
            val = d.getVar("%s:%s" % (var, package))

        if not val:
            val = d.getVar(var)

        if val:
            setattr(obj, name, val)

    license_data = oe.spdx_common.load_spdx_license_data(d)

    deploydir = Path(d.getVar("SPDXDEPLOY"))
    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    spdx_workdir = Path(d.getVar("SPDXWORK"))
    include_sources = d.getVar("SPDX_INCLUDE_SOURCES") == "1"
    pkg_arch = d.getVar("SSTATE_PKGARCH")
    is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class(
        "cross", d
    )
    include_vex = d.getVar("SPDX_INCLUDE_VEX")
    if include_vex not in ("none", "current", "all"):
        bb.fatal("SPDX_INCLUDE_VEX must be one of 'none', 'current', 'all'")

    build_objset = oe.sbom30.ObjectSet.new_objset(d, "recipe-" + d.getVar("PN"))

    build = build_objset.new_task_build("recipe", "recipe")
    build_objset.set_element_alias(build)

    build_objset.doc.rootElement.append(build)

    build_objset.set_is_native(is_native)

    for var in (d.getVar("SPDX_CUSTOM_ANNOTATION_VARS") or "").split():
        # NOTE(review): new_annotation is neither defined nor imported in the
        # visible part of this module; confirm where it is expected to come
        # from, otherwise this raises NameError when the variable is set
        new_annotation(
            d,
            build_objset,
            build,
            "%s=%s" % (var, d.getVar(var)),
            oe.spdx30.AnnotationType.other,
        )

    build_inputs = set()

    # Add CVEs
    cve_by_status = {}
    if include_vex != "none":
        for cve in d.getVarFlags("CVE_STATUS") or {}:
            decoded_status = oe.cve_check.decode_cve_status(d, cve)

            # If this CVE is fixed upstream, skip it unless all CVEs are
            # specified.
            if (
                include_vex != "all"
                and "detail" in decoded_status
                and decoded_status["detail"]
                in (
                    "fixed-version",
                    "cpe-stable-backport",
                )
            ):
                bb.debug(1, "Skipping %s since it is already fixed upstream" % cve)
                continue

            spdx_cve = build_objset.new_cve_vuln(cve)
            build_objset.set_element_alias(spdx_cve)

            cve_by_status.setdefault(decoded_status["mapping"], {})[cve] = (
                spdx_cve,
                decoded_status["detail"],
                decoded_status["description"],
            )

    cpe_ids = oe.cve_check.get_cpe_ids(d.getVar("CVE_PRODUCT"), d.getVar("CVE_VERSION"))

    source_files = add_download_files(d, build_objset)
    build_inputs |= source_files

    recipe_spdx_license = add_license_expression(
        d, build_objset, d.getVar("LICENSE"), license_data
    )
    build_objset.new_relationship(
        source_files,
        oe.spdx30.RelationshipType.hasConcludedLicense,
        [recipe_spdx_license],
    )

    # SHA256 -> SPDX source file, used to resolve debug sources of packages
    dep_sources = {}
    if oe.spdx_common.process_sources(d) and include_sources:
        bb.debug(1, "Adding source files to SPDX")
        oe.spdx_common.get_patched_src(d)

        files = add_package_files(
            d,
            build_objset,
            spdx_workdir,
            lambda file_counter: build_objset.new_spdxid(
                "sourcefile", str(file_counter)
            ),
            lambda filepath: [oe.spdx30.software_SoftwarePurpose.source],
            license_data,
            ignore_dirs=[".git"],
            ignore_top_level_dirs=["temp"],
            archive=None,
        )
        build_inputs |= files
        index_sources_by_hash(files, dep_sources)

    dep_objsets, dep_builds = collect_dep_objsets(d, build)
    if dep_builds:
        build_objset.new_scoped_relationship(
            [build],
            oe.spdx30.RelationshipType.dependsOn,
            oe.spdx30.LifecycleScopeType.build,
            sorted(oe.sbom30.get_element_link_id(b) for b in dep_builds),
        )

    debug_source_ids = set()
    source_hash_cache = {}

    # Write out the package SPDX data now. It is not complete as we cannot
    # write the runtime data, so write it to a staging area and a later task
    # will write out the final collection

    # TODO: Handle native recipe output
    if not is_native:
        bb.debug(1, "Collecting Dependency sources files")
        collect_dep_sources(dep_objsets, dep_sources)

        bb.build.exec_func("read_subpackage_metadata", d)

        pkgdest = Path(d.getVar("PKGDEST"))
        for package in d.getVar("PACKAGES").split():
            if not oe.packagedata.packaged(package, d):
                continue

            pkg_name = d.getVar("PKG:%s" % package) or package

            bb.debug(1, "Creating SPDX for package %s" % pkg_name)

            pkg_objset = oe.sbom30.ObjectSet.new_objset(d, "package-" + pkg_name)

            spdx_package = pkg_objset.add_root(
                oe.spdx30.software_Package(
                    _id=pkg_objset.new_spdxid("package", pkg_name),
                    creationInfo=pkg_objset.doc.creationInfo,
                    name=pkg_name,
                    software_packageVersion=d.getVar("PV"),
                )
            )
            set_timestamp_now(d, spdx_package, "builtTime")

            set_purposes(
                d,
                spdx_package,
                "SPDX_PACKAGE_ADDITIONAL_PURPOSE:%s" % package,
                "SPDX_PACKAGE_ADDITIONAL_PURPOSE",
                force_purposes=["install"],
            )

            supplier = build_objset.new_agent("SPDX_PACKAGE_SUPPLIER")
            if supplier is not None:
                spdx_package.suppliedBy = (
                    supplier if isinstance(supplier, str) else supplier._id
                )

            set_var_field(
                "HOMEPAGE", spdx_package, "software_homePage", package=package
            )
            set_var_field("SUMMARY", spdx_package, "summary", package=package)
            set_var_field("DESCRIPTION", spdx_package, "description", package=package)

            pkg_objset.new_scoped_relationship(
                [oe.sbom30.get_element_link_id(build)],
                oe.spdx30.RelationshipType.hasOutput,
                oe.spdx30.LifecycleScopeType.build,
                [spdx_package],
            )

            for cpe_id in cpe_ids:
                spdx_package.externalIdentifier.append(
                    oe.spdx30.ExternalIdentifier(
                        externalIdentifierType=oe.spdx30.ExternalIdentifierType.cpe23,
                        identifier=cpe_id,
                    )
                )

            # TODO: Generate a file for each actual IPK/DEB/RPM/TGZ file
            # generated and link it to the package
            # spdx_package_file = pkg_objset.add(oe.spdx30.software_File(
            #    _id=pkg_objset.new_spdxid("distribution", pkg_name),
            #    creationInfo=pkg_objset.doc.creationInfo,
            #    name=pkg_name,
            #    software_primaryPurpose=spdx_package.software_primaryPurpose,
            #    software_additionalPurpose=spdx_package.software_additionalPurpose,
            # ))
            # set_timestamp_now(d, spdx_package_file, "builtTime")

            ## TODO add hashes
            # pkg_objset.new_relationship(
            #    [spdx_package],
            #    oe.spdx30.RelationshipType.hasDistributionArtifact,
            #    [spdx_package_file],
            # )

            # NOTE: licenses live in the recipe collection and are referenced
            # by ID in the package collection(s). This helps reduce duplication
            # (since a lot of packages will have the same license), and also
            # prevents duplicate license SPDX IDs in the packages
            package_license = d.getVar("LICENSE:%s" % package)
            if package_license and package_license != d.getVar("LICENSE"):
                package_spdx_license = add_license_expression(
                    d, build_objset, package_license, license_data
                )
            else:
                package_spdx_license = recipe_spdx_license

            pkg_objset.new_relationship(
                [spdx_package],
                oe.spdx30.RelationshipType.hasConcludedLicense,
                [oe.sbom30.get_element_link_id(package_spdx_license)],
            )

            # NOTE: CVE Elements live in the recipe collection
            all_cves = set()
            for status, cves in cve_by_status.items():
                for cve, items in cves.items():
                    spdx_cve, detail, description = items
                    spdx_cve_id = oe.sbom30.get_element_link_id(spdx_cve)

                    all_cves.add(spdx_cve_id)

                    if status == "Patched":
                        pkg_objset.new_vex_patched_relationship(
                            [spdx_cve_id], [spdx_package]
                        )
                    elif status == "Unpatched":
                        pkg_objset.new_vex_unpatched_relationship(
                            [spdx_cve_id], [spdx_package]
                        )
                    elif status == "Ignored":
                        spdx_vex = pkg_objset.new_vex_ignored_relationship(
                            [spdx_cve_id],
                            [spdx_package],
                            impact_statement=description,
                        )

                        if detail in (
                            "ignored",
                            "cpe-incorrect",
                            "disputed",
                            "upstream-wontfix",
                        ):
                            # VEX doesn't have justifications for this
                            pass
                        elif detail in (
                            "not-applicable-config",
                            "not-applicable-platform",
                        ):
                            for v in spdx_vex:
                                v.security_justificationType = (
                                    oe.spdx30.security_VexJustificationType.vulnerableCodeNotPresent
                                )
                        else:
                            bb.fatal(f"Unknown detail '{detail}' for ignored {cve}")
                    else:
                        bb.fatal(f"Unknown {cve} status '{status}'")

            if all_cves:
                pkg_objset.new_relationship(
                    [spdx_package],
                    oe.spdx30.RelationshipType.hasAssociatedVulnerability,
                    sorted(all_cves),
                )

            bb.debug(1, "Adding package files to SPDX for package %s" % pkg_name)
            package_files = add_package_files(
                d,
                pkg_objset,
                pkgdest / package,
                lambda file_counter: pkg_objset.new_spdxid(
                    "package", pkg_name, "file", str(file_counter)
                ),
                # TODO: Can we know the purpose here?
                lambda filepath: [],
                license_data,
                ignore_top_level_dirs=["CONTROL", "DEBIAN"],
                archive=None,
            )

            if package_files:
                pkg_objset.new_relationship(
                    [spdx_package],
                    oe.spdx30.RelationshipType.contains,
                    sorted(package_files),
                )

            if include_sources:
                debug_sources = get_package_sources_from_debug(
                    d, package, package_files, dep_sources, source_hash_cache
                )
                debug_source_ids |= set(
                    oe.sbom30.get_element_link_id(src) for src in debug_sources
                )

            oe.sbom30.write_recipe_jsonld_doc(
                d, pkg_objset, "packages-staging", deploydir, create_spdx_id_links=False
            )

    if include_sources:
        bb.debug(1, "Adding sysroot files to SPDX")
        sysroot_files = add_package_files(
            d,
            build_objset,
            d.expand("${COMPONENTS_DIR}/${PACKAGE_ARCH}/${PN}"),
            lambda file_counter: build_objset.new_spdxid("sysroot", str(file_counter)),
            lambda filepath: [],
            license_data,
            archive=None,
        )

        if sysroot_files:
            build_objset.new_scoped_relationship(
                [build],
                oe.spdx30.RelationshipType.hasOutput,
                oe.spdx30.LifecycleScopeType.build,
                sorted(sysroot_files),
            )

    if build_inputs or debug_source_ids:
        build_objset.new_scoped_relationship(
            [build],
            oe.spdx30.RelationshipType.hasInput,
            oe.spdx30.LifecycleScopeType.build,
            sorted(build_inputs) + sorted(debug_source_ids),
        )

    oe.sbom30.write_recipe_jsonld_doc(d, build_objset, "recipes", deploydir)


def create_package_spdx(d):
    """Write the final per-package SPDX documents with runtime dependencies.

    Each staged package document is loaded from "packages-staging", a
    runtime-scoped dependsOn relationship is added for the providers of its
    RDEPENDS, and the result is written to "packages". Nothing is written
    for native/cross recipes.
    """
    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    deploydir = Path(d.getVar("SPDXRUNTIMEDEPLOY"))
    is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class(
        "cross", d
    )

    providers = oe.spdx_common.collect_package_providers(d)
    pkg_arch = d.getVar("SSTATE_PKGARCH")

    if is_native:
        return

    bb.build.exec_func("read_subpackage_metadata", d)

    # Package name -> SPDX package element, shared across all packages
    dep_package_cache = {}

    # Any element common to all packages that need to be referenced by ID
    # should be written into this objset set
    common_objset = oe.sbom30.ObjectSet.new_objset(
        d, "%s-package-common" % d.getVar("PN")
    )

    pkgdest = Path(d.getVar("PKGDEST"))
    for package in d.getVar("PACKAGES").split():
        localdata = bb.data.createCopy(d)
        pkg_name = d.getVar("PKG:%s" % package) or package
        localdata.setVar("PKG", pkg_name)
        localdata.setVar("OVERRIDES", d.getVar("OVERRIDES", False) + ":" + package)

        if not oe.packagedata.packaged(package, localdata):
            continue

        spdx_package, pkg_objset = oe.sbom30.load_obj_in_jsonld(
            d,
            pkg_arch,
            "packages-staging",
            "package-" + pkg_name,
            oe.spdx30.software_Package,
            software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install,
        )

        # We will write out a new collection, so link it to the new
        # creation info in the common package data. The old creation info
        # should still exist and be referenced by all the existing elements
        # in the package
        pkg_objset.creationInfo = pkg_objset.copy_creation_info(
            common_objset.doc.creationInfo
        )

        runtime_spdx_deps = set()

        deps = bb.utils.explode_dep_versions2(localdata.getVar("RDEPENDS") or "")
        seen_deps = set()
        for dep, _ in deps.items():
            if dep in seen_deps:
                continue

            if dep not in providers:
                continue

            # From here on `dep` is the name of the providing package
            (dep, _) = providers[dep]

            if not oe.packagedata.packaged(dep, localdata):
                continue

            dep_pkg_data = oe.packagedata.read_subpkgdata_dict(dep, d)
            dep_pkg = dep_pkg_data["PKG"]

            if dep in dep_package_cache:
                dep_spdx_package = dep_package_cache[dep]
            else:
                bb.debug(1, "Searching for %s" % dep_pkg)
                dep_spdx_package, _ = oe.sbom30.find_root_obj_in_jsonld(
                    d,
                    "packages-staging",
                    "package-" + dep_pkg,
                    oe.spdx30.software_Package,
                    software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install,
                )
                dep_package_cache[dep] = dep_spdx_package

            runtime_spdx_deps.add(dep_spdx_package)
            seen_deps.add(dep)

        if runtime_spdx_deps:
            pkg_objset.new_scoped_relationship(
                [spdx_package],
                oe.spdx30.RelationshipType.dependsOn,
                oe.spdx30.LifecycleScopeType.runtime,
                [oe.sbom30.get_element_link_id(dep) for dep in runtime_spdx_deps],
            )

        oe.sbom30.write_recipe_jsonld_doc(d, pkg_objset, "packages", deploydir)

    oe.sbom30.write_recipe_jsonld_doc(d, common_objset, "common-package", deploydir)


def write_bitbake_spdx(d):
    """Write the "bitbake.spdx.json" document to DEPLOY_DIR_SPDX.

    When SPDX_INCLUDE_BITBAKE_PARENT_BUILD is "1" the document contains a
    uniquely identified build element, optionally related to the build host
    (SPDX_BUILD_HOST), the invoking agent (SPDX_INVOKED_BY) and the agent it
    acts on behalf of (SPDX_ON_BEHALF_OF). Otherwise a warning is issued for
    each of those variables that is set, since they then have no effect.

    Note that this modifies PN and BB_TASKHASH in `d`.
    """
    # Set PN to "bitbake" so that SPDX IDs can be generated
    d.setVar("PN", "bitbake")
    d.setVar("BB_TASKHASH", "bitbake")
    oe.spdx_common.load_spdx_license_data(d)

    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))

    objset = oe.sbom30.ObjectSet.new_objset(d, "bitbake", False)

    host_import_key = d.getVar("SPDX_BUILD_HOST")
    invoked_by = objset.new_agent("SPDX_INVOKED_BY", add=False)
    on_behalf_of = objset.new_agent("SPDX_ON_BEHALF_OF", add=False)

    if d.getVar("SPDX_INCLUDE_BITBAKE_PARENT_BUILD") == "1":
        # Since the Build objects are unique, we may as well set the creation
        # time to the current time instead of the fallback SDE
        objset.doc.creationInfo.created = datetime.now(timezone.utc)

        # Each invocation of bitbake should have a unique ID since it is a
        # unique build
        nonce = os.urandom(16).hex()

        build = objset.add_root(
            oe.spdx30.build_Build(
                _id=objset.new_spdxid(nonce, include_unihash=False),
                creationInfo=objset.doc.creationInfo,
                build_buildType=oe.sbom30.SPDX_BUILD_TYPE,
            )
        )
        set_timestamp_now(d, build, "build_buildStartTime")

        if host_import_key:
            objset.new_scoped_relationship(
                [build],
                oe.spdx30.RelationshipType.hasHost,
                oe.spdx30.LifecycleScopeType.build,
                [objset.new_import(host_import_key)],
            )

        if invoked_by:
            objset.add(invoked_by)
            invoked_by_spdx = objset.new_scoped_relationship(
                [build],
                oe.spdx30.RelationshipType.invokedBy,
                oe.spdx30.LifecycleScopeType.build,
                [invoked_by],
            )

            if on_behalf_of:
                objset.add(on_behalf_of)
                objset.new_scoped_relationship(
                    [on_behalf_of],
                    oe.spdx30.RelationshipType.delegatedTo,
                    oe.spdx30.LifecycleScopeType.build,
                    invoked_by_spdx,
                )

        elif on_behalf_of:
            bb.warn("SPDX_ON_BEHALF_OF has no effect if SPDX_INVOKED_BY is not set")

    else:
        if host_import_key:
            bb.warn(
                "SPDX_BUILD_HOST has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set"
            )

        if invoked_by:
            bb.warn(
                "SPDX_INVOKED_BY has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set"
            )

        if on_behalf_of:
            bb.warn(
                "SPDX_ON_BEHALF_OF has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set"
            )

    for obj in objset.foreach_type(oe.spdx30.Element):
        obj.extension.append(oe.sbom30.OEIdAliasExtension())

    oe.sbom30.write_jsonld_doc(d, objset, deploy_dir_spdx / "bitbake.spdx.json")


def collect_build_package_inputs(d, objset, build, packages):
    """Relate `build` to the SPDX packages of all installed `packages`.

    `packages` is a mapping keyed by package name. Each name is resolved
    through the package providers and its SPDX package is added as a build
    scoped hasInput of `build`. It is a fatal error if any package has no
    provider.
    """
    import oe.sbom30

    providers = oe.spdx_common.collect_package_providers(d)

    build_deps = set()
    missing_providers = set()

    for name in sorted(packages.keys()):
        if name not in providers:
            missing_providers.add(name)
            continue

        pkg_name, pkg_hashfn = providers[name]

        # Copy all of the package SPDX files into the Sbom elements
        pkg_spdx, _ = oe.sbom30.find_root_obj_in_jsonld(
            d,
            "packages",
            "package-" + pkg_name,
            oe.spdx30.software_Package,
            software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install,
        )
        build_deps.add(oe.sbom30.get_element_link_id(pkg_spdx))

    if missing_providers:
        bb.fatal(
            f"Unable to find SPDX provider(s) for: {', '.join(sorted(missing_providers))}"
        )

    if build_deps:
        objset.new_scoped_relationship(
            [build],
            oe.spdx30.RelationshipType.hasInput,
            oe.spdx30.LifecycleScopeType.build,
            sorted(build_deps),
        )


def create_rootfs_spdx(d):
    """Write the "rootfs" SPDX document for the image being built.

    The document contains a rootfs package, the rootfs build that outputs
    it, and the installed packages (read as JSON from SPDX_ROOTFS_PACKAGES)
    as inputs of that build.
    """
    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    deploydir = Path(d.getVar("SPDXROOTFSDEPLOY"))
    root_packages_file = Path(d.getVar("SPDX_ROOTFS_PACKAGES"))
    image_basename = d.getVar("IMAGE_BASENAME")
    machine = d.getVar("MACHINE")

    with root_packages_file.open("r") as f:
        packages = json.load(f)

    objset = oe.sbom30.ObjectSet.new_objset(
        d, "%s-%s-rootfs" % (image_basename, machine)
    )

    rootfs = objset.add_root(
        oe.spdx30.software_Package(
            _id=objset.new_spdxid("rootfs", image_basename),
            creationInfo=objset.doc.creationInfo,
            name=image_basename,
            software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.archive,
        )
    )
    set_timestamp_now(d, rootfs, "builtTime")

    rootfs_build = objset.add_root(objset.new_task_build("rootfs", "rootfs"))
    set_timestamp_now(d, rootfs_build, "build_buildEndTime")

    objset.new_scoped_relationship(
        [rootfs_build],
        oe.spdx30.RelationshipType.hasOutput,
        oe.spdx30.LifecycleScopeType.build,
        [rootfs],
    )

    collect_build_package_inputs(d, objset, rootfs_build, packages)

    oe.sbom30.write_recipe_jsonld_doc(d, objset, "rootfs", deploydir)


def create_image_spdx(d):
    """Write the "image" SPDX document describing the generated images.

    For each task in the IMAGE_OUTPUT_MANIFEST JSON file a build element is
    created, with one SHA256-hashed SPDX file per image it produced as its
    output. All builds take the rootfs package as input.
    """
    import oe.sbom30

    image_deploy_dir = Path(d.getVar("IMGDEPLOYDIR"))
    manifest_path = Path(d.getVar("IMAGE_OUTPUT_MANIFEST"))
    spdx_work_dir = Path(d.getVar("SPDXIMAGEWORK"))

    image_basename = d.getVar("IMAGE_BASENAME")
    machine = d.getVar("MACHINE")

    objset = oe.sbom30.ObjectSet.new_objset(
        d, "%s-%s-image" % (image_basename, machine)
    )

    with manifest_path.open("r") as f:
        manifest = json.load(f)

    builds = []
    for task in manifest:
        imagetype = task["imagetype"]
        taskname = task["taskname"]

        image_build = objset.add_root(
            objset.new_task_build(taskname, "image/%s" % imagetype)
        )
        set_timestamp_now(d, image_build, "build_buildEndTime")
        builds.append(image_build)

        artifacts = []

        for image in task["images"]:
            image_filename = image["filename"]
            image_path = image_deploy_dir / image_filename
            a = objset.add_root(
                oe.spdx30.software_File(
                    _id=objset.new_spdxid("image", image_filename),
                    creationInfo=objset.doc.creationInfo,
                    name=image_filename,
                    verifiedUsing=[
                        oe.spdx30.Hash(
                            algorithm=oe.spdx30.HashAlgorithm.sha256,
                            hashValue=bb.utils.sha256_file(image_path),
                        )
                    ],
                )
            )
            set_purposes(
                d, a, "SPDX_IMAGE_PURPOSE:%s" % imagetype, "SPDX_IMAGE_PURPOSE"
            )
            set_timestamp_now(d, a, "builtTime")

            artifacts.append(a)

        if artifacts:
            objset.new_scoped_relationship(
                [image_build],
                oe.spdx30.RelationshipType.hasOutput,
                oe.spdx30.LifecycleScopeType.build,
                artifacts,
            )

    if builds:
        rootfs_image, _ = oe.sbom30.find_root_obj_in_jsonld(
            d,
            "rootfs",
            "%s-%s-rootfs" % (image_basename, machine),
            oe.spdx30.software_Package,
            # TODO: Should use a purpose to filter here?
        )
        objset.new_scoped_relationship(
            builds,
            oe.spdx30.RelationshipType.hasInput,
            oe.spdx30.LifecycleScopeType.build,
            [oe.sbom30.get_element_link_id(rootfs_image)],
        )

    objset.add_aliases()
    objset.link()
    oe.sbom30.write_recipe_jsonld_doc(d, objset, "image", spdx_work_dir)


def create_image_sbom_spdx(d):
    """Write the image SBoM "<IMAGE_NAME>.spdx.json" and its link.

    The SBoM root elements are the rootfs package and every root file of the
    "image" document. If IMAGE_LINK_NAME is set, a relative symlink named
    "<IMAGE_LINK_NAME>.spdx.json" pointing at the SBoM is created.
    """
    import oe.sbom30

    image_name = d.getVar("IMAGE_NAME")
    image_basename = d.getVar("IMAGE_BASENAME")
    image_link_name = d.getVar("IMAGE_LINK_NAME")
    imgdeploydir = Path(d.getVar("SPDXIMAGEDEPLOYDIR"))
    machine = d.getVar("MACHINE")

    spdx_path = imgdeploydir / (image_name + ".spdx.json")

    root_elements = []

    # TODO: Do we need to add the rootfs or are the image files sufficient?
    rootfs_image, _ = oe.sbom30.find_root_obj_in_jsonld(
        d,
        "rootfs",
        "%s-%s-rootfs" % (image_basename, machine),
        oe.spdx30.software_Package,
        # TODO: Should use a purpose here?
    )
    root_elements.append(oe.sbom30.get_element_link_id(rootfs_image))

    image_objset, _ = oe.sbom30.find_jsonld(
        d, "image", "%s-%s-image" % (image_basename, machine), required=True
    )
    for o in image_objset.foreach_root(oe.spdx30.software_File):
        root_elements.append(oe.sbom30.get_element_link_id(o))

    objset, sbom = oe.sbom30.create_sbom(d, image_name, root_elements)

    oe.sbom30.write_jsonld_doc(d, objset, spdx_path)

    def make_image_link(target_path, suffix):
        """Symlink "<IMAGE_LINK_NAME><suffix>" to `target_path`."""
        if image_link_name:
            link = imgdeploydir / (image_link_name + suffix)
            if link != target_path:
                link.symlink_to(os.path.relpath(target_path, link.parent))

    make_image_link(spdx_path, ".spdx.json")


def sdk_create_spdx(d, sdk_type, spdx_work_dir, toolchain_outputname):
    """Write "sdk-rootfs.spdx.json" for the SDK into `spdx_work_dir`.

    The document contains an SDK rootfs package, the build that outputs it,
    and the packages installed in the SDK as inputs of that build.
    """
    sdk_name = toolchain_outputname + "-" + sdk_type
    sdk_packages = oe.sdk.sdk_list_installed_packages(d, sdk_type == "target")

    objset = oe.sbom30.ObjectSet.new_objset(d, sdk_name)

    sdk_rootfs = objset.add_root(
        oe.spdx30.software_Package(
            _id=objset.new_spdxid("sdk-rootfs", sdk_name),
            creationInfo=objset.doc.creationInfo,
            name=sdk_name,
            software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.archive,
        )
    )
    set_timestamp_now(d, sdk_rootfs, "builtTime")

    sdk_build = objset.add_root(objset.new_task_build("sdk-rootfs", "sdk-rootfs"))
    set_timestamp_now(d, sdk_build, "build_buildEndTime")

    objset.new_scoped_relationship(
        [sdk_build],
        oe.spdx30.RelationshipType.hasOutput,
        oe.spdx30.LifecycleScopeType.build,
        [sdk_rootfs],
    )

    collect_build_package_inputs(d, objset, sdk_build, sdk_packages)

    objset.add_aliases()
    oe.sbom30.write_jsonld_doc(d, objset, spdx_work_dir / "sdk-rootfs.spdx.json")


def create_sdk_sbom(d, sdk_deploydir, spdx_work_dir, toolchain_outputname):
    """Write the SDK SBoM "<toolchain_outputname>.spdx.json".

    Loads the document written by sdk_create_spdx(), adds an "sdk-populate"
    build that takes the SDK rootfs as input and outputs every regular file
    found below `sdk_deploydir`, and writes the resulting SBoM into
    `sdk_deploydir`.
    """
    # Load the document written earlier
    rootfs_objset = oe.sbom30.load_jsonld(
        d, spdx_work_dir / "sdk-rootfs.spdx.json", required=True
    )

    # Create a new build for the SDK installer
    sdk_build = rootfs_objset.new_task_build("sdk-populate", "sdk-populate")
    set_timestamp_now(d, sdk_build, "build_buildEndTime")

    rootfs = rootfs_objset.find_root(oe.spdx30.software_Package)
    if rootfs is None:
        bb.fatal("Unable to find rootfs artifact")

    rootfs_objset.new_scoped_relationship(
        [sdk_build],
        oe.spdx30.RelationshipType.hasInput,
        oe.spdx30.LifecycleScopeType.build,
        [rootfs],
    )

    files = set()

    # NOTE: os.walk() doesn't return symlinks
    for dirpath, dirnames, filenames in os.walk(sdk_deploydir):
        for fn in filenames:
            fpath = Path(dirpath) / fn
            if not fpath.is_file() or fpath.is_symlink():
                continue

            relpath = str(fpath.relative_to(sdk_deploydir))

            f = rootfs_objset.new_file(
                rootfs_objset.new_spdxid("sdk-installer", relpath),
                relpath,
                fpath,
            )
            set_timestamp_now(d, f, "builtTime")

            if fn.endswith(".manifest"):
                f.software_primaryPurpose = oe.spdx30.software_SoftwarePurpose.manifest
            elif fn.endswith(".testdata.json"):
                f.software_primaryPurpose = (
                    oe.spdx30.software_SoftwarePurpose.configuration
                )
            else:
                set_purposes(d, f, "SPDX_SDK_PURPOSE")

            files.add(f)

    if files:
        rootfs_objset.new_scoped_relationship(
            [sdk_build],
            oe.spdx30.RelationshipType.hasOutput,
            oe.spdx30.LifecycleScopeType.build,
            files,
        )
    else:
        bb.warn(f"No SDK output files found in {sdk_deploydir}")

    objset, sbom = oe.sbom30.create_sbom(
        d, toolchain_outputname, sorted(files), [rootfs_objset]
    )

    oe.sbom30.write_jsonld_doc(
        d, objset, sdk_deploydir / (toolchain_outputname + ".spdx.json")
    )