1# 2# Copyright OpenEmbedded Contributors 3# 4# SPDX-License-Identifier: GPL-2.0-only 5# 6 7import json 8import oe.cve_check 9import oe.packagedata 10import oe.patch 11import oe.sbom30 12import oe.spdx30 13import oe.spdx_common 14import oe.sdk 15import os 16 17from contextlib import contextmanager 18from datetime import datetime, timezone 19from pathlib import Path 20 21 22def walk_error(err): 23 bb.error(f"ERROR walking {err.filename}: {err}") 24 25 26def set_timestamp_now(d, o, prop): 27 if d.getVar("SPDX_INCLUDE_TIMESTAMPS") == "1": 28 setattr(o, prop, datetime.now(timezone.utc)) 29 else: 30 # Doing this helps to validated that the property actually exists, and 31 # also that it is not mandatory 32 delattr(o, prop) 33 34 35def add_license_expression(d, objset, license_expression, license_data): 36 simple_license_text = {} 37 license_text_map = {} 38 license_ref_idx = 0 39 40 def add_license_text(name): 41 nonlocal objset 42 nonlocal simple_license_text 43 44 if name in simple_license_text: 45 return simple_license_text[name] 46 47 lic = objset.find_filter( 48 oe.spdx30.simplelicensing_SimpleLicensingText, 49 name=name, 50 ) 51 52 if lic is not None: 53 simple_license_text[name] = lic 54 return lic 55 56 lic = objset.add( 57 oe.spdx30.simplelicensing_SimpleLicensingText( 58 _id=objset.new_spdxid("license-text", name), 59 creationInfo=objset.doc.creationInfo, 60 name=name, 61 ) 62 ) 63 objset.set_element_alias(lic) 64 simple_license_text[name] = lic 65 66 if name == "PD": 67 lic.simplelicensing_licenseText = "Software released to the public domain" 68 return lic 69 70 # Seach for the license in COMMON_LICENSE_DIR and LICENSE_PATH 71 for directory in [d.getVar("COMMON_LICENSE_DIR")] + ( 72 d.getVar("LICENSE_PATH") or "" 73 ).split(): 74 try: 75 with (Path(directory) / name).open(errors="replace") as f: 76 lic.simplelicensing_licenseText = f.read() 77 return lic 78 79 except FileNotFoundError: 80 pass 81 82 # If it's not SPDX or PD, then NO_GENERIC_LICENSE must be set 83 filename = d.getVarFlag("NO_GENERIC_LICENSE", name) 84 if filename: 85 filename = d.expand("${S}/" + filename) 86 with open(filename, errors="replace") as f: 87 lic.simplelicensing_licenseText = f.read() 88 return lic 89 else: 90 bb.fatal("Cannot find any text for license %s" % name) 91 92 def convert(l): 93 nonlocal license_text_map 94 nonlocal license_ref_idx 95 96 if l == "(" or l == ")": 97 return l 98 99 if l == "&": 100 return "AND" 101 102 if l == "|": 103 return "OR" 104 105 if l == "CLOSED": 106 return "NONE" 107 108 spdx_license = d.getVarFlag("SPDXLICENSEMAP", l) or l 109 if spdx_license in license_data["licenses"]: 110 return spdx_license 111 112 spdx_license = "LicenseRef-" + l 113 if spdx_license not in license_text_map: 114 license_text_map[spdx_license] = oe.sbom30.get_element_link_id( 115 add_license_text(l) 116 ) 117 118 return spdx_license 119 120 lic_split = ( 121 license_expression.replace("(", " ( ") 122 .replace(")", " ) ") 123 .replace("|", " | ") 124 .replace("&", " & ") 125 .split() 126 ) 127 spdx_license_expression = " ".join(convert(l) for l in lic_split) 128 129 o = objset.new_license_expression( 130 spdx_license_expression, license_data, license_text_map 131 ) 132 objset.set_element_alias(o) 133 return o 134 135 136def add_package_files( 137 d, 138 objset, 139 topdir, 140 get_spdxid, 141 get_purposes, 142 license_data=None, 143 *, 144 archive=None, 145 ignore_dirs=[], 146 ignore_top_level_dirs=[], 147): 148 source_date_epoch = d.getVar("SOURCE_DATE_EPOCH") 149 if source_date_epoch: 150 source_date_epoch = int(source_date_epoch) 151 152 spdx_files = set() 153 154 file_counter = 1 155 if not os.path.exists(topdir): 156 bb.note(f"Skip {topdir}") 157 return spdx_files 158 159 for subdir, dirs, files in os.walk(topdir, onerror=walk_error): 160 dirs[:] = [d for d in dirs if d not in ignore_dirs] 161 if subdir == str(topdir): 162 dirs[:] = [d for d in dirs if d not in ignore_top_level_dirs] 163 164 dirs.sort() 165 files.sort() 166 for file in files: 167 filepath = Path(subdir) / file 168 if filepath.is_symlink() or not filepath.is_file(): 169 continue 170 171 filename = str(filepath.relative_to(topdir)) 172 file_purposes = get_purposes(filepath) 173 174 spdx_file = objset.new_file( 175 get_spdxid(file_counter), 176 filename, 177 filepath, 178 purposes=file_purposes, 179 ) 180 spdx_files.add(spdx_file) 181 182 if ( 183 oe.spdx30.software_SoftwarePurpose.source in file_purposes 184 and license_data is not None 185 ): 186 objset.scan_declared_licenses(spdx_file, filepath, license_data) 187 188 if archive is not None: 189 with filepath.open("rb") as f: 190 info = archive.gettarinfo(fileobj=f) 191 info.name = filename 192 info.uid = 0 193 info.gid = 0 194 info.uname = "root" 195 info.gname = "root" 196 197 if source_date_epoch is not None and info.mtime > source_date_epoch: 198 info.mtime = source_date_epoch 199 200 archive.addfile(info, f) 201 202 file_counter += 1 203 204 bb.debug(1, "Added %d files to %s" % (len(spdx_files), objset.doc._id)) 205 206 return spdx_files 207 208 209def get_package_sources_from_debug( 210 d, package, package_files, sources, source_hash_cache 211): 212 def file_path_match(file_path, pkg_file): 213 if file_path.lstrip("/") == pkg_file.name.lstrip("/"): 214 return True 215 216 for e in pkg_file.extension: 217 if isinstance(e, oe.sbom30.OEFileNameAliasExtension): 218 for a in e.aliases: 219 if file_path.lstrip("/") == a.lstrip("/"): 220 return True 221 222 return False 223 224 debug_search_paths = [ 225 Path(d.getVar("SPDXWORK")), 226 Path(d.getVar("PKGD")), 227 Path(d.getVar("STAGING_DIR_TARGET")), 228 Path(d.getVar("STAGING_DIR_NATIVE")), 229 Path(d.getVar("STAGING_KERNEL_DIR")), 230 ] 231 232 pkg_data = oe.packagedata.read_subpkgdata_extended(package, d) 233 234 if pkg_data is None: 235 return 236 237 dep_source_files = set() 238 239 for file_path, file_data in pkg_data["files_info"].items(): 240 if not "debugsrc" in file_data: 241 continue 242 243 if not any(file_path_match(file_path, pkg_file) for pkg_file in package_files): 244 bb.fatal( 245 "No package file found for %s in %s; SPDX found: %s" 246 % (str(file_path), package, " ".join(p.name for p in package_files)) 247 ) 248 continue 249 250 for debugsrc in file_data["debugsrc"]: 251 for search in debug_search_paths: 252 if debugsrc.startswith("/usr/src/kernel"): 253 debugsrc_path = search / debugsrc.replace("/usr/src/kernel/", "") 254 else: 255 debugsrc_path = search / debugsrc.lstrip("/") 256 257 if debugsrc_path in source_hash_cache: 258 file_sha256 = source_hash_cache[debugsrc_path] 259 if file_sha256 is None: 260 continue 261 else: 262 # We can only hash files below, skip directories, links, etc. 263 if not debugsrc_path.is_file(): 264 source_hash_cache[debugsrc_path] = None 265 continue 266 267 file_sha256 = bb.utils.sha256_file(debugsrc_path) 268 source_hash_cache[debugsrc_path] = file_sha256 269 270 if file_sha256 in sources: 271 source_file = sources[file_sha256] 272 dep_source_files.add(source_file) 273 else: 274 bb.debug( 275 1, 276 "Debug source %s with SHA256 %s not found in any dependency" 277 % (str(debugsrc_path), file_sha256), 278 ) 279 break 280 else: 281 bb.debug(1, "Debug source %s not found" % debugsrc) 282 283 return dep_source_files 284 285 286def collect_dep_objsets(d, build): 287 deps = oe.spdx_common.get_spdx_deps(d) 288 289 dep_objsets = [] 290 dep_builds = set() 291 292 dep_build_spdxids = set() 293 for dep in deps: 294 bb.debug(1, "Fetching SPDX for dependency %s" % (dep.pn)) 295 dep_build, dep_objset = oe.sbom30.find_root_obj_in_jsonld( 296 d, "recipes", "recipe-" + dep.pn, oe.spdx30.build_Build 297 ) 298 # If the dependency is part of the taskhash, return it to be linked 299 # against. Otherwise, it cannot be linked against because this recipe 300 # will not rebuilt if dependency changes 301 if dep.in_taskhash: 302 dep_objsets.append(dep_objset) 303 304 # The build _can_ be linked against (by alias) 305 dep_builds.add(dep_build) 306 307 return dep_objsets, dep_builds 308 309 310def index_sources_by_hash(sources, dest): 311 for s in sources: 312 if not isinstance(s, oe.spdx30.software_File): 313 continue 314 315 if s.software_primaryPurpose != oe.spdx30.software_SoftwarePurpose.source: 316 continue 317 318 for v in s.verifiedUsing: 319 if v.algorithm == oe.spdx30.HashAlgorithm.sha256: 320 if not v.hashValue in dest: 321 dest[v.hashValue] = s 322 break 323 else: 324 bb.fatal(f"No SHA256 found for {s.name}") 325 326 327def collect_dep_sources(dep_objsets, dest): 328 for objset in dep_objsets: 329 # Don't collect sources from native recipes as they 330 # match non-native sources also. 331 if objset.is_native(): 332 continue 333 334 bb.debug(1, "Fetching Sources for dependency %s" % (objset.doc.name)) 335 336 dep_build = objset.find_root(oe.spdx30.build_Build) 337 if not dep_build: 338 bb.fatal("Unable to find a build") 339 340 for e in objset.foreach_type(oe.spdx30.Relationship): 341 if dep_build is not e.from_: 342 continue 343 344 if e.relationshipType != oe.spdx30.RelationshipType.hasInput: 345 continue 346 347 index_sources_by_hash(e.to, dest) 348 349 350def add_download_files(d, objset): 351 inputs = set() 352 353 urls = d.getVar("SRC_URI").split() 354 fetch = bb.fetch2.Fetch(urls, d) 355 356 for download_idx, src_uri in enumerate(urls): 357 fd = fetch.ud[src_uri] 358 359 file_name = os.path.basename(fetch.localpath(src_uri)) 360 if oe.patch.patch_path(src_uri, fetch, "", expand=False): 361 primary_purpose = oe.spdx30.software_SoftwarePurpose.patch 362 else: 363 primary_purpose = oe.spdx30.software_SoftwarePurpose.source 364 365 if fd.type == "file": 366 if os.path.isdir(fd.localpath): 367 walk_idx = 1 368 for root, dirs, files in os.walk(fd.localpath, onerror=walk_error): 369 dirs.sort() 370 files.sort() 371 for f in files: 372 f_path = os.path.join(root, f) 373 if os.path.islink(f_path): 374 # TODO: SPDX doesn't support symlinks yet 375 continue 376 377 file = objset.new_file( 378 objset.new_spdxid( 379 "source", str(download_idx + 1), str(walk_idx) 380 ), 381 os.path.join( 382 file_name, os.path.relpath(f_path, fd.localpath) 383 ), 384 f_path, 385 purposes=[primary_purpose], 386 ) 387 388 inputs.add(file) 389 walk_idx += 1 390 391 else: 392 file = objset.new_file( 393 objset.new_spdxid("source", str(download_idx + 1)), 394 file_name, 395 fd.localpath, 396 purposes=[primary_purpose], 397 ) 398 inputs.add(file) 399 400 else: 401 dl = objset.add( 402 oe.spdx30.software_Package( 403 _id=objset.new_spdxid("source", str(download_idx + 1)), 404 creationInfo=objset.doc.creationInfo, 405 name=file_name, 406 software_primaryPurpose=primary_purpose, 407 software_downloadLocation=oe.spdx_common.fetch_data_to_uri( 408 fd, fd.name 409 ), 410 ) 411 ) 412 413 if fd.method.supports_checksum(fd): 414 # TODO Need something better than hard coding this 415 for checksum_id in ["sha256", "sha1"]: 416 expected_checksum = getattr( 417 fd, "%s_expected" % checksum_id, None 418 ) 419 if expected_checksum is None: 420 continue 421 422 dl.verifiedUsing.append( 423 oe.spdx30.Hash( 424 algorithm=getattr(oe.spdx30.HashAlgorithm, checksum_id), 425 hashValue=expected_checksum, 426 ) 427 ) 428 429 inputs.add(dl) 430 431 return inputs 432 433 434def set_purposes(d, element, *var_names, force_purposes=[]): 435 purposes = force_purposes[:] 436 437 for var_name in var_names: 438 val = d.getVar(var_name) 439 if val: 440 purposes.extend(val.split()) 441 break 442 443 if not purposes: 444 bb.warn("No SPDX purposes found in %s" % " ".join(var_names)) 445 return 446 447 element.software_primaryPurpose = getattr( 448 oe.spdx30.software_SoftwarePurpose, purposes[0] 449 ) 450 element.software_additionalPurpose = [ 451 getattr(oe.spdx30.software_SoftwarePurpose, p) for p in purposes[1:] 452 ] 453 454 455def create_spdx(d): 456 def set_var_field(var, obj, name, package=None): 457 val = None 458 if package: 459 val = d.getVar("%s:%s" % (var, package)) 460 461 if not val: 462 val = d.getVar(var) 463 464 if val: 465 setattr(obj, name, val) 466 467 license_data = oe.spdx_common.load_spdx_license_data(d) 468 469 deploydir = Path(d.getVar("SPDXDEPLOY")) 470 deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX")) 471 spdx_workdir = Path(d.getVar("SPDXWORK")) 472 include_sources = d.getVar("SPDX_INCLUDE_SOURCES") == "1" 473 pkg_arch = d.getVar("SSTATE_PKGARCH") 474 is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class( 475 "cross", d 476 ) 477 include_vex = d.getVar("SPDX_INCLUDE_VEX") 478 if not include_vex in ("none", "current", "all"): 479 bb.fatal("SPDX_INCLUDE_VEX must be one of 'none', 'current', 'all'") 480 481 build_objset = oe.sbom30.ObjectSet.new_objset(d, "recipe-" + d.getVar("PN")) 482 483 build = build_objset.new_task_build("recipe", "recipe") 484 build_objset.set_element_alias(build) 485 486 build_objset.doc.rootElement.append(build) 487 488 build_objset.set_is_native(is_native) 489 490 for var in (d.getVar("SPDX_CUSTOM_ANNOTATION_VARS") or "").split(): 491 new_annotation( 492 d, 493 build_objset, 494 build, 495 "%s=%s" % (var, d.getVar(var)), 496 oe.spdx30.AnnotationType.other, 497 ) 498 499 build_inputs = set() 500 501 # Add CVEs 502 cve_by_status = {} 503 if include_vex != "none": 504 patched_cves = oe.cve_check.get_patched_cves(d) 505 for cve, patched_cve in patched_cves.items(): 506 decoded_status = { 507 "mapping": patched_cve["abbrev-status"], 508 "detail": patched_cve["status"], 509 "description": patched_cve.get("justification", None) 510 } 511 512 # If this CVE is fixed upstream, skip it unless all CVEs are 513 # specified. 514 if ( 515 include_vex != "all" 516 and "detail" in decoded_status 517 and decoded_status["detail"] 518 in ( 519 "fixed-version", 520 "cpe-stable-backport", 521 ) 522 ): 523 bb.debug(1, "Skipping %s since it is already fixed upstream" % cve) 524 continue 525 526 spdx_cve = build_objset.new_cve_vuln(cve) 527 build_objset.set_element_alias(spdx_cve) 528 529 cve_by_status.setdefault(decoded_status["mapping"], {})[cve] = ( 530 spdx_cve, 531 decoded_status["detail"], 532 decoded_status["description"], 533 ) 534 535 cpe_ids = oe.cve_check.get_cpe_ids(d.getVar("CVE_PRODUCT"), d.getVar("CVE_VERSION")) 536 537 source_files = add_download_files(d, build_objset) 538 build_inputs |= source_files 539 540 recipe_spdx_license = add_license_expression( 541 d, build_objset, d.getVar("LICENSE"), license_data 542 ) 543 build_objset.new_relationship( 544 source_files, 545 oe.spdx30.RelationshipType.hasConcludedLicense, 546 [oe.sbom30.get_element_link_id(recipe_spdx_license)], 547 ) 548 549 dep_sources = {} 550 if oe.spdx_common.process_sources(d) and include_sources: 551 bb.debug(1, "Adding source files to SPDX") 552 oe.spdx_common.get_patched_src(d) 553 554 files = add_package_files( 555 d, 556 build_objset, 557 spdx_workdir, 558 lambda file_counter: build_objset.new_spdxid( 559 "sourcefile", str(file_counter) 560 ), 561 lambda filepath: [oe.spdx30.software_SoftwarePurpose.source], 562 license_data, 563 ignore_dirs=[".git"], 564 ignore_top_level_dirs=["temp"], 565 archive=None, 566 ) 567 build_inputs |= files 568 index_sources_by_hash(files, dep_sources) 569 570 dep_objsets, dep_builds = collect_dep_objsets(d, build) 571 if dep_builds: 572 build_objset.new_scoped_relationship( 573 [build], 574 oe.spdx30.RelationshipType.dependsOn, 575 oe.spdx30.LifecycleScopeType.build, 576 sorted(oe.sbom30.get_element_link_id(b) for b in dep_builds), 577 ) 578 579 debug_source_ids = set() 580 source_hash_cache = {} 581 582 # Write out the package SPDX data now. It is not complete as we cannot 583 # write the runtime data, so write it to a staging area and a later task 584 # will write out the final collection 585 586 # TODO: Handle native recipe output 587 if not is_native: 588 bb.debug(1, "Collecting Dependency sources files") 589 collect_dep_sources(dep_objsets, dep_sources) 590 591 bb.build.exec_func("read_subpackage_metadata", d) 592 593 pkgdest = Path(d.getVar("PKGDEST")) 594 for package in d.getVar("PACKAGES").split(): 595 if not oe.packagedata.packaged(package, d): 596 continue 597 598 pkg_name = d.getVar("PKG:%s" % package) or package 599 600 bb.debug(1, "Creating SPDX for package %s" % pkg_name) 601 602 pkg_objset = oe.sbom30.ObjectSet.new_objset(d, "package-" + pkg_name) 603 604 spdx_package = pkg_objset.add_root( 605 oe.spdx30.software_Package( 606 _id=pkg_objset.new_spdxid("package", pkg_name), 607 creationInfo=pkg_objset.doc.creationInfo, 608 name=pkg_name, 609 software_packageVersion=d.getVar("SPDX_PACKAGE_VERSION"), 610 ) 611 ) 612 set_timestamp_now(d, spdx_package, "builtTime") 613 614 set_purposes( 615 d, 616 spdx_package, 617 "SPDX_PACKAGE_ADDITIONAL_PURPOSE:%s" % package, 618 "SPDX_PACKAGE_ADDITIONAL_PURPOSE", 619 force_purposes=["install"], 620 ) 621 622 supplier = build_objset.new_agent("SPDX_PACKAGE_SUPPLIER") 623 if supplier is not None: 624 spdx_package.suppliedBy = ( 625 supplier if isinstance(supplier, str) else supplier._id 626 ) 627 628 set_var_field( 629 "HOMEPAGE", spdx_package, "software_homePage", package=package 630 ) 631 set_var_field("SUMMARY", spdx_package, "summary", package=package) 632 set_var_field("DESCRIPTION", spdx_package, "description", package=package) 633 634 if d.getVar("SPDX_PACKAGE_URL:%s" % package) or d.getVar("SPDX_PACKAGE_URL"): 635 set_var_field( 636 "SPDX_PACKAGE_URL", 637 spdx_package, 638 "software_packageUrl", 639 package=package 640 ) 641 642 pkg_objset.new_scoped_relationship( 643 [oe.sbom30.get_element_link_id(build)], 644 oe.spdx30.RelationshipType.hasOutput, 645 oe.spdx30.LifecycleScopeType.build, 646 [spdx_package], 647 ) 648 649 for cpe_id in cpe_ids: 650 spdx_package.externalIdentifier.append( 651 oe.spdx30.ExternalIdentifier( 652 externalIdentifierType=oe.spdx30.ExternalIdentifierType.cpe23, 653 identifier=cpe_id, 654 ) 655 ) 656 657 # TODO: Generate a file for each actual IPK/DEB/RPM/TGZ file 658 # generated and link it to the package 659 # spdx_package_file = pkg_objset.add(oe.spdx30.software_File( 660 # _id=pkg_objset.new_spdxid("distribution", pkg_name), 661 # creationInfo=pkg_objset.doc.creationInfo, 662 # name=pkg_name, 663 # software_primaryPurpose=spdx_package.software_primaryPurpose, 664 # software_additionalPurpose=spdx_package.software_additionalPurpose, 665 # )) 666 # set_timestamp_now(d, spdx_package_file, "builtTime") 667 668 ## TODO add hashes 669 # pkg_objset.new_relationship( 670 # [spdx_package], 671 # oe.spdx30.RelationshipType.hasDistributionArtifact, 672 # [spdx_package_file], 673 # ) 674 675 # NOTE: licenses live in the recipe collection and are referenced 676 # by ID in the package collection(s). This helps reduce duplication 677 # (since a lot of packages will have the same license), and also 678 # prevents duplicate license SPDX IDs in the packages 679 package_license = d.getVar("LICENSE:%s" % package) 680 if package_license and package_license != d.getVar("LICENSE"): 681 package_spdx_license = add_license_expression( 682 d, build_objset, package_license, license_data 683 ) 684 else: 685 package_spdx_license = recipe_spdx_license 686 687 pkg_objset.new_relationship( 688 [spdx_package], 689 oe.spdx30.RelationshipType.hasConcludedLicense, 690 [oe.sbom30.get_element_link_id(package_spdx_license)], 691 ) 692 693 # NOTE: CVE Elements live in the recipe collection 694 all_cves = set() 695 for status, cves in cve_by_status.items(): 696 for cve, items in cves.items(): 697 spdx_cve, detail, description = items 698 spdx_cve_id = oe.sbom30.get_element_link_id(spdx_cve) 699 700 all_cves.add(spdx_cve_id) 701 702 if status == "Patched": 703 pkg_objset.new_vex_patched_relationship( 704 [spdx_cve_id], [spdx_package] 705 ) 706 elif status == "Unpatched": 707 pkg_objset.new_vex_unpatched_relationship( 708 [spdx_cve_id], [spdx_package] 709 ) 710 elif status == "Ignored": 711 spdx_vex = pkg_objset.new_vex_ignored_relationship( 712 [spdx_cve_id], 713 [spdx_package], 714 impact_statement=description, 715 ) 716 717 if detail in ( 718 "ignored", 719 "cpe-incorrect", 720 "disputed", 721 "upstream-wontfix", 722 ): 723 # VEX doesn't have justifications for this 724 pass 725 elif detail in ( 726 "not-applicable-config", 727 "not-applicable-platform", 728 ): 729 for v in spdx_vex: 730 v.security_justificationType = ( 731 oe.spdx30.security_VexJustificationType.vulnerableCodeNotPresent 732 ) 733 else: 734 bb.fatal(f"Unknown detail '{detail}' for ignored {cve}") 735 elif status == "Unknown": 736 bb.note(f"Skipping {cve} with status 'Unknown'") 737 else: 738 bb.fatal(f"Unknown {cve} status '{status}'") 739 740 if all_cves: 741 pkg_objset.new_relationship( 742 [spdx_package], 743 oe.spdx30.RelationshipType.hasAssociatedVulnerability, 744 sorted(list(all_cves)), 745 ) 746 747 bb.debug(1, "Adding package files to SPDX for package %s" % pkg_name) 748 package_files = add_package_files( 749 d, 750 pkg_objset, 751 pkgdest / package, 752 lambda file_counter: pkg_objset.new_spdxid( 753 "package", pkg_name, "file", str(file_counter) 754 ), 755 # TODO: Can we know the purpose here? 756 lambda filepath: [], 757 license_data, 758 ignore_top_level_dirs=["CONTROL", "DEBIAN"], 759 archive=None, 760 ) 761 762 if package_files: 763 pkg_objset.new_relationship( 764 [spdx_package], 765 oe.spdx30.RelationshipType.contains, 766 sorted(list(package_files)), 767 ) 768 769 if include_sources: 770 debug_sources = get_package_sources_from_debug( 771 d, package, package_files, dep_sources, source_hash_cache 772 ) 773 debug_source_ids |= set( 774 oe.sbom30.get_element_link_id(d) for d in debug_sources 775 ) 776 777 oe.sbom30.write_recipe_jsonld_doc( 778 d, pkg_objset, "packages-staging", deploydir, create_spdx_id_links=False 779 ) 780 781 if include_sources: 782 bb.debug(1, "Adding sysroot files to SPDX") 783 sysroot_files = add_package_files( 784 d, 785 build_objset, 786 d.expand("${COMPONENTS_DIR}/${PACKAGE_ARCH}/${PN}"), 787 lambda file_counter: build_objset.new_spdxid("sysroot", str(file_counter)), 788 lambda filepath: [], 789 license_data, 790 archive=None, 791 ) 792 793 if sysroot_files: 794 build_objset.new_scoped_relationship( 795 [build], 796 oe.spdx30.RelationshipType.hasOutput, 797 oe.spdx30.LifecycleScopeType.build, 798 sorted(list(sysroot_files)), 799 ) 800 801 if build_inputs or debug_source_ids: 802 build_objset.new_scoped_relationship( 803 [build], 804 oe.spdx30.RelationshipType.hasInput, 805 oe.spdx30.LifecycleScopeType.build, 806 sorted(list(build_inputs)) + sorted(list(debug_source_ids)), 807 ) 808 809 oe.sbom30.write_recipe_jsonld_doc(d, build_objset, "recipes", deploydir) 810 811 812def create_package_spdx(d): 813 deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX")) 814 deploydir = Path(d.getVar("SPDXRUNTIMEDEPLOY")) 815 is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class( 816 "cross", d 817 ) 818 819 providers = oe.spdx_common.collect_package_providers(d) 820 pkg_arch = d.getVar("SSTATE_PKGARCH") 821 822 if is_native: 823 return 824 825 bb.build.exec_func("read_subpackage_metadata", d) 826 827 dep_package_cache = {} 828 829 # Any element common to all packages that need to be referenced by ID 830 # should be written into this objset set 831 common_objset = oe.sbom30.ObjectSet.new_objset( 832 d, "%s-package-common" % d.getVar("PN") 833 ) 834 835 pkgdest = Path(d.getVar("PKGDEST")) 836 for package in d.getVar("PACKAGES").split(): 837 localdata = bb.data.createCopy(d) 838 pkg_name = d.getVar("PKG:%s" % package) or package 839 localdata.setVar("PKG", pkg_name) 840 localdata.setVar("OVERRIDES", d.getVar("OVERRIDES", False) + ":" + package) 841 842 if not oe.packagedata.packaged(package, localdata): 843 continue 844 845 spdx_package, pkg_objset = oe.sbom30.load_obj_in_jsonld( 846 d, 847 pkg_arch, 848 "packages-staging", 849 "package-" + pkg_name, 850 oe.spdx30.software_Package, 851 software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install, 852 ) 853 854 # We will write out a new collection, so link it to the new 855 # creation info in the common package data. The old creation info 856 # should still exist and be referenced by all the existing elements 857 # in the package 858 pkg_objset.creationInfo = pkg_objset.copy_creation_info( 859 common_objset.doc.creationInfo 860 ) 861 862 runtime_spdx_deps = set() 863 864 deps = bb.utils.explode_dep_versions2(localdata.getVar("RDEPENDS") or "") 865 seen_deps = set() 866 for dep, _ in deps.items(): 867 if dep in seen_deps: 868 continue 869 870 if dep not in providers: 871 continue 872 873 (dep, _) = providers[dep] 874 875 if not oe.packagedata.packaged(dep, localdata): 876 continue 877 878 dep_pkg_data = oe.packagedata.read_subpkgdata_dict(dep, d) 879 dep_pkg = dep_pkg_data["PKG"] 880 881 if dep in dep_package_cache: 882 dep_spdx_package = dep_package_cache[dep] 883 else: 884 bb.debug(1, "Searching for %s" % dep_pkg) 885 dep_spdx_package, _ = oe.sbom30.find_root_obj_in_jsonld( 886 d, 887 "packages-staging", 888 "package-" + dep_pkg, 889 oe.spdx30.software_Package, 890 software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install, 891 ) 892 dep_package_cache[dep] = dep_spdx_package 893 894 runtime_spdx_deps.add(dep_spdx_package) 895 seen_deps.add(dep) 896 897 if runtime_spdx_deps: 898 pkg_objset.new_scoped_relationship( 899 [spdx_package], 900 oe.spdx30.RelationshipType.dependsOn, 901 oe.spdx30.LifecycleScopeType.runtime, 902 [oe.sbom30.get_element_link_id(dep) for dep in runtime_spdx_deps], 903 ) 904 905 oe.sbom30.write_recipe_jsonld_doc(d, pkg_objset, "packages", deploydir) 906 907 oe.sbom30.write_recipe_jsonld_doc(d, common_objset, "common-package", deploydir) 908 909 910def write_bitbake_spdx(d): 911 # Set PN to "bitbake" so that SPDX IDs can be generated 912 d.setVar("PN", "bitbake") 913 d.setVar("BB_TASKHASH", "bitbake") 914 oe.spdx_common.load_spdx_license_data(d) 915 916 deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX")) 917 918 objset = oe.sbom30.ObjectSet.new_objset(d, "bitbake", False) 919 920 host_import_key = d.getVar("SPDX_BUILD_HOST") 921 invoked_by = objset.new_agent("SPDX_INVOKED_BY", add=False) 922 on_behalf_of = objset.new_agent("SPDX_ON_BEHALF_OF", add=False) 923 924 if d.getVar("SPDX_INCLUDE_BITBAKE_PARENT_BUILD") == "1": 925 # Since the Build objects are unique, we may as well set the creation 926 # time to the current time instead of the fallback SDE 927 objset.doc.creationInfo.created = datetime.now(timezone.utc) 928 929 # Each invocation of bitbake should have a unique ID since it is a 930 # unique build 931 nonce = os.urandom(16).hex() 932 933 build = objset.add_root( 934 oe.spdx30.build_Build( 935 _id=objset.new_spdxid(nonce, include_unihash=False), 936 creationInfo=objset.doc.creationInfo, 937 build_buildType=oe.sbom30.SPDX_BUILD_TYPE, 938 ) 939 ) 940 set_timestamp_now(d, build, "build_buildStartTime") 941 942 if host_import_key: 943 objset.new_scoped_relationship( 944 [build], 945 oe.spdx30.RelationshipType.hasHost, 946 oe.spdx30.LifecycleScopeType.build, 947 [objset.new_import(host_import_key)], 948 ) 949 950 if invoked_by: 951 objset.add(invoked_by) 952 invoked_by_spdx = objset.new_scoped_relationship( 953 [build], 954 oe.spdx30.RelationshipType.invokedBy, 955 oe.spdx30.LifecycleScopeType.build, 956 [invoked_by], 957 ) 958 959 if on_behalf_of: 960 objset.add(on_behalf_of) 961 objset.new_scoped_relationship( 962 [on_behalf_of], 963 oe.spdx30.RelationshipType.delegatedTo, 964 oe.spdx30.LifecycleScopeType.build, 965 invoked_by_spdx, 966 ) 967 968 elif on_behalf_of: 969 bb.warn("SPDX_ON_BEHALF_OF has no effect if SPDX_INVOKED_BY is not set") 970 971 else: 972 if host_import_key: 973 bb.warn( 974 "SPDX_BUILD_HOST has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set" 975 ) 976 977 if invoked_by: 978 bb.warn( 979 "SPDX_INVOKED_BY has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set" 980 ) 981 982 if on_behalf_of: 983 bb.warn( 984 "SPDX_ON_BEHALF_OF has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set" 985 ) 986 987 for obj in objset.foreach_type(oe.spdx30.Element): 988 obj.extension.append(oe.sbom30.OEIdAliasExtension()) 989 990 oe.sbom30.write_jsonld_doc(d, objset, deploy_dir_spdx / "bitbake.spdx.json") 991 992 993def collect_build_package_inputs(d, objset, build, packages, files_by_hash=None): 994 import oe.sbom30 995 996 providers = oe.spdx_common.collect_package_providers(d) 997 998 build_deps = set() 999 missing_providers = set() 1000 1001 for name in sorted(packages.keys()): 1002 if name not in providers: 1003 missing_providers.add(name) 1004 continue 1005 1006 pkg_name, pkg_hashfn = providers[name] 1007 1008 # Copy all of the package SPDX files into the Sbom elements 1009 pkg_spdx, pkg_objset = oe.sbom30.find_root_obj_in_jsonld( 1010 d, 1011 "packages", 1012 "package-" + pkg_name, 1013 oe.spdx30.software_Package, 1014 software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install, 1015 ) 1016 build_deps.add(oe.sbom30.get_element_link_id(pkg_spdx)) 1017 1018 if files_by_hash is not None: 1019 for h, f in pkg_objset.by_sha256_hash.items(): 1020 files_by_hash.setdefault(h, set()).update(f) 1021 1022 if missing_providers: 1023 bb.fatal( 1024 f"Unable to find SPDX provider(s) for: {', '.join(sorted(missing_providers))}" 1025 ) 1026 1027 if build_deps: 1028 objset.new_scoped_relationship( 1029 [build], 1030 oe.spdx30.RelationshipType.hasInput, 1031 oe.spdx30.LifecycleScopeType.build, 1032 sorted(list(build_deps)), 1033 ) 1034 1035 1036def create_rootfs_spdx(d): 1037 deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX")) 1038 deploydir = Path(d.getVar("SPDXROOTFSDEPLOY")) 1039 root_packages_file = Path(d.getVar("SPDX_ROOTFS_PACKAGES")) 1040 image_basename = d.getVar("IMAGE_BASENAME") 1041 image_rootfs = d.getVar("IMAGE_ROOTFS") 1042 machine = d.getVar("MACHINE") 1043 1044 with root_packages_file.open("r") as f: 1045 packages = json.load(f) 1046 1047 objset = oe.sbom30.ObjectSet.new_objset( 1048 d, "%s-%s-rootfs" % (image_basename, machine) 1049 ) 1050 1051 rootfs = objset.add_root( 1052 oe.spdx30.software_Package( 1053 _id=objset.new_spdxid("rootfs", image_basename), 1054 creationInfo=objset.doc.creationInfo, 1055 name=image_basename, 1056 software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.archive, 1057 ) 1058 ) 1059 set_timestamp_now(d, rootfs, "builtTime") 1060 1061 rootfs_build = objset.add_root(objset.new_task_build("rootfs", "rootfs")) 1062 set_timestamp_now(d, rootfs_build, "build_buildEndTime") 1063 1064 objset.new_scoped_relationship( 1065 [rootfs_build], 1066 oe.spdx30.RelationshipType.hasOutput, 1067 oe.spdx30.LifecycleScopeType.build, 1068 [rootfs], 1069 ) 1070 1071 files_by_hash = {} 1072 collect_build_package_inputs(d, objset, rootfs_build, packages, files_by_hash) 1073 1074 files = set() 1075 for dirpath, dirnames, filenames in os.walk(image_rootfs, onerror=walk_error): 1076 dirnames.sort() 1077 filenames.sort() 1078 for fn in filenames: 1079 fpath = Path(dirpath) / fn 1080 if fpath.is_symlink() or not fpath.is_file(): 1081 continue 1082 1083 relpath = str(fpath.relative_to(image_rootfs)) 1084 h = bb.utils.sha256_file(fpath) 1085 1086 found = False 1087 if h in files_by_hash: 1088 for f in files_by_hash[h]: 1089 if isinstance(f, oe.spdx30.software_File) and f.name == relpath: 1090 files.add(oe.sbom30.get_element_link_id(f)) 1091 found = True 1092 break 1093 1094 if not found: 1095 files.add( 1096 objset.new_file( 1097 objset.new_spdxid("rootfs-file", relpath), 1098 relpath, 1099 fpath, 1100 ) 1101 ) 1102 1103 if files: 1104 objset.new_relationship( 1105 [rootfs], 1106 oe.spdx30.RelationshipType.contains, 1107 sorted(list(files)), 1108 ) 1109 1110 oe.sbom30.write_recipe_jsonld_doc(d, objset, "rootfs", deploydir) 1111 1112 1113def create_image_spdx(d): 1114 import oe.sbom30 1115 1116 image_deploy_dir = Path(d.getVar("IMGDEPLOYDIR")) 1117 manifest_path = Path(d.getVar("IMAGE_OUTPUT_MANIFEST")) 1118 spdx_work_dir = Path(d.getVar("SPDXIMAGEWORK")) 1119 1120 image_basename = d.getVar("IMAGE_BASENAME") 1121 machine = d.getVar("MACHINE") 1122 1123 objset = oe.sbom30.ObjectSet.new_objset( 1124 d, "%s-%s-image" % (image_basename, machine) 1125 ) 1126 1127 with manifest_path.open("r") as f: 1128 manifest = json.load(f) 1129 1130 builds = [] 1131 for task in manifest: 1132 imagetype = task["imagetype"] 1133 taskname = task["taskname"] 1134 1135 image_build = objset.add_root( 1136 objset.new_task_build(taskname, "image/%s" % imagetype) 1137 ) 1138 set_timestamp_now(d, image_build, "build_buildEndTime") 1139 builds.append(image_build) 1140 1141 artifacts = [] 1142 1143 for image in task["images"]: 1144 image_filename = image["filename"] 1145 image_path = image_deploy_dir / image_filename 1146 if os.path.isdir(image_path): 1147 a = add_package_files( 1148 d, 1149 objset, 1150 image_path, 1151 lambda file_counter: objset.new_spdxid( 1152 "imagefile", str(file_counter) 1153 ), 1154 lambda filepath: [], 1155 license_data=None, 1156 ignore_dirs=[], 1157 ignore_top_level_dirs=[], 1158 archive=None, 1159 ) 1160 artifacts.extend(a) 1161 else: 1162 a = objset.add_root( 1163 oe.spdx30.software_File( 1164 _id=objset.new_spdxid("image", image_filename), 1165 creationInfo=objset.doc.creationInfo, 1166 name=image_filename, 1167 verifiedUsing=[ 1168 oe.spdx30.Hash( 1169 algorithm=oe.spdx30.HashAlgorithm.sha256, 1170 hashValue=bb.utils.sha256_file(image_path), 1171 ) 1172 ], 1173 ) 1174 ) 1175 1176 artifacts.append(a) 1177 1178 for a in artifacts: 1179 set_purposes( 1180 d, a, "SPDX_IMAGE_PURPOSE:%s" % imagetype, "SPDX_IMAGE_PURPOSE" 1181 ) 1182 1183 set_timestamp_now(d, a, "builtTime") 1184 1185 1186 if artifacts: 1187 objset.new_scoped_relationship( 1188 [image_build], 1189 oe.spdx30.RelationshipType.hasOutput, 1190 oe.spdx30.LifecycleScopeType.build, 1191 artifacts, 1192 ) 1193 1194 if builds: 1195 rootfs_image, _ = oe.sbom30.find_root_obj_in_jsonld( 1196 d, 1197 "rootfs", 1198 "%s-%s-rootfs" % (image_basename, machine), 1199 oe.spdx30.software_Package, 1200 # TODO: Should use a purpose to filter here? 1201 ) 1202 objset.new_scoped_relationship( 1203 builds, 1204 oe.spdx30.RelationshipType.hasInput, 1205 oe.spdx30.LifecycleScopeType.build, 1206 [oe.sbom30.get_element_link_id(rootfs_image)], 1207 ) 1208 1209 objset.add_aliases() 1210 objset.link() 1211 oe.sbom30.write_recipe_jsonld_doc(d, objset, "image", spdx_work_dir) 1212 1213 1214def create_image_sbom_spdx(d): 1215 import oe.sbom30 1216 1217 image_name = d.getVar("IMAGE_NAME") 1218 image_basename = d.getVar("IMAGE_BASENAME") 1219 image_link_name = d.getVar("IMAGE_LINK_NAME") 1220 imgdeploydir = Path(d.getVar("SPDXIMAGEDEPLOYDIR")) 1221 machine = d.getVar("MACHINE") 1222 1223 spdx_path = imgdeploydir / (image_name + ".spdx.json") 1224 1225 root_elements = [] 1226 1227 # TODO: Do we need to add the rootfs or are the image files sufficient? 1228 rootfs_image, _ = oe.sbom30.find_root_obj_in_jsonld( 1229 d, 1230 "rootfs", 1231 "%s-%s-rootfs" % (image_basename, machine), 1232 oe.spdx30.software_Package, 1233 # TODO: Should use a purpose here? 1234 ) 1235 root_elements.append(oe.sbom30.get_element_link_id(rootfs_image)) 1236 1237 image_objset, _ = oe.sbom30.find_jsonld( 1238 d, "image", "%s-%s-image" % (image_basename, machine), required=True 1239 ) 1240 for o in image_objset.foreach_root(oe.spdx30.software_File): 1241 root_elements.append(oe.sbom30.get_element_link_id(o)) 1242 1243 objset, sbom = oe.sbom30.create_sbom(d, image_name, root_elements) 1244 1245 oe.sbom30.write_jsonld_doc(d, objset, spdx_path) 1246 1247 def make_image_link(target_path, suffix): 1248 if image_link_name: 1249 link = imgdeploydir / (image_link_name + suffix) 1250 if link != target_path: 1251 link.symlink_to(os.path.relpath(target_path, link.parent)) 1252 1253 make_image_link(spdx_path, ".spdx.json") 1254 1255 1256def sdk_create_spdx(d, sdk_type, spdx_work_dir, toolchain_outputname): 1257 sdk_name = toolchain_outputname + "-" + sdk_type 1258 sdk_packages = oe.sdk.sdk_list_installed_packages(d, sdk_type == "target") 1259 1260 objset = oe.sbom30.ObjectSet.new_objset(d, sdk_name) 1261 1262 sdk_rootfs = objset.add_root( 1263 oe.spdx30.software_Package( 1264 _id=objset.new_spdxid("sdk-rootfs", sdk_name), 1265 creationInfo=objset.doc.creationInfo, 1266 name=sdk_name, 1267 software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.archive, 1268 ) 1269 ) 1270 set_timestamp_now(d, sdk_rootfs, "builtTime") 1271 1272 sdk_build = objset.add_root(objset.new_task_build("sdk-rootfs", "sdk-rootfs")) 1273 set_timestamp_now(d, sdk_build, "build_buildEndTime") 1274 1275 objset.new_scoped_relationship( 1276 [sdk_build], 1277 oe.spdx30.RelationshipType.hasOutput, 1278 oe.spdx30.LifecycleScopeType.build, 1279 [sdk_rootfs], 1280 ) 1281 1282 collect_build_package_inputs(d, objset, sdk_build, sdk_packages) 1283 1284 objset.add_aliases() 1285 oe.sbom30.write_jsonld_doc(d, objset, spdx_work_dir / "sdk-rootfs.spdx.json") 1286 1287 1288def create_sdk_sbom(d, sdk_deploydir, spdx_work_dir, toolchain_outputname): 1289 # Load the document written earlier 1290 rootfs_objset = oe.sbom30.load_jsonld( 1291 d, spdx_work_dir / "sdk-rootfs.spdx.json", required=True 1292 ) 1293 1294 # Create a new build for the SDK installer 1295 sdk_build = rootfs_objset.new_task_build("sdk-populate", "sdk-populate") 1296 set_timestamp_now(d, sdk_build, "build_buildEndTime") 1297 1298 rootfs = rootfs_objset.find_root(oe.spdx30.software_Package) 1299 if rootfs is None: 1300 bb.fatal("Unable to find rootfs artifact") 1301 1302 rootfs_objset.new_scoped_relationship( 1303 [sdk_build], 1304 oe.spdx30.RelationshipType.hasInput, 1305 oe.spdx30.LifecycleScopeType.build, 1306 [rootfs], 1307 ) 1308 1309 files = set() 1310 root_files = [] 1311 1312 # NOTE: os.walk() doesn't return symlinks 1313 for dirpath, dirnames, filenames in os.walk(sdk_deploydir, onerror=walk_error): 1314 dirnames.sort() 1315 filenames.sort() 1316 for fn in filenames: 1317 fpath = Path(dirpath) / fn 1318 if not fpath.is_file() or fpath.is_symlink(): 1319 continue 1320 1321 relpath = str(fpath.relative_to(sdk_deploydir)) 1322 1323 f = rootfs_objset.new_file( 1324 rootfs_objset.new_spdxid("sdk-installer", relpath), 1325 relpath, 1326 fpath, 1327 ) 1328 set_timestamp_now(d, f, "builtTime") 1329 1330 if fn.endswith(".manifest"): 1331 f.software_primaryPurpose = oe.spdx30.software_SoftwarePurpose.manifest 1332 elif fn.endswith(".testdata.json"): 1333 f.software_primaryPurpose = ( 1334 oe.spdx30.software_SoftwarePurpose.configuration 1335 ) 1336 else: 1337 set_purposes(d, f, "SPDX_SDK_PURPOSE") 1338 root_files.append(f) 1339 1340 files.add(f) 1341 1342 if files: 1343 rootfs_objset.new_scoped_relationship( 1344 [sdk_build], 1345 oe.spdx30.RelationshipType.hasOutput, 1346 oe.spdx30.LifecycleScopeType.build, 1347 files, 1348 ) 1349 else: 1350 bb.warn(f"No SDK output files found in {sdk_deploydir}") 1351 1352 objset, sbom = oe.sbom30.create_sbom( 1353 d, toolchain_outputname, sorted(list(files)), [rootfs_objset] 1354 ) 1355 1356 oe.sbom30.write_jsonld_doc( 1357 d, objset, sdk_deploydir / (toolchain_outputname + ".spdx.json") 1358 ) 1359