#
# Copyright OpenEmbedded Contributors
#
# SPDX-License-Identifier: GPL-2.0-only
#

DEPLOY_DIR_SPDX ??= "${DEPLOY_DIR}/spdx"

# The product name that the CVE database uses.  Defaults to BPN, but may need to
# be overridden per recipe (for example tiff.bb sets CVE_PRODUCT=libtiff).
CVE_PRODUCT ??= "${BPN}"
CVE_VERSION ??= "${PV}"
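# For example, tiff.bb sets:
#   CVE_PRODUCT = "libtiff"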

SPDXDIR ??= "${WORKDIR}/spdx"
SPDXDEPLOY = "${SPDXDIR}/deploy"
SPDXWORK = "${SPDXDIR}/work"
SPDXIMAGEWORK = "${SPDXDIR}/image-work"
SPDXSDKWORK = "${SPDXDIR}/sdk-work"
SPDXDEPS = "${SPDXDIR}/deps.json"

SPDX_TOOL_NAME ??= "oe-spdx-creator"
SPDX_TOOL_VERSION ??= "1.0"

SPDXRUNTIMEDEPLOY = "${SPDXDIR}/runtime-deploy"

SPDX_INCLUDE_SOURCES ??= "0"
SPDX_ARCHIVE_SOURCES ??= "0"
SPDX_ARCHIVE_PACKAGED ??= "0"

SPDX_UUID_NAMESPACE ??= "sbom.openembedded.org"
SPDX_NAMESPACE_PREFIX ??= "http://spdx.org/spdxdoc"
SPDX_PRETTY ??= "0"
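
# The "0"/"1" flags above are weak defaults and can be overridden from a
# configuration file such as local.conf, for example:
#   SPDX_INCLUDE_SOURCES = "1"
#   SPDX_ARCHIVE_SOURCES = "1"
#   SPDX_ARCHIVE_PACKAGED = "1"
#   SPDX_PRETTY = "1"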

SPDX_LICENSES ??= "${COREBASE}/meta/files/spdx-licenses.json"

SPDX_CUSTOM_ANNOTATION_VARS ??= ""

SPDX_ORG ??= "OpenEmbedded ()"
SPDX_SUPPLIER ??= "Organization: ${SPDX_ORG}"
SPDX_SUPPLIER[doc] = "The SPDX PackageSupplier field for SPDX packages created from \
    this recipe. For SPDX documents created using this class during the build, this \
    is the contact information for the person or organization who is doing the \
    build."
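
# For example, a distro or site configuration may override the supplier
# information (illustrative values):
#   SPDX_ORG = "Example Corp ()"
#   SPDX_SUPPLIER = "Organization: ${SPDX_ORG}"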

def extract_licenses(filename):
    import re

    lic_regex = re.compile(rb'^\W*SPDX-License-Identifier:\s*([ \w\d.()+-]+?)(?:\s+\W*)?$', re.MULTILINE)

    try:
        with open(filename, 'rb') as f:
            size = min(15000, os.stat(filename).st_size)
            txt = f.read(size)
            licenses = re.findall(lic_regex, txt)
            if licenses:
                ascii_licenses = [lic.decode('ascii') for lic in licenses]
                return ascii_licenses
    except Exception as e:
        bb.warn(f"Exception reading {filename}: {e}")
    return None

def get_doc_namespace(d, doc):
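    """
    Build a deterministic namespace URI of the form
    ${SPDX_NAMESPACE_PREFIX}/<doc name>-<uuid5>, where the UUID is derived
    from SPDX_UUID_NAMESPACE and the document name, so the same document
    always gets the same namespace.
    """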
    import uuid
    namespace_uuid = uuid.uuid5(uuid.NAMESPACE_DNS, d.getVar("SPDX_UUID_NAMESPACE"))
    return "%s/%s-%s" % (d.getVar("SPDX_NAMESPACE_PREFIX"), doc.name, str(uuid.uuid5(namespace_uuid, doc.name)))

def create_annotation(d, comment):
    from datetime import datetime, timezone

    creation_time = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    annotation = oe.spdx.SPDXAnnotation()
    annotation.annotationDate = creation_time
    annotation.annotationType = "OTHER"
    annotation.annotator = "Tool: %s - %s" % (d.getVar("SPDX_TOOL_NAME"), d.getVar("SPDX_TOOL_VERSION"))
    annotation.comment = comment
    return annotation

def recipe_spdx_is_native(d, recipe):
    return any(a.annotationType == "OTHER" and
      a.annotator == "Tool: %s - %s" % (d.getVar("SPDX_TOOL_NAME"), d.getVar("SPDX_TOOL_VERSION")) and
      a.comment == "isNative" for a in recipe.annotations)

def is_work_shared_spdx(d):
    return bb.data.inherits_class('kernel', d) or ('work-shared' in d.getVar('WORKDIR'))

def get_json_indent(d):
    if d.getVar("SPDX_PRETTY") == "1":
        return 2
    return None

python() {
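    # Parse the SPDX license list (SPDX_LICENSES) once and cache it in
    # SPDX_LICENSE_DATA. The JSON file is expected to provide at least a
    # "licenseListVersion" string and a "licenses" array whose entries carry
    # a "licenseId" key; the array is converted to a dictionary below.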
    import json
    if d.getVar("SPDX_LICENSE_DATA"):
        return

    with open(d.getVar("SPDX_LICENSES"), "r") as f:
        data = json.load(f)
        # Transform the license array to a dictionary
        data["licenses"] = {l["licenseId"]: l for l in data["licenses"]}
        d.setVar("SPDX_LICENSE_DATA", data)
}

def convert_license_to_spdx(lic, document, d, existing={}):
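    """
    Convert an OE LICENSE expression into an SPDX license expression, e.g.
    "GPL-2.0-only & (MIT | CLOSED)" becomes "GPL-2.0-only AND ( MIT OR NONE )".
    Licenses that are not known SPDX identifiers (after applying any
    SPDXLICENSEMAP mapping) are emitted as LicenseRef-<name> and recorded as
    extracted licensing info on the document.
    """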
    from pathlib import Path
    import oe.spdx

    license_data = d.getVar("SPDX_LICENSE_DATA")
    extracted = {}

    def add_extracted_license(ident, name):
        nonlocal document

        if name in extracted:
            return

        extracted_info = oe.spdx.SPDXExtractedLicensingInfo()
        extracted_info.name = name
        extracted_info.licenseId = ident
        extracted_info.extractedText = None

        if name == "PD":
            # Special-case this.
            extracted_info.extractedText = "Software released to the public domain"
        else:
            # Search for the license in COMMON_LICENSE_DIR and LICENSE_PATH
            for directory in [d.getVar('COMMON_LICENSE_DIR')] + (d.getVar('LICENSE_PATH') or '').split():
                try:
                    with (Path(directory) / name).open(errors="replace") as f:
                        extracted_info.extractedText = f.read()
                        break
                except FileNotFoundError:
                    pass
            if extracted_info.extractedText is None:
                # If it's not SPDX or PD, then NO_GENERIC_LICENSE must be set
                filename = d.getVarFlag('NO_GENERIC_LICENSE', name)
                if filename:
                    filename = d.expand("${S}/" + filename)
                    with open(filename, errors="replace") as f:
                        extracted_info.extractedText = f.read()
                else:
                    bb.fatal("Cannot find any text for license %s" % name)

        extracted[name] = extracted_info
        document.hasExtractedLicensingInfos.append(extracted_info)

    def convert(l):
        if l == "(" or l == ")":
            return l

        if l == "&":
            return "AND"

        if l == "|":
            return "OR"

        if l == "CLOSED":
            return "NONE"

        spdx_license = d.getVarFlag("SPDXLICENSEMAP", l) or l
        if spdx_license in license_data["licenses"]:
            return spdx_license

        try:
            spdx_license = existing[l]
        except KeyError:
            spdx_license = "LicenseRef-" + l
            add_extracted_license(spdx_license, l)

        return spdx_license

    lic_split = lic.replace("(", " ( ").replace(")", " ) ").replace("|", " | ").replace("&", " & ").split()

    return ' '.join(convert(l) for l in lic_split)

def process_sources(d):
    pn = d.getVar('PN')
    assume_provided = (d.getVar("ASSUME_PROVIDED") or "").split()
    if pn in assume_provided:
        for p in d.getVar("PROVIDES").split():
            if p != pn:
                pn = p
                break

    # glibc-locale: do_fetch, do_unpack and do_patch tasks have been deleted,
    # so avoid archiving source here.
    if pn.startswith('glibc-locale'):
        return False
    if d.getVar('PN') == "libtool-cross":
        return False
    if d.getVar('PN') == "libgcc-initial":
        return False
    if d.getVar('PN') == "shadow-sysroot":
        return False

    # We just archive gcc-source for all the gcc related recipes
    if d.getVar('BPN') in ['gcc', 'libgcc']:
        bb.debug(1, 'spdx: There is a bug in the scan of %s, do nothing' % pn)
        return False

    return True


def add_package_files(d, doc, spdx_pkg, topdir, get_spdxid, get_types, *, archive=None, ignore_dirs=[], ignore_top_level_dirs=[]):
    from pathlib import Path
    import oe.spdx
    import hashlib

    source_date_epoch = d.getVar("SOURCE_DATE_EPOCH")
    if source_date_epoch:
        source_date_epoch = int(source_date_epoch)

    sha1s = []
    spdx_files = []

    file_counter = 1
    for subdir, dirs, files in os.walk(topdir):
        dirs[:] = [d for d in dirs if d not in ignore_dirs]
        if subdir == str(topdir):
            dirs[:] = [d for d in dirs if d not in ignore_top_level_dirs]

        for file in files:
            filepath = Path(subdir) / file
            filename = str(filepath.relative_to(topdir))

            if not filepath.is_symlink() and filepath.is_file():
                spdx_file = oe.spdx.SPDXFile()
                spdx_file.SPDXID = get_spdxid(file_counter)
                for t in get_types(filepath):
                    spdx_file.fileTypes.append(t)
                spdx_file.fileName = filename

                if archive is not None:
                    with filepath.open("rb") as f:
                        info = archive.gettarinfo(fileobj=f)
                        info.name = filename
                        info.uid = 0
                        info.gid = 0
                        info.uname = "root"
                        info.gname = "root"

                        if source_date_epoch is not None and info.mtime > source_date_epoch:
                            info.mtime = source_date_epoch

                        archive.addfile(info, f)

                sha1 = bb.utils.sha1_file(filepath)
                sha1s.append(sha1)
                spdx_file.checksums.append(oe.spdx.SPDXChecksum(
                        algorithm="SHA1",
                        checksumValue=sha1,
                    ))
                spdx_file.checksums.append(oe.spdx.SPDXChecksum(
                        algorithm="SHA256",
                        checksumValue=bb.utils.sha256_file(filepath),
                    ))

                if "SOURCE" in spdx_file.fileTypes:
                    extracted_lics = extract_licenses(filepath)
                    if extracted_lics:
                        spdx_file.licenseInfoInFiles = extracted_lics

                doc.files.append(spdx_file)
                doc.add_relationship(spdx_pkg, "CONTAINS", spdx_file)
                spdx_pkg.hasFiles.append(spdx_file.SPDXID)

                spdx_files.append(spdx_file)

                file_counter += 1

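    # Compute the SPDX package verification code: per the SPDX 2.x spec this
    # is the SHA1 of the concatenation of the sorted SHA1 checksums of every
    # file in the package.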
    sha1s.sort()
    verifier = hashlib.sha1()
    for v in sha1s:
        verifier.update(v.encode("utf-8"))
    spdx_pkg.packageVerificationCode.packageVerificationCodeValue = verifier.hexdigest()

    return spdx_files


def add_package_sources_from_debug(d, package_doc, spdx_package, package, package_files, sources):
    from pathlib import Path
    import hashlib
    import oe.packagedata
    import oe.spdx

    debug_search_paths = [
        Path(d.getVar('PKGD')),
        Path(d.getVar('STAGING_DIR_TARGET')),
        Path(d.getVar('STAGING_DIR_NATIVE')),
        Path(d.getVar('STAGING_KERNEL_DIR')),
    ]

    pkg_data = oe.packagedata.read_subpkgdata_extended(package, d)

    if pkg_data is None:
        return

    for file_path, file_data in pkg_data["files_info"].items():
        if not "debugsrc" in file_data:
            continue

        for pkg_file in package_files:
            if file_path.lstrip("/") == pkg_file.fileName.lstrip("/"):
                break
        else:
            bb.fatal("No package file found for %s" % str(file_path))
            continue

        for debugsrc in file_data["debugsrc"]:
            ref_id = "NOASSERTION"
            for search in debug_search_paths:
                if debugsrc.startswith("/usr/src/kernel"):
                    debugsrc_path = search / debugsrc.replace('/usr/src/kernel/', '')
                else:
                    debugsrc_path = search / debugsrc.lstrip("/")
                if not debugsrc_path.exists():
                    continue

                file_sha256 = bb.utils.sha256_file(debugsrc_path)

                if file_sha256 in sources:
                    source_file = sources[file_sha256]

                    doc_ref = package_doc.find_external_document_ref(source_file.doc.documentNamespace)
                    if doc_ref is None:
                        doc_ref = oe.spdx.SPDXExternalDocumentRef()
                        doc_ref.externalDocumentId = "DocumentRef-dependency-" + source_file.doc.name
                        doc_ref.spdxDocument = source_file.doc.documentNamespace
                        doc_ref.checksum.algorithm = "SHA1"
                        doc_ref.checksum.checksumValue = source_file.doc_sha1
                        package_doc.externalDocumentRefs.append(doc_ref)

                    ref_id = "%s:%s" % (doc_ref.externalDocumentId, source_file.file.SPDXID)
                else:
                    bb.debug(1, "Debug source %s with SHA256 %s not found in any dependency" % (str(debugsrc_path), file_sha256))
                break
            else:
                bb.debug(1, "Debug source %s not found" % debugsrc)

            package_doc.add_relationship(pkg_file, "GENERATED_FROM", ref_id, comment=debugsrc)

add_package_sources_from_debug[vardepsexclude] += "STAGING_KERNEL_DIR"

def collect_dep_recipes(d, doc, spdx_recipe):
    import json
    from pathlib import Path
    import oe.sbom
    import oe.spdx

    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    spdx_deps_file = Path(d.getVar("SPDXDEPS"))

    dep_recipes = []

    with spdx_deps_file.open("r") as f:
        deps = json.load(f)

    for dep_pn, dep_hashfn in deps:
        dep_recipe_path = oe.sbom.doc_path_by_hashfn(deploy_dir_spdx, "recipe-" + dep_pn, dep_hashfn)

        spdx_dep_doc, spdx_dep_sha1 = oe.sbom.read_doc(dep_recipe_path)

        for pkg in spdx_dep_doc.packages:
            if pkg.name == dep_pn:
                spdx_dep_recipe = pkg
                break
        else:
            continue

        dep_recipes.append(oe.sbom.DepRecipe(spdx_dep_doc, spdx_dep_sha1, spdx_dep_recipe))

        dep_recipe_ref = oe.spdx.SPDXExternalDocumentRef()
        dep_recipe_ref.externalDocumentId = "DocumentRef-dependency-" + spdx_dep_doc.name
        dep_recipe_ref.spdxDocument = spdx_dep_doc.documentNamespace
        dep_recipe_ref.checksum.algorithm = "SHA1"
        dep_recipe_ref.checksum.checksumValue = spdx_dep_sha1

        doc.externalDocumentRefs.append(dep_recipe_ref)

        doc.add_relationship(
            "%s:%s" % (dep_recipe_ref.externalDocumentId, spdx_dep_recipe.SPDXID),
            "BUILD_DEPENDENCY_OF",
            spdx_recipe
        )

    return dep_recipes


def collect_dep_sources(d, dep_recipes):
    import oe.sbom

    sources = {}
    for dep in dep_recipes:
        # Don't collect sources from native recipes as they
        # match non-native sources also.
        if recipe_spdx_is_native(d, dep.recipe):
            continue
        recipe_files = set(dep.recipe.hasFiles)

        for spdx_file in dep.doc.files:
            if spdx_file.SPDXID not in recipe_files:
                continue

            if "SOURCE" in spdx_file.fileTypes:
                for checksum in spdx_file.checksums:
                    if checksum.algorithm == "SHA256":
                        sources[checksum.checksumValue] = oe.sbom.DepSource(dep.doc, dep.doc_sha1, dep.recipe, spdx_file)
                        break

    return sources

def add_download_packages(d, doc, recipe):
    import os.path
    from bb.fetch2 import decodeurl, CHECKSUM_LIST
    import bb.process
    import oe.spdx
    import oe.sbom

    for download_idx, src_uri in enumerate(d.getVar('SRC_URI').split()):
        f = bb.fetch2.FetchData(src_uri, d)

        for name in f.names:
            package = oe.spdx.SPDXPackage()
            package.name = "%s-source-%d" % (d.getVar("PN"), download_idx + 1)
            package.SPDXID = oe.sbom.get_download_spdxid(d, download_idx + 1)

            if f.type == "file":
                continue

            uri = f.type
            proto = getattr(f, "proto", None)
            if proto is not None:
                uri = uri + "+" + proto
            uri = uri + "://" + f.host + f.path

            if f.method.supports_srcrev():
                uri = uri + "@" + f.revisions[name]

            if f.method.supports_checksum(f):
                for checksum_id in CHECKSUM_LIST:
                    if checksum_id.upper() not in oe.spdx.SPDXPackage.ALLOWED_CHECKSUMS:
                        continue

                    expected_checksum = getattr(f, "%s_expected" % checksum_id)
                    if expected_checksum is None:
                        continue

                    c = oe.spdx.SPDXChecksum()
                    c.algorithm = checksum_id.upper()
                    c.checksumValue = expected_checksum
                    package.checksums.append(c)

            package.downloadLocation = uri
            doc.packages.append(package)
            doc.add_relationship(doc, "DESCRIBES", package)
            # In the future, we might be able to do more fancy dependencies,
            # but this should be sufficient for now
            doc.add_relationship(package, "BUILD_DEPENDENCY_OF", recipe)

def collect_direct_deps(d, dep_task):
    current_task = "do_" + d.getVar("BB_CURRENTTASK")
    pn = d.getVar("PN")

    taskdepdata = d.getVar("BB_TASKDEPDATA", False)

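    # As used in this class, each BB_TASKDEPDATA entry is a list where
    # index 0 is the PN, index 1 is the task name, index 3 is the list of
    # dependency keys into taskdepdata, and index 7 is the hash filename of
    # the providing recipe.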
    for this_dep in taskdepdata.values():
        if this_dep[0] == pn and this_dep[1] == current_task:
            break
    else:
        bb.fatal(f"Unable to find this {pn}:{current_task} in taskdepdata")

    deps = set()
    for dep_name in this_dep[3]:
        dep_data = taskdepdata[dep_name]
        if dep_data[1] == dep_task and dep_data[0] != pn:
            deps.add((dep_data[0], dep_data[7]))

    return sorted(deps)

collect_direct_deps[vardepsexclude] += "BB_TASKDEPDATA"
collect_direct_deps[vardeps] += "DEPENDS"

python do_collect_spdx_deps() {
    # This task calculates the build time dependencies of the recipe, and is
    # required because while a task can deptask on itself, those dependencies
    # do not show up in BB_TASKDEPDATA. To work around that, this task does the
    # deptask on do_create_spdx and writes out the dependencies it finds, then
    # do_create_spdx reads in the found dependencies when writing the actual
    # SPDX document
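    #
    # The dependencies are written to SPDXDEPS as a JSON list of [PN, hashfn]
    # pairs, the shape returned by collect_direct_deps() above.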
    import json
    from pathlib import Path

    spdx_deps_file = Path(d.getVar("SPDXDEPS"))

    deps = collect_direct_deps(d, "do_create_spdx")

    with spdx_deps_file.open("w") as f:
        json.dump(deps, f)
}
# NOTE: depending on do_unpack is a hack that is necessary to get its dependencies in order to archive the source
addtask do_collect_spdx_deps after do_unpack
do_collect_spdx_deps[depends] += "${PATCHDEPENDENCY}"
do_collect_spdx_deps[deptask] = "do_create_spdx"
do_collect_spdx_deps[dirs] = "${SPDXDIR}"

python do_create_spdx() {
    from datetime import datetime, timezone
    import oe.sbom
    import oe.spdx
    import uuid
    from pathlib import Path
    from contextlib import contextmanager
    import oe.cve_check

    @contextmanager
    def optional_tarfile(name, guard, mode="w"):
        import tarfile
        import bb.compress.zstd

        num_threads = int(d.getVar("BB_NUMBER_THREADS"))

        if guard:
            name.parent.mkdir(parents=True, exist_ok=True)
            with bb.compress.zstd.open(name, mode=mode + "b", num_threads=num_threads) as f:
                with tarfile.open(fileobj=f, mode=mode + "|") as tf:
                    yield tf
        else:
            yield None


    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    spdx_workdir = Path(d.getVar("SPDXWORK"))
    include_sources = d.getVar("SPDX_INCLUDE_SOURCES") == "1"
    archive_sources = d.getVar("SPDX_ARCHIVE_SOURCES") == "1"
    archive_packaged = d.getVar("SPDX_ARCHIVE_PACKAGED") == "1"

    creation_time = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    doc = oe.spdx.SPDXDocument()

    doc.name = "recipe-" + d.getVar("PN")
    doc.documentNamespace = get_doc_namespace(d, doc)
    doc.creationInfo.created = creation_time
    doc.creationInfo.comment = "This document was created by analyzing recipe files during the build."
    doc.creationInfo.licenseListVersion = d.getVar("SPDX_LICENSE_DATA")["licenseListVersion"]
    doc.creationInfo.creators.append("Tool: OpenEmbedded Core create-spdx.bbclass")
    doc.creationInfo.creators.append("Organization: %s" % d.getVar("SPDX_ORG"))
    doc.creationInfo.creators.append("Person: N/A ()")

    recipe = oe.spdx.SPDXPackage()
    recipe.name = d.getVar("PN")
    recipe.versionInfo = d.getVar("PV")
    recipe.SPDXID = oe.sbom.get_recipe_spdxid(d)
    recipe.supplier = d.getVar("SPDX_SUPPLIER")
    if bb.data.inherits_class("native", d) or bb.data.inherits_class("cross", d):
        recipe.annotations.append(create_annotation(d, "isNative"))

    homepage = d.getVar("HOMEPAGE")
    if homepage:
        recipe.homepage = homepage

    license = d.getVar("LICENSE")
    if license:
        recipe.licenseDeclared = convert_license_to_spdx(license, doc, d)

    summary = d.getVar("SUMMARY")
    if summary:
        recipe.summary = summary

    description = d.getVar("DESCRIPTION")
    if description:
        recipe.description = description

    if d.getVar("SPDX_CUSTOM_ANNOTATION_VARS"):
        for var in d.getVar('SPDX_CUSTOM_ANNOTATION_VARS').split():
            recipe.annotations.append(create_annotation(d, var + "=" + d.getVar(var)))

    # Some CVEs may be patched during the build process without incrementing the version number,
    # so querying for CVEs based on the CPE id can lead to false positives. To account for this,
    # save the CVEs fixed by patches to the source information field in the SPDX.
    patched_cves = oe.cve_check.get_patched_cves(d)
    patched_cves = list(patched_cves)
    patched_cves = ' '.join(patched_cves)
    if patched_cves:
        recipe.sourceInfo = "CVEs fixed: " + patched_cves

    cpe_ids = oe.cve_check.get_cpe_ids(d.getVar("CVE_PRODUCT"), d.getVar("CVE_VERSION"))
    if cpe_ids:
        for cpe_id in cpe_ids:
            cpe = oe.spdx.SPDXExternalReference()
            cpe.referenceCategory = "SECURITY"
            cpe.referenceType = "http://spdx.org/rdf/references/cpe23Type"
            cpe.referenceLocator = cpe_id
            recipe.externalRefs.append(cpe)

    doc.packages.append(recipe)
    doc.add_relationship(doc, "DESCRIBES", recipe)

    add_download_packages(d, doc, recipe)

    if process_sources(d) and include_sources:
        recipe_archive = deploy_dir_spdx / "recipes" / (doc.name + ".tar.zst")
        with optional_tarfile(recipe_archive, archive_sources) as archive:
            spdx_get_src(d)

            add_package_files(
                d,
                doc,
                recipe,
                spdx_workdir,
                lambda file_counter: "SPDXRef-SourceFile-%s-%d" % (d.getVar("PN"), file_counter),
                lambda filepath: ["SOURCE"],
                ignore_dirs=[".git"],
                ignore_top_level_dirs=["temp"],
                archive=archive,
            )

            if archive is not None:
                recipe.packageFileName = str(recipe_archive.name)

    dep_recipes = collect_dep_recipes(d, doc, recipe)

    doc_sha1 = oe.sbom.write_doc(d, doc, d.getVar("SSTATE_PKGARCH"), "recipes", indent=get_json_indent(d))
    dep_recipes.append(oe.sbom.DepRecipe(doc, doc_sha1, recipe))

    recipe_ref = oe.spdx.SPDXExternalDocumentRef()
    recipe_ref.externalDocumentId = "DocumentRef-recipe-" + recipe.name
    recipe_ref.spdxDocument = doc.documentNamespace
    recipe_ref.checksum.algorithm = "SHA1"
    recipe_ref.checksum.checksumValue = doc_sha1

    sources = collect_dep_sources(d, dep_recipes)
    found_licenses = {license.name:recipe_ref.externalDocumentId + ":" + license.licenseId for license in doc.hasExtractedLicensingInfos}

    if not recipe_spdx_is_native(d, recipe):
        bb.build.exec_func("read_subpackage_metadata", d)

        pkgdest = Path(d.getVar("PKGDEST"))
        for package in d.getVar("PACKAGES").split():
            if not oe.packagedata.packaged(package, d):
                continue

            package_doc = oe.spdx.SPDXDocument()
            pkg_name = d.getVar("PKG:%s" % package) or package
            package_doc.name = pkg_name
            package_doc.documentNamespace = get_doc_namespace(d, package_doc)
            package_doc.creationInfo.created = creation_time
            package_doc.creationInfo.comment = "This document was created by analyzing packages created during the build."
            package_doc.creationInfo.licenseListVersion = d.getVar("SPDX_LICENSE_DATA")["licenseListVersion"]
            package_doc.creationInfo.creators.append("Tool: OpenEmbedded Core create-spdx.bbclass")
            package_doc.creationInfo.creators.append("Organization: %s" % d.getVar("SPDX_ORG"))
            package_doc.creationInfo.creators.append("Person: N/A ()")
            package_doc.externalDocumentRefs.append(recipe_ref)

            package_license = d.getVar("LICENSE:%s" % package) or d.getVar("LICENSE")

            spdx_package = oe.spdx.SPDXPackage()

            spdx_package.SPDXID = oe.sbom.get_package_spdxid(pkg_name)
            spdx_package.name = pkg_name
            spdx_package.versionInfo = d.getVar("PV")
            spdx_package.licenseDeclared = convert_license_to_spdx(package_license, package_doc, d, found_licenses)
            spdx_package.supplier = d.getVar("SPDX_SUPPLIER")

            package_doc.packages.append(spdx_package)

            package_doc.add_relationship(spdx_package, "GENERATED_FROM", "%s:%s" % (recipe_ref.externalDocumentId, recipe.SPDXID))
            package_doc.add_relationship(package_doc, "DESCRIBES", spdx_package)

            package_archive = deploy_dir_spdx / "packages" / (package_doc.name + ".tar.zst")
            with optional_tarfile(package_archive, archive_packaged) as archive:
                package_files = add_package_files(
                    d,
                    package_doc,
                    spdx_package,
                    pkgdest / package,
                    lambda file_counter: oe.sbom.get_packaged_file_spdxid(pkg_name, file_counter),
                    lambda filepath: ["BINARY"],
                    ignore_top_level_dirs=['CONTROL', 'DEBIAN'],
                    archive=archive,
                )

                if archive is not None:
                    spdx_package.packageFileName = str(package_archive.name)

            add_package_sources_from_debug(d, package_doc, spdx_package, package, package_files, sources)

            oe.sbom.write_doc(d, package_doc, d.getVar("SSTATE_PKGARCH"), "packages", indent=get_json_indent(d))
}
do_create_spdx[vardepsexclude] += "BB_NUMBER_THREADS"
# NOTE: depending on do_unpack is a hack that is necessary to get its dependencies in order to archive the source
addtask do_create_spdx after do_package do_packagedata do_unpack do_collect_spdx_deps before do_populate_sdk do_build do_rm_work

SSTATETASKS += "do_create_spdx"
do_create_spdx[sstate-inputdirs] = "${SPDXDEPLOY}"
do_create_spdx[sstate-outputdirs] = "${DEPLOY_DIR_SPDX}"

python do_create_spdx_setscene () {
    sstate_setscene(d)
}
addtask do_create_spdx_setscene

do_create_spdx[dirs] = "${SPDXWORK}"
do_create_spdx[cleandirs] = "${SPDXDEPLOY} ${SPDXWORK}"
do_create_spdx[depends] += "${PATCHDEPENDENCY}"

def collect_package_providers(d):
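    """
    Return a dict mapping each runtime provide (package names, renamed PKG
    names and RPROVIDES) to a (package name, hash filename) tuple, covering
    this recipe and its direct do_create_spdx dependencies.
    """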
    from pathlib import Path
    import oe.sbom
    import oe.spdx
    import json

    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))

    providers = {}

    deps = collect_direct_deps(d, "do_create_spdx")
    deps.append((d.getVar("PN"), d.getVar("BB_HASHFILENAME")))

    for dep_pn, dep_hashfn in deps:
        localdata = d
        recipe_data = oe.packagedata.read_pkgdata(dep_pn, localdata)
        if not recipe_data:
            localdata = bb.data.createCopy(d)
            localdata.setVar("PKGDATA_DIR", "${PKGDATA_DIR_SDK}")
            recipe_data = oe.packagedata.read_pkgdata(dep_pn, localdata)

        for pkg in recipe_data.get("PACKAGES", "").split():

            pkg_data = oe.packagedata.read_subpkgdata_dict(pkg, localdata)
            rprovides = set(n for n, _ in bb.utils.explode_dep_versions2(pkg_data.get("RPROVIDES", "")).items())
            rprovides.add(pkg)

            if "PKG" in pkg_data:
                pkg = pkg_data["PKG"]
                rprovides.add(pkg)

            for r in rprovides:
                providers[r] = (pkg, dep_hashfn)

    return providers

collect_package_providers[vardepsexclude] += "BB_TASKDEPDATA"

python do_create_runtime_spdx() {
    from datetime import datetime, timezone
    import oe.sbom
    import oe.spdx
    import oe.packagedata
    from pathlib import Path

    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    spdx_deploy = Path(d.getVar("SPDXRUNTIMEDEPLOY"))
    is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class("cross", d)

    creation_time = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    providers = collect_package_providers(d)

    if not is_native:
        bb.build.exec_func("read_subpackage_metadata", d)

        dep_package_cache = {}

        pkgdest = Path(d.getVar("PKGDEST"))
        for package in d.getVar("PACKAGES").split():
            localdata = bb.data.createCopy(d)
            pkg_name = d.getVar("PKG:%s" % package) or package
            localdata.setVar("PKG", pkg_name)
            localdata.setVar('OVERRIDES', d.getVar("OVERRIDES", False) + ":" + package)

            if not oe.packagedata.packaged(package, localdata):
                continue

            pkg_spdx_path = oe.sbom.doc_path(deploy_dir_spdx, pkg_name, d.getVar("SSTATE_PKGARCH"), "packages")

            package_doc, package_doc_sha1 = oe.sbom.read_doc(pkg_spdx_path)

            for p in package_doc.packages:
                if p.name == pkg_name:
                    spdx_package = p
                    break
            else:
                bb.fatal("Package '%s' not found in %s" % (pkg_name, pkg_spdx_path))

            runtime_doc = oe.spdx.SPDXDocument()
            runtime_doc.name = "runtime-" + pkg_name
            runtime_doc.documentNamespace = get_doc_namespace(localdata, runtime_doc)
            runtime_doc.creationInfo.created = creation_time
            runtime_doc.creationInfo.comment = "This document was created by analyzing package runtime dependencies."
            runtime_doc.creationInfo.licenseListVersion = d.getVar("SPDX_LICENSE_DATA")["licenseListVersion"]
            runtime_doc.creationInfo.creators.append("Tool: OpenEmbedded Core create-spdx.bbclass")
            runtime_doc.creationInfo.creators.append("Organization: %s" % d.getVar("SPDX_ORG"))
            runtime_doc.creationInfo.creators.append("Person: N/A ()")

            package_ref = oe.spdx.SPDXExternalDocumentRef()
            package_ref.externalDocumentId = "DocumentRef-package-" + package
            package_ref.spdxDocument = package_doc.documentNamespace
            package_ref.checksum.algorithm = "SHA1"
            package_ref.checksum.checksumValue = package_doc_sha1

            runtime_doc.externalDocumentRefs.append(package_ref)

            runtime_doc.add_relationship(
                runtime_doc.SPDXID,
                "AMENDS",
                "%s:%s" % (package_ref.externalDocumentId, package_doc.SPDXID)
            )

            deps = bb.utils.explode_dep_versions2(localdata.getVar("RDEPENDS") or "")
            seen_deps = set()
            for dep, _ in deps.items():
                if dep in seen_deps:
                    continue

                if dep not in providers:
                    continue

                (dep, dep_hashfn) = providers[dep]

                if not oe.packagedata.packaged(dep, localdata):
                    continue

                dep_pkg_data = oe.packagedata.read_subpkgdata_dict(dep, d)
                dep_pkg = dep_pkg_data["PKG"]

                if dep in dep_package_cache:
                    (dep_spdx_package, dep_package_ref) = dep_package_cache[dep]
                else:
                    dep_path = oe.sbom.doc_path_by_hashfn(deploy_dir_spdx, dep_pkg, dep_hashfn)

                    spdx_dep_doc, spdx_dep_sha1 = oe.sbom.read_doc(dep_path)

                    for pkg in spdx_dep_doc.packages:
                        if pkg.name == dep_pkg:
                            dep_spdx_package = pkg
                            break
                    else:
                        bb.fatal("Package '%s' not found in %s" % (dep_pkg, dep_path))

                    dep_package_ref = oe.spdx.SPDXExternalDocumentRef()
                    dep_package_ref.externalDocumentId = "DocumentRef-runtime-dependency-" + spdx_dep_doc.name
                    dep_package_ref.spdxDocument = spdx_dep_doc.documentNamespace
                    dep_package_ref.checksum.algorithm = "SHA1"
                    dep_package_ref.checksum.checksumValue = spdx_dep_sha1

                    dep_package_cache[dep] = (dep_spdx_package, dep_package_ref)

                runtime_doc.externalDocumentRefs.append(dep_package_ref)

                runtime_doc.add_relationship(
                    "%s:%s" % (dep_package_ref.externalDocumentId, dep_spdx_package.SPDXID),
                    "RUNTIME_DEPENDENCY_OF",
                    "%s:%s" % (package_ref.externalDocumentId, spdx_package.SPDXID)
                )
                seen_deps.add(dep)

            oe.sbom.write_doc(d, runtime_doc, d.getVar("SSTATE_PKGARCH"), "runtime", spdx_deploy, indent=get_json_indent(d))
}

do_create_runtime_spdx[vardepsexclude] += "OVERRIDES"

addtask do_create_runtime_spdx after do_create_spdx before do_build do_rm_work
SSTATETASKS += "do_create_runtime_spdx"
do_create_runtime_spdx[sstate-inputdirs] = "${SPDXRUNTIMEDEPLOY}"
do_create_runtime_spdx[sstate-outputdirs] = "${DEPLOY_DIR_SPDX}"

python do_create_runtime_spdx_setscene () {
    sstate_setscene(d)
}
addtask do_create_runtime_spdx_setscene

do_create_runtime_spdx[dirs] = "${SPDXRUNTIMEDEPLOY}"
do_create_runtime_spdx[cleandirs] = "${SPDXRUNTIMEDEPLOY}"
do_create_runtime_spdx[rdeptask] = "do_create_spdx"

def spdx_get_src(d):
    """
    Save the patched source of the recipe in SPDXWORK.
    """
    import shutil
    spdx_workdir = d.getVar('SPDXWORK')
    spdx_sysroot_native = d.getVar('STAGING_DIR_NATIVE')
    pn = d.getVar('PN')

    workdir = d.getVar("WORKDIR")

    try:
        # The kernel class functions require it to be on work-shared, so we don't change WORKDIR
        if not is_work_shared_spdx(d):
            # Change the WORKDIR to make do_unpack and do_patch run in another dir.
            d.setVar('WORKDIR', spdx_workdir)
            # Restore the original path to recipe's native sysroot (it's relative to WORKDIR).
            d.setVar('STAGING_DIR_NATIVE', spdx_sysroot_native)

            # The changed 'WORKDIR' also causes 'B' to change, so create the
            # 'B' directory for following tasks that may require it (for
            # example, some recipes' do_patch requires 'B' to exist).
            bb.utils.mkdirhier(d.getVar('B'))

            bb.build.exec_func('do_unpack', d)
        # Copy source of kernel to spdx_workdir
        if is_work_shared_spdx(d):
            share_src = d.getVar('WORKDIR')
            d.setVar('WORKDIR', spdx_workdir)
            d.setVar('STAGING_DIR_NATIVE', spdx_sysroot_native)
            src_dir = spdx_workdir + "/" + d.getVar('PN') + "-" + d.getVar('PV') + "-" + d.getVar('PR')
            bb.utils.mkdirhier(src_dir)
            if bb.data.inherits_class('kernel', d):
                share_src = d.getVar('STAGING_KERNEL_DIR')
            cmd_copy_share = "cp -rf " + share_src + "/* " + src_dir + "/"
            cmd_copy_shared_res = os.popen(cmd_copy_share).read()
            bb.note("cmd_copy_shared_result = " + cmd_copy_shared_res)

            git_path = src_dir + "/.git"
            if os.path.exists(git_path):
                shutil.rmtree(git_path)

        # Make sure gcc and kernel sources are patched only once
        if not (d.getVar('SRC_URI') == "" or is_work_shared_spdx(d)):
            bb.build.exec_func('do_patch', d)

        # Some userland has no source.
        if not os.path.exists(spdx_workdir):
            bb.utils.mkdirhier(spdx_workdir)
    finally:
        d.setVar("WORKDIR", workdir)

spdx_get_src[vardepsexclude] += "STAGING_KERNEL_DIR"

do_rootfs[recrdeptask] += "do_create_spdx do_create_runtime_spdx"
do_rootfs[cleandirs] += "${SPDXIMAGEWORK}"

ROOTFS_POSTUNINSTALL_COMMAND =+ "image_combine_spdx"

do_populate_sdk[recrdeptask] += "do_create_spdx do_create_runtime_spdx"
do_populate_sdk[cleandirs] += "${SPDXSDKWORK}"
POPULATE_SDK_POST_HOST_COMMAND:append:task-populate-sdk = " sdk_host_combine_spdx"
POPULATE_SDK_POST_TARGET_COMMAND:append:task-populate-sdk = " sdk_target_combine_spdx"

python image_combine_spdx() {
    import os
    import oe.sbom
    from pathlib import Path
    from oe.rootfs import image_list_installed_packages

    image_name = d.getVar("IMAGE_NAME")
    image_link_name = d.getVar("IMAGE_LINK_NAME")
    imgdeploydir = Path(d.getVar("IMGDEPLOYDIR"))
    img_spdxid = oe.sbom.get_image_spdxid(image_name)
    packages = image_list_installed_packages(d)

    combine_spdx(d, image_name, imgdeploydir, img_spdxid, packages, Path(d.getVar("SPDXIMAGEWORK")))

    def make_image_link(target_path, suffix):
        if image_link_name:
            link = imgdeploydir / (image_link_name + suffix)
            if link != target_path:
                link.symlink_to(os.path.relpath(target_path, link.parent))

    spdx_tar_path = imgdeploydir / (image_name + ".spdx.tar.zst")
    make_image_link(spdx_tar_path, ".spdx.tar.zst")
}

python sdk_host_combine_spdx() {
    sdk_combine_spdx(d, "host")
}

python sdk_target_combine_spdx() {
    sdk_combine_spdx(d, "target")
}

def sdk_combine_spdx(d, sdk_type):
    import oe.sbom
    from pathlib import Path
    from oe.sdk import sdk_list_installed_packages

    sdk_name = d.getVar("SDK_NAME") + "-" + sdk_type
    sdk_deploydir = Path(d.getVar("SDKDEPLOYDIR"))
    sdk_spdxid = oe.sbom.get_sdk_spdxid(sdk_name)
    sdk_packages = sdk_list_installed_packages(d, sdk_type == "target")
    combine_spdx(d, sdk_name, sdk_deploydir, sdk_spdxid, sdk_packages, Path(d.getVar('SPDXSDKWORK')))

def combine_spdx(d, rootfs_name, rootfs_deploydir, rootfs_spdxid, packages, spdx_workdir):
    import os
    import oe.spdx
    import oe.sbom
    import io
    import json
    from datetime import timezone, datetime
    from pathlib import Path
    import tarfile
    import bb.compress.zstd

    providers = collect_package_providers(d)

    creation_time = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    source_date_epoch = d.getVar("SOURCE_DATE_EPOCH")

    doc = oe.spdx.SPDXDocument()
    doc.name = rootfs_name
    doc.documentNamespace = get_doc_namespace(d, doc)
    doc.creationInfo.created = creation_time
    doc.creationInfo.comment = "This document was created by analyzing the source of the Yocto recipe during the build."
    doc.creationInfo.licenseListVersion = d.getVar("SPDX_LICENSE_DATA")["licenseListVersion"]
    doc.creationInfo.creators.append("Tool: OpenEmbedded Core create-spdx.bbclass")
    doc.creationInfo.creators.append("Organization: %s" % d.getVar("SPDX_ORG"))
    doc.creationInfo.creators.append("Person: N/A ()")

    image = oe.spdx.SPDXPackage()
    image.name = d.getVar("PN")
    image.versionInfo = d.getVar("PV")
    image.SPDXID = rootfs_spdxid
    image.supplier = d.getVar("SPDX_SUPPLIER")

    doc.packages.append(image)

    for name in sorted(packages.keys()):
        if name not in providers:
            bb.fatal("Unable to find provider for '%s'" % name)

        pkg_name, pkg_hashfn = providers[name]

        pkg_spdx_path = oe.sbom.doc_path_by_hashfn(deploy_dir_spdx, pkg_name, pkg_hashfn)
        pkg_doc, pkg_doc_sha1 = oe.sbom.read_doc(pkg_spdx_path)

        for p in pkg_doc.packages:
            if p.name == name:
                pkg_ref = oe.spdx.SPDXExternalDocumentRef()
                pkg_ref.externalDocumentId = "DocumentRef-%s" % pkg_doc.name
                pkg_ref.spdxDocument = pkg_doc.documentNamespace
                pkg_ref.checksum.algorithm = "SHA1"
                pkg_ref.checksum.checksumValue = pkg_doc_sha1

                doc.externalDocumentRefs.append(pkg_ref)
                doc.add_relationship(image, "CONTAINS", "%s:%s" % (pkg_ref.externalDocumentId, p.SPDXID))
                break
        else:
            bb.fatal("Unable to find package with name '%s' in SPDX file %s" % (name, pkg_spdx_path))

        runtime_spdx_path = oe.sbom.doc_path_by_hashfn(deploy_dir_spdx, "runtime-" + name, pkg_hashfn)
        runtime_doc, runtime_doc_sha1 = oe.sbom.read_doc(runtime_spdx_path)

        runtime_ref = oe.spdx.SPDXExternalDocumentRef()
        runtime_ref.externalDocumentId = "DocumentRef-%s" % runtime_doc.name
        runtime_ref.spdxDocument = runtime_doc.documentNamespace
        runtime_ref.checksum.algorithm = "SHA1"
        runtime_ref.checksum.checksumValue = runtime_doc_sha1

        # "OTHER" isn't ideal here, but I can't find a relationship that makes sense
        doc.externalDocumentRefs.append(runtime_ref)
        doc.add_relationship(
            image,
            "OTHER",
            "%s:%s" % (runtime_ref.externalDocumentId, runtime_doc.SPDXID),
            comment="Runtime dependencies for %s" % name
        )

    image_spdx_path = spdx_workdir / (rootfs_name + ".spdx.json")

    with image_spdx_path.open("wb") as f:
        doc.to_json(f, sort_keys=True, indent=get_json_indent(d))

    num_threads = int(d.getVar("BB_NUMBER_THREADS"))

    visited_docs = set()

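    # index.json describes every SPDX document bundled into the archive,
    # recording each document's filename within the tarball, its
    # documentNamespace and its SHA1 checksum.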
    index = {"documents": []}

    spdx_tar_path = rootfs_deploydir / (rootfs_name + ".spdx.tar.zst")
    with bb.compress.zstd.open(spdx_tar_path, "w", num_threads=num_threads) as f:
        with tarfile.open(fileobj=f, mode="w|") as tar:
            def collect_spdx_document(path):
                nonlocal tar
                nonlocal deploy_dir_spdx
                nonlocal source_date_epoch
                nonlocal index

                if path in visited_docs:
                    return

                visited_docs.add(path)

                with path.open("rb") as f:
                    doc, sha1 = oe.sbom.read_doc(f)
                    f.seek(0)

                    if doc.documentNamespace in visited_docs:
                        return

                    bb.note("Adding SPDX document %s" % path)
                    visited_docs.add(doc.documentNamespace)
                    info = tar.gettarinfo(fileobj=f)

                    info.name = doc.name + ".spdx.json"
                    info.uid = 0
                    info.gid = 0
                    info.uname = "root"
                    info.gname = "root"

                    if source_date_epoch is not None and info.mtime > int(source_date_epoch):
                        info.mtime = int(source_date_epoch)

                    tar.addfile(info, f)

                    index["documents"].append({
                        "filename": info.name,
                        "documentNamespace": doc.documentNamespace,
                        "sha1": sha1,
                    })

                for ref in doc.externalDocumentRefs:
                    ref_path = oe.sbom.doc_path_by_namespace(deploy_dir_spdx, ref.spdxDocument)
                    collect_spdx_document(ref_path)

            collect_spdx_document(image_spdx_path)

            index["documents"].sort(key=lambda x: x["filename"])

            index_str = io.BytesIO(json.dumps(
                index,
                sort_keys=True,
                indent=get_json_indent(d),
            ).encode("utf-8"))

            info = tarfile.TarInfo()
            info.name = "index.json"
            info.size = len(index_str.getvalue())
            info.uid = 0
            info.gid = 0
            info.uname = "root"
            info.gname = "root"

            tar.addfile(info, fileobj=index_str)

combine_spdx[vardepsexclude] += "BB_NUMBER_THREADS"