#
# Copyright OpenEmbedded Contributors
#
# SPDX-License-Identifier: GPL-2.0-only
#

DEPLOY_DIR_SPDX ??= "${DEPLOY_DIR}/spdx/${MACHINE}"

# The product name that the CVE database uses.  Defaults to BPN, but may need to
# be overridden per recipe (for example tiff.bb sets CVE_PRODUCT=libtiff).
CVE_PRODUCT ??= "${BPN}"
CVE_VERSION ??= "${PV}"

SPDXDIR ??= "${WORKDIR}/spdx"
SPDXDEPLOY = "${SPDXDIR}/deploy"
SPDXWORK = "${SPDXDIR}/work"

SPDX_TOOL_NAME ??= "oe-spdx-creator"
SPDX_TOOL_VERSION ??= "1.0"

SPDXRUNTIMEDEPLOY = "${SPDXDIR}/runtime-deploy"

SPDX_INCLUDE_SOURCES ??= "0"
SPDX_INCLUDE_PACKAGED ??= "0"
SPDX_ARCHIVE_SOURCES ??= "0"
SPDX_ARCHIVE_PACKAGED ??= "0"

SPDX_UUID_NAMESPACE ??= "sbom.openembedded.org"
SPDX_NAMESPACE_PREFIX ??= "http://spdx.org/spdxdoc"
SPDX_PRETTY ??= "0"

SPDX_LICENSES ??= "${COREBASE}/meta/files/spdx-licenses.json"

SPDX_ORG ??= "OpenEmbedded ()"
SPDX_SUPPLIER ??= "Organization: ${SPDX_ORG}"
SPDX_SUPPLIER[doc] = "The SPDX PackageSupplier field for SPDX packages created from \
    this recipe. For SPDX documents created using this class during the build, this \
    is the contact information for the person or organization who is doing the \
    build."
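
# The settings above are weak defaults; a distro or local.conf can opt in to
# richer SBOM output. As a purely illustrative example (not a recommendation),
# a configuration might set:
#   SPDX_PRETTY = "1"
#   SPDX_INCLUDE_SOURCES = "1"
#   SPDX_ARCHIVE_SOURCES = "1"
#   SPDX_SUPPLIER = "Organization: Example Corp ()"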

def extract_licenses(filename):
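    """
    Scan the first 15 kB of a file for SPDX-License-Identifier tags and return
    the list of identifiers found, or None if none are found or the file
    cannot be read. For example, a line reading
    "# SPDX-License-Identifier: GPL-2.0-only" yields ["GPL-2.0-only"].
    """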
    import re

    lic_regex = re.compile(rb'^\W*SPDX-License-Identifier:\s*([ \w\d.()+-]+?)(?:\s+\W*)?$', re.MULTILINE)

    try:
        with open(filename, 'rb') as f:
            size = min(15000, os.stat(filename).st_size)
            txt = f.read(size)
            licenses = re.findall(lic_regex, txt)
            if licenses:
                ascii_licenses = [lic.decode('ascii') for lic in licenses]
                return ascii_licenses
    except Exception as e:
        bb.warn(f"Exception reading {filename}: {e}")
    return None

def get_doc_namespace(d, doc):
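    """
    Build a unique but reproducible SPDX document namespace of the form
    ${SPDX_NAMESPACE_PREFIX}/<doc name>-<uuid>, where the UUID is a version-5
    UUID derived from SPDX_UUID_NAMESPACE and the document name.
    """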
    import uuid
    namespace_uuid = uuid.uuid5(uuid.NAMESPACE_DNS, d.getVar("SPDX_UUID_NAMESPACE"))
    return "%s/%s-%s" % (d.getVar("SPDX_NAMESPACE_PREFIX"), doc.name, str(uuid.uuid5(namespace_uuid, doc.name)))

def create_annotation(d, comment):
    from datetime import datetime, timezone

    creation_time = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    annotation = oe.spdx.SPDXAnnotation()
    annotation.annotationDate = creation_time
    annotation.annotationType = "OTHER"
    annotation.annotator = "Tool: %s - %s" % (d.getVar("SPDX_TOOL_NAME"), d.getVar("SPDX_TOOL_VERSION"))
    annotation.comment = comment
    return annotation

def recipe_spdx_is_native(d, recipe):
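    """
    Return True if the recipe's SPDX package carries the "isNative" tool
    annotation that do_create_spdx adds for native and cross recipes.
    """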
    return any(a.annotationType == "OTHER" and
      a.annotator == "Tool: %s - %s" % (d.getVar("SPDX_TOOL_NAME"), d.getVar("SPDX_TOOL_VERSION")) and
      a.comment == "isNative" for a in recipe.annotations)

def is_work_shared_spdx(d):
    return bb.data.inherits_class('kernel', d) or ('work-shared' in d.getVar('WORKDIR'))

def get_json_indent(d):
    if d.getVar("SPDX_PRETTY") == "1":
        return 2
    return None

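# Load the SPDX license list (SPDX_LICENSES) once at parse time and cache it in
# SPDX_LICENSE_DATA, re-keying the license array by licenseId so that
# convert_license_to_spdx() can do fast lookups.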
python() {
    import json
    if d.getVar("SPDX_LICENSE_DATA"):
        return

    with open(d.getVar("SPDX_LICENSES"), "r") as f:
        data = json.load(f)
        # Transform the license array to a dictionary
        data["licenses"] = {l["licenseId"]: l for l in data["licenses"]}
        d.setVar("SPDX_LICENSE_DATA", data)
}

def convert_license_to_spdx(lic, document, d, existing={}):
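    """
    Convert an OpenEmbedded LICENSE expression into an SPDX license
    expression. Operators (&, |) and parentheses are translated, known SPDX
    identifiers (optionally mapped through SPDXLICENSEMAP) are kept, and any
    remaining license becomes a LicenseRef-* entry whose text is attached to
    the document as extracted licensing info. For instance, a hypothetical
    "GPL-2.0-only & custom-eula" would become
    "GPL-2.0-only AND LicenseRef-custom-eula".
    """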
    from pathlib import Path
    import oe.spdx

    license_data = d.getVar("SPDX_LICENSE_DATA")
    extracted = {}

    def add_extracted_license(ident, name):
        nonlocal document

        if name in extracted:
            return

        extracted_info = oe.spdx.SPDXExtractedLicensingInfo()
        extracted_info.name = name
        extracted_info.licenseId = ident
        extracted_info.extractedText = None

        if name == "PD":
            # Special-case this.
            extracted_info.extractedText = "Software released to the public domain"
        else:
            # Search for the license in COMMON_LICENSE_DIR and LICENSE_PATH
            for directory in [d.getVar('COMMON_LICENSE_DIR')] + (d.getVar('LICENSE_PATH') or '').split():
                try:
                    with (Path(directory) / name).open(errors="replace") as f:
                        extracted_info.extractedText = f.read()
                        break
                except FileNotFoundError:
                    pass
            if extracted_info.extractedText is None:
                # If it's not SPDX or PD, then NO_GENERIC_LICENSE must be set
                filename = d.getVarFlag('NO_GENERIC_LICENSE', name)
                if filename:
                    filename = d.expand("${S}/" + filename)
                    with open(filename, errors="replace") as f:
                        extracted_info.extractedText = f.read()
                else:
                    bb.error("Cannot find any text for license %s" % name)

        extracted[name] = extracted_info
        document.hasExtractedLicensingInfos.append(extracted_info)

    def convert(l):
        if l == "(" or l == ")":
            return l

        if l == "&":
            return "AND"

        if l == "|":
            return "OR"

        if l == "CLOSED":
            return "NONE"

        spdx_license = d.getVarFlag("SPDXLICENSEMAP", l) or l
        if spdx_license in license_data["licenses"]:
            return spdx_license

        try:
            spdx_license = existing[l]
        except KeyError:
            spdx_license = "LicenseRef-" + l
            add_extracted_license(spdx_license, l)

        return spdx_license

    lic_split = lic.replace("(", " ( ").replace(")", " ) ").split()

    return ' '.join(convert(l) for l in lic_split)

def process_sources(d):
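    """
    Return True if source for this recipe should be collected for the SPDX
    document, and False for recipes that are known not to have usable patched
    source at this point (glibc-locale, libtool-cross, libgcc-initial,
    shadow-sysroot, and the gcc/libgcc recipes, whose source is handled via
    gcc-source).
    """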
    pn = d.getVar('PN')
    assume_provided = (d.getVar("ASSUME_PROVIDED") or "").split()
    if pn in assume_provided:
        for p in d.getVar("PROVIDES").split():
            if p != pn:
                pn = p
                break

    # glibc-locale: do_fetch, do_unpack and do_patch tasks have been deleted,
    # so avoid archiving source here.
    if pn.startswith('glibc-locale'):
        return False
    if d.getVar('PN') == "libtool-cross":
        return False
    if d.getVar('PN') == "libgcc-initial":
        return False
    if d.getVar('PN') == "shadow-sysroot":
        return False

    # We just archive gcc-source for all the gcc-related recipes
    if d.getVar('BPN') in ['gcc', 'libgcc']:
        bb.debug(1, 'spdx: There is a bug in the scan of %s, so doing nothing' % pn)
        return False

    return True


def add_package_files(d, doc, spdx_pkg, topdir, get_spdxid, get_types, *, archive=None, ignore_dirs=[], ignore_top_level_dirs=[]):
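    """
    Walk topdir and create an SPDXFile entry (with SHA1 and SHA256 checksums)
    for every regular file found, adding a CONTAINS relationship from spdx_pkg
    to each file. If an archive is given, the files are also appended to it
    with normalized ownership and timestamps. Finally the package verification
    code is computed from the sorted SHA1 sums. Returns the list of SPDXFile
    objects created.
    """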
    from pathlib import Path
    import oe.spdx
    import hashlib

    source_date_epoch = d.getVar("SOURCE_DATE_EPOCH")
    if source_date_epoch:
        source_date_epoch = int(source_date_epoch)

    sha1s = []
    spdx_files = []

    file_counter = 1
    for subdir, dirs, files in os.walk(topdir):
        dirs[:] = [d for d in dirs if d not in ignore_dirs]
        if subdir == str(topdir):
            dirs[:] = [d for d in dirs if d not in ignore_top_level_dirs]

        for file in files:
            filepath = Path(subdir) / file
            filename = str(filepath.relative_to(topdir))

            if not filepath.is_symlink() and filepath.is_file():
                spdx_file = oe.spdx.SPDXFile()
                spdx_file.SPDXID = get_spdxid(file_counter)
                for t in get_types(filepath):
                    spdx_file.fileTypes.append(t)
                spdx_file.fileName = filename

                if archive is not None:
                    with filepath.open("rb") as f:
                        info = archive.gettarinfo(fileobj=f)
                        info.name = filename
                        info.uid = 0
                        info.gid = 0
                        info.uname = "root"
                        info.gname = "root"

                        if source_date_epoch is not None and info.mtime > source_date_epoch:
                            info.mtime = source_date_epoch

                        archive.addfile(info, f)

                sha1 = bb.utils.sha1_file(filepath)
                sha1s.append(sha1)
                spdx_file.checksums.append(oe.spdx.SPDXChecksum(
                        algorithm="SHA1",
                        checksumValue=sha1,
                    ))
                spdx_file.checksums.append(oe.spdx.SPDXChecksum(
                        algorithm="SHA256",
                        checksumValue=bb.utils.sha256_file(filepath),
                    ))

                if "SOURCE" in spdx_file.fileTypes:
                    extracted_lics = extract_licenses(filepath)
                    if extracted_lics:
                        spdx_file.licenseInfoInFiles = extracted_lics

                doc.files.append(spdx_file)
                doc.add_relationship(spdx_pkg, "CONTAINS", spdx_file)
                spdx_pkg.hasFiles.append(spdx_file.SPDXID)

                spdx_files.append(spdx_file)

                file_counter += 1

    sha1s.sort()
    verifier = hashlib.sha1()
    for v in sha1s:
        verifier.update(v.encode("utf-8"))
    spdx_pkg.packageVerificationCode.packageVerificationCodeValue = verifier.hexdigest()

    return spdx_files


def add_package_sources_from_debug(d, package_doc, spdx_package, package, package_files, sources):
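    """
    Use the debugsrc information recorded in pkgdata to add GENERATED_FROM
    relationships from packaged files to the source files they were built
    from. Source files are matched by SHA256 against the dependency sources
    collected by collect_dep_sources(); unmatched sources are recorded as
    NOASSERTION with the debugsrc path as a comment.
    """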
    from pathlib import Path
    import hashlib
    import oe.packagedata
    import oe.spdx

    debug_search_paths = [
        Path(d.getVar('PKGD')),
        Path(d.getVar('STAGING_DIR_TARGET')),
        Path(d.getVar('STAGING_DIR_NATIVE')),
        Path(d.getVar('STAGING_KERNEL_DIR')),
    ]

    pkg_data = oe.packagedata.read_subpkgdata_extended(package, d)

    if pkg_data is None:
        return

    for file_path, file_data in pkg_data["files_info"].items():
        if "debugsrc" not in file_data:
            continue

        for pkg_file in package_files:
            if file_path.lstrip("/") == pkg_file.fileName.lstrip("/"):
                break
        else:
            bb.fatal("No package file found for %s" % str(file_path))
            continue

        for debugsrc in file_data["debugsrc"]:
            ref_id = "NOASSERTION"
            for search in debug_search_paths:
                if debugsrc.startswith("/usr/src/kernel"):
                    debugsrc_path = search / debugsrc.replace('/usr/src/kernel/', '')
                else:
                    debugsrc_path = search / debugsrc.lstrip("/")
                if not debugsrc_path.exists():
                    continue

                file_sha256 = bb.utils.sha256_file(debugsrc_path)

                if file_sha256 in sources:
                    source_file = sources[file_sha256]

                    doc_ref = package_doc.find_external_document_ref(source_file.doc.documentNamespace)
                    if doc_ref is None:
                        doc_ref = oe.spdx.SPDXExternalDocumentRef()
                        doc_ref.externalDocumentId = "DocumentRef-dependency-" + source_file.doc.name
                        doc_ref.spdxDocument = source_file.doc.documentNamespace
                        doc_ref.checksum.algorithm = "SHA1"
                        doc_ref.checksum.checksumValue = source_file.doc_sha1
                        package_doc.externalDocumentRefs.append(doc_ref)

                    ref_id = "%s:%s" % (doc_ref.externalDocumentId, source_file.file.SPDXID)
                else:
                    bb.debug(1, "Debug source %s with SHA256 %s not found in any dependency" % (str(debugsrc_path), file_sha256))
                break
            else:
                bb.debug(1, "Debug source %s not found" % debugsrc)

            package_doc.add_relationship(pkg_file, "GENERATED_FROM", ref_id, comment=debugsrc)

def collect_dep_recipes(d, doc, spdx_recipe):
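    """
    For every recipe this recipe build-depends on (found via do_create_spdx
    entries in BB_TASKDEPDATA), read its recipe SPDX document from
    DEPLOY_DIR_SPDX, add an external document reference to it, and record a
    BUILD_DEPENDENCY_OF relationship against spdx_recipe. Returns a list of
    oe.sbom.DepRecipe objects for later source matching.
    """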
    from pathlib import Path
    import oe.sbom
    import oe.spdx

    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))

    dep_recipes = []
    taskdepdata = d.getVar("BB_TASKDEPDATA", False)
    deps = sorted(set(
        dep[0] for dep in taskdepdata.values() if
            dep[1] == "do_create_spdx" and dep[0] != d.getVar("PN")
    ))
    for dep_pn in deps:
        dep_recipe_path = deploy_dir_spdx / "recipes" / ("recipe-%s.spdx.json" % dep_pn)

        spdx_dep_doc, spdx_dep_sha1 = oe.sbom.read_doc(dep_recipe_path)

        for pkg in spdx_dep_doc.packages:
            if pkg.name == dep_pn:
                spdx_dep_recipe = pkg
                break
        else:
            continue

        dep_recipes.append(oe.sbom.DepRecipe(spdx_dep_doc, spdx_dep_sha1, spdx_dep_recipe))

        dep_recipe_ref = oe.spdx.SPDXExternalDocumentRef()
        dep_recipe_ref.externalDocumentId = "DocumentRef-dependency-" + spdx_dep_doc.name
        dep_recipe_ref.spdxDocument = spdx_dep_doc.documentNamespace
        dep_recipe_ref.checksum.algorithm = "SHA1"
        dep_recipe_ref.checksum.checksumValue = spdx_dep_sha1

        doc.externalDocumentRefs.append(dep_recipe_ref)

        doc.add_relationship(
            "%s:%s" % (dep_recipe_ref.externalDocumentId, spdx_dep_recipe.SPDXID),
            "BUILD_DEPENDENCY_OF",
            spdx_recipe
        )

    return dep_recipes

collect_dep_recipes[vardepsexclude] += "BB_TASKDEPDATA"


def collect_dep_sources(d, dep_recipes):
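    """
    Collect the SOURCE files of the dependency recipe documents, keyed by
    their SHA256 checksum, so that debug sources can be matched back to them.
    Native recipes are skipped because their sources would also match
    non-native sources.
    """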
    import oe.sbom

    sources = {}
    for dep in dep_recipes:
        # Don't collect sources from native recipes as they
        # match non-native sources also.
        if recipe_spdx_is_native(d, dep.recipe):
            continue
        recipe_files = set(dep.recipe.hasFiles)

        for spdx_file in dep.doc.files:
            if spdx_file.SPDXID not in recipe_files:
                continue

            if "SOURCE" in spdx_file.fileTypes:
                for checksum in spdx_file.checksums:
                    if checksum.algorithm == "SHA256":
                        sources[checksum.checksumValue] = oe.sbom.DepSource(dep.doc, dep.doc_sha1, dep.recipe, spdx_file)
                        break

    return sources

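# Generate the recipe-level SPDX document: describe the recipe and each of its
# packages, optionally collect and archive the patched source and packaged
# files, and reference the documents of build dependencies.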
python do_create_spdx() {
    from datetime import datetime, timezone
    import oe.sbom
    import oe.spdx
    import uuid
    from pathlib import Path
    from contextlib import contextmanager
    import oe.cve_check

    @contextmanager
    def optional_tarfile(name, guard, mode="w"):
        import tarfile
        import bb.compress.zstd

        num_threads = int(d.getVar("BB_NUMBER_THREADS"))

        if guard:
            name.parent.mkdir(parents=True, exist_ok=True)
            with bb.compress.zstd.open(name, mode=mode + "b", num_threads=num_threads) as f:
                with tarfile.open(fileobj=f, mode=mode + "|") as tf:
                    yield tf
        else:
            yield None


    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    spdx_workdir = Path(d.getVar("SPDXWORK"))
    include_packaged = d.getVar("SPDX_INCLUDE_PACKAGED") == "1"
    include_sources = d.getVar("SPDX_INCLUDE_SOURCES") == "1"
    archive_sources = d.getVar("SPDX_ARCHIVE_SOURCES") == "1"
    archive_packaged = d.getVar("SPDX_ARCHIVE_PACKAGED") == "1"

    creation_time = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    doc = oe.spdx.SPDXDocument()

    doc.name = "recipe-" + d.getVar("PN")
    doc.documentNamespace = get_doc_namespace(d, doc)
    doc.creationInfo.created = creation_time
    doc.creationInfo.comment = "This document was created by analyzing recipe files during the build."
    doc.creationInfo.licenseListVersion = d.getVar("SPDX_LICENSE_DATA")["licenseListVersion"]
    doc.creationInfo.creators.append("Tool: OpenEmbedded Core create-spdx.bbclass")
    doc.creationInfo.creators.append("Organization: %s" % d.getVar("SPDX_ORG"))
    doc.creationInfo.creators.append("Person: N/A ()")

    recipe = oe.spdx.SPDXPackage()
    recipe.name = d.getVar("PN")
    recipe.versionInfo = d.getVar("PV")
    recipe.SPDXID = oe.sbom.get_recipe_spdxid(d)
    recipe.supplier = d.getVar("SPDX_SUPPLIER")
    if bb.data.inherits_class("native", d) or bb.data.inherits_class("cross", d):
        recipe.annotations.append(create_annotation(d, "isNative"))

    for s in d.getVar('SRC_URI').split():
        if not s.startswith("file://"):
            recipe.downloadLocation = s
            break
    else:
        recipe.downloadLocation = "NOASSERTION"

    homepage = d.getVar("HOMEPAGE")
    if homepage:
        recipe.homepage = homepage

    license = d.getVar("LICENSE")
    if license:
        recipe.licenseDeclared = convert_license_to_spdx(license, doc, d)

    summary = d.getVar("SUMMARY")
    if summary:
        recipe.summary = summary

    description = d.getVar("DESCRIPTION")
    if description:
        recipe.description = description

    # Some CVEs may be patched during the build process without incrementing the version number,
    # so querying for CVEs based on the CPE id can lead to false positives. To account for this,
    # save the CVEs fixed by patches to the source information field in the SPDX.
    patched_cves = oe.cve_check.get_patched_cves(d)
    patched_cves = list(patched_cves)
    patched_cves = ' '.join(patched_cves)
    if patched_cves:
        recipe.sourceInfo = "CVEs fixed: " + patched_cves

    cpe_ids = oe.cve_check.get_cpe_ids(d.getVar("CVE_PRODUCT"), d.getVar("CVE_VERSION"))
    if cpe_ids:
        for cpe_id in cpe_ids:
            cpe = oe.spdx.SPDXExternalReference()
            cpe.referenceCategory = "SECURITY"
            cpe.referenceType = "http://spdx.org/rdf/references/cpe23Type"
            cpe.referenceLocator = cpe_id
            recipe.externalRefs.append(cpe)

    doc.packages.append(recipe)
    doc.add_relationship(doc, "DESCRIBES", recipe)

    if process_sources(d) and include_sources:
        recipe_archive = deploy_dir_spdx / "recipes" / (doc.name + ".tar.zst")
        with optional_tarfile(recipe_archive, archive_sources) as archive:
            spdx_get_src(d)

            add_package_files(
                d,
                doc,
                recipe,
                spdx_workdir,
                lambda file_counter: "SPDXRef-SourceFile-%s-%d" % (d.getVar("PN"), file_counter),
                lambda filepath: ["SOURCE"],
                ignore_dirs=[".git"],
                ignore_top_level_dirs=["temp"],
                archive=archive,
            )

            if archive is not None:
                recipe.packageFileName = str(recipe_archive.name)

    dep_recipes = collect_dep_recipes(d, doc, recipe)

    doc_sha1 = oe.sbom.write_doc(d, doc, "recipes", indent=get_json_indent(d))
    dep_recipes.append(oe.sbom.DepRecipe(doc, doc_sha1, recipe))

    recipe_ref = oe.spdx.SPDXExternalDocumentRef()
    recipe_ref.externalDocumentId = "DocumentRef-recipe-" + recipe.name
    recipe_ref.spdxDocument = doc.documentNamespace
    recipe_ref.checksum.algorithm = "SHA1"
    recipe_ref.checksum.checksumValue = doc_sha1

    sources = collect_dep_sources(d, dep_recipes)
    found_licenses = {license.name: recipe_ref.externalDocumentId + ":" + license.licenseId for license in doc.hasExtractedLicensingInfos}

    if not recipe_spdx_is_native(d, recipe):
        bb.build.exec_func("read_subpackage_metadata", d)

        pkgdest = Path(d.getVar("PKGDEST"))
        for package in d.getVar("PACKAGES").split():
            if not oe.packagedata.packaged(package, d):
                continue

            package_doc = oe.spdx.SPDXDocument()
            pkg_name = d.getVar("PKG:%s" % package) or package
            package_doc.name = pkg_name
            package_doc.documentNamespace = get_doc_namespace(d, package_doc)
            package_doc.creationInfo.created = creation_time
            package_doc.creationInfo.comment = "This document was created by analyzing packages created during the build."
            package_doc.creationInfo.licenseListVersion = d.getVar("SPDX_LICENSE_DATA")["licenseListVersion"]
            package_doc.creationInfo.creators.append("Tool: OpenEmbedded Core create-spdx.bbclass")
            package_doc.creationInfo.creators.append("Organization: %s" % d.getVar("SPDX_ORG"))
            package_doc.creationInfo.creators.append("Person: N/A ()")
            package_doc.externalDocumentRefs.append(recipe_ref)

            package_license = d.getVar("LICENSE:%s" % package) or d.getVar("LICENSE")

            spdx_package = oe.spdx.SPDXPackage()

            spdx_package.SPDXID = oe.sbom.get_package_spdxid(pkg_name)
            spdx_package.name = pkg_name
            spdx_package.versionInfo = d.getVar("PV")
            spdx_package.licenseDeclared = convert_license_to_spdx(package_license, package_doc, d, found_licenses)
            spdx_package.supplier = d.getVar("SPDX_SUPPLIER")

            package_doc.packages.append(spdx_package)

            package_doc.add_relationship(spdx_package, "GENERATED_FROM", "%s:%s" % (recipe_ref.externalDocumentId, recipe.SPDXID))
            package_doc.add_relationship(package_doc, "DESCRIBES", spdx_package)

            package_archive = deploy_dir_spdx / "packages" / (package_doc.name + ".tar.zst")
            with optional_tarfile(package_archive, archive_packaged) as archive:
                package_files = add_package_files(
                    d,
                    package_doc,
                    spdx_package,
                    pkgdest / package,
                    lambda file_counter: oe.sbom.get_packaged_file_spdxid(pkg_name, file_counter),
                    lambda filepath: ["BINARY"],
                    ignore_top_level_dirs=['CONTROL', 'DEBIAN'],
                    archive=archive,
                )

                if archive is not None:
                    spdx_package.packageFileName = str(package_archive.name)

            add_package_sources_from_debug(d, package_doc, spdx_package, package, package_files, sources)

            oe.sbom.write_doc(d, package_doc, "packages", indent=get_json_indent(d))
}
# NOTE: depending on do_unpack is a hack that is necessary to get its dependencies for archiving the source
addtask do_create_spdx after do_package do_packagedata do_unpack before do_populate_sdk do_build do_rm_work

SSTATETASKS += "do_create_spdx"
do_create_spdx[sstate-inputdirs] = "${SPDXDEPLOY}"
do_create_spdx[sstate-outputdirs] = "${DEPLOY_DIR_SPDX}"

python do_create_spdx_setscene () {
    sstate_setscene(d)
}
addtask do_create_spdx_setscene

do_create_spdx[dirs] = "${SPDXWORK}"
do_create_spdx[cleandirs] = "${SPDXDEPLOY} ${SPDXWORK}"
do_create_spdx[depends] += "${PATCHDEPENDENCY}"
do_create_spdx[deptask] = "do_create_spdx"

def collect_package_providers(d):
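    """
    Build a map from every runtime provide (each package name plus its
    RPROVIDES) to the package that provides it, across this recipe and all of
    its task dependencies, so that runtime dependency names can be resolved
    to concrete packages.
    """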
    from pathlib import Path
    import oe.sbom
    import oe.spdx
    import json

    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))

    providers = {}

    taskdepdata = d.getVar("BB_TASKDEPDATA", False)
    deps = sorted(set(
        dep[0] for dep in taskdepdata.values() if dep[0] != d.getVar("PN")
    ))
    deps.append(d.getVar("PN"))

    for dep_pn in deps:
        recipe_data = oe.packagedata.read_pkgdata(dep_pn, d)

        for pkg in recipe_data.get("PACKAGES", "").split():

            pkg_data = oe.packagedata.read_subpkgdata_dict(pkg, d)
            rprovides = set(n for n, _ in bb.utils.explode_dep_versions2(pkg_data.get("RPROVIDES", "")).items())
            rprovides.add(pkg)

            for r in rprovides:
                providers[r] = pkg

    return providers

collect_package_providers[vardepsexclude] += "BB_TASKDEPDATA"

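# Generate one runtime SPDX document per package, expressing the package's
# runtime dependencies (RDEPENDS, resolved through collect_package_providers)
# as RUNTIME_DEPENDENCY_OF relationships between the per-package documents.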
python do_create_runtime_spdx() {
    from datetime import datetime, timezone
    import oe.sbom
    import oe.spdx
    import oe.packagedata
    from pathlib import Path

    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    spdx_deploy = Path(d.getVar("SPDXRUNTIMEDEPLOY"))
    is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class("cross", d)

    creation_time = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    providers = collect_package_providers(d)

    if not is_native:
        bb.build.exec_func("read_subpackage_metadata", d)

        dep_package_cache = {}

        pkgdest = Path(d.getVar("PKGDEST"))
        for package in d.getVar("PACKAGES").split():
            localdata = bb.data.createCopy(d)
            pkg_name = d.getVar("PKG:%s" % package) or package
            localdata.setVar("PKG", pkg_name)
            localdata.setVar('OVERRIDES', d.getVar("OVERRIDES", False) + ":" + package)

            if not oe.packagedata.packaged(package, localdata):
                continue

            pkg_spdx_path = deploy_dir_spdx / "packages" / (pkg_name + ".spdx.json")

            package_doc, package_doc_sha1 = oe.sbom.read_doc(pkg_spdx_path)

            for p in package_doc.packages:
                if p.name == pkg_name:
                    spdx_package = p
                    break
            else:
                bb.fatal("Package '%s' not found in %s" % (pkg_name, pkg_spdx_path))

            runtime_doc = oe.spdx.SPDXDocument()
            runtime_doc.name = "runtime-" + pkg_name
            runtime_doc.documentNamespace = get_doc_namespace(localdata, runtime_doc)
            runtime_doc.creationInfo.created = creation_time
            runtime_doc.creationInfo.comment = "This document was created by analyzing package runtime dependencies."
            runtime_doc.creationInfo.licenseListVersion = d.getVar("SPDX_LICENSE_DATA")["licenseListVersion"]
            runtime_doc.creationInfo.creators.append("Tool: OpenEmbedded Core create-spdx.bbclass")
            runtime_doc.creationInfo.creators.append("Organization: %s" % d.getVar("SPDX_ORG"))
            runtime_doc.creationInfo.creators.append("Person: N/A ()")

            package_ref = oe.spdx.SPDXExternalDocumentRef()
            package_ref.externalDocumentId = "DocumentRef-package-" + package
            package_ref.spdxDocument = package_doc.documentNamespace
            package_ref.checksum.algorithm = "SHA1"
            package_ref.checksum.checksumValue = package_doc_sha1

            runtime_doc.externalDocumentRefs.append(package_ref)

            runtime_doc.add_relationship(
                runtime_doc.SPDXID,
                "AMENDS",
                "%s:%s" % (package_ref.externalDocumentId, package_doc.SPDXID)
            )

            deps = bb.utils.explode_dep_versions2(localdata.getVar("RDEPENDS") or "")
            seen_deps = set()
            for dep, _ in deps.items():
                if dep in seen_deps:
                    continue

                if dep not in providers:
                    continue

                dep = providers[dep]

                if not oe.packagedata.packaged(dep, localdata):
                    continue

                dep_pkg_data = oe.packagedata.read_subpkgdata_dict(dep, d)
                dep_pkg = dep_pkg_data["PKG"]

                if dep in dep_package_cache:
                    (dep_spdx_package, dep_package_ref) = dep_package_cache[dep]
                else:
                    dep_path = deploy_dir_spdx / "packages" / ("%s.spdx.json" % dep_pkg)

                    spdx_dep_doc, spdx_dep_sha1 = oe.sbom.read_doc(dep_path)

                    for pkg in spdx_dep_doc.packages:
                        if pkg.name == dep_pkg:
                            dep_spdx_package = pkg
                            break
                    else:
                        bb.fatal("Package '%s' not found in %s" % (dep_pkg, dep_path))

                    dep_package_ref = oe.spdx.SPDXExternalDocumentRef()
                    dep_package_ref.externalDocumentId = "DocumentRef-runtime-dependency-" + spdx_dep_doc.name
                    dep_package_ref.spdxDocument = spdx_dep_doc.documentNamespace
                    dep_package_ref.checksum.algorithm = "SHA1"
                    dep_package_ref.checksum.checksumValue = spdx_dep_sha1

                    dep_package_cache[dep] = (dep_spdx_package, dep_package_ref)

                runtime_doc.externalDocumentRefs.append(dep_package_ref)

                runtime_doc.add_relationship(
                    "%s:%s" % (dep_package_ref.externalDocumentId, dep_spdx_package.SPDXID),
                    "RUNTIME_DEPENDENCY_OF",
                    "%s:%s" % (package_ref.externalDocumentId, spdx_package.SPDXID)
                )
                seen_deps.add(dep)

            oe.sbom.write_doc(d, runtime_doc, "runtime", spdx_deploy, indent=get_json_indent(d))
}

addtask do_create_runtime_spdx after do_create_spdx before do_build do_rm_work
SSTATETASKS += "do_create_runtime_spdx"
do_create_runtime_spdx[sstate-inputdirs] = "${SPDXRUNTIMEDEPLOY}"
do_create_runtime_spdx[sstate-outputdirs] = "${DEPLOY_DIR_SPDX}"

python do_create_runtime_spdx_setscene () {
    sstate_setscene(d)
}
addtask do_create_runtime_spdx_setscene

do_create_runtime_spdx[dirs] = "${SPDXRUNTIMEDEPLOY}"
do_create_runtime_spdx[cleandirs] = "${SPDXRUNTIMEDEPLOY}"
do_create_runtime_spdx[rdeptask] = "do_create_spdx"

def spdx_get_src(d):
    """
    Save the patched source of the recipe in SPDXWORK.
    """
    import shutil
    spdx_workdir = d.getVar('SPDXWORK')
    spdx_sysroot_native = d.getVar('STAGING_DIR_NATIVE')
    pn = d.getVar('PN')

    workdir = d.getVar("WORKDIR")

    try:
        # The kernel class functions require it to be on work-shared, so we don't change WORKDIR
        if not is_work_shared_spdx(d):
            # Change the WORKDIR to make do_unpack and do_patch run in another dir.
            d.setVar('WORKDIR', spdx_workdir)
            # Restore the original path to the recipe's native sysroot (it's relative to WORKDIR).
            d.setVar('STAGING_DIR_NATIVE', spdx_sysroot_native)

            # Changing 'WORKDIR' also changes 'B', so create the 'B' directory in
            # case later tasks need it (for example, some recipes' do_patch
            # requires 'B' to exist).
            bb.utils.mkdirhier(d.getVar('B'))

            bb.build.exec_func('do_unpack', d)
        # Copy the kernel source to spdx_workdir
        if is_work_shared_spdx(d):
            d.setVar('WORKDIR', spdx_workdir)
            d.setVar('STAGING_DIR_NATIVE', spdx_sysroot_native)
            src_dir = spdx_workdir + "/" + d.getVar('PN') + "-" + d.getVar('PV') + "-" + d.getVar('PR')
            bb.utils.mkdirhier(src_dir)
            if bb.data.inherits_class('kernel', d):
                share_src = d.getVar('STAGING_KERNEL_DIR')
            cmd_copy_share = "cp -rf " + share_src + "/* " + src_dir + "/"
            cmd_copy_kernel_result = os.popen(cmd_copy_share).read()
            bb.note("cmd_copy_kernel_result = " + cmd_copy_kernel_result)

            git_path = src_dir + "/.git"
            if os.path.exists(git_path):
                shutil.rmtree(git_path)

        # Make sure gcc and kernel sources are patched only once
        if not (d.getVar('SRC_URI') == "" or is_work_shared_spdx(d)):
            bb.build.exec_func('do_patch', d)

        # Some userland recipes have no source.
        if not os.path.exists(spdx_workdir):
            bb.utils.mkdirhier(spdx_workdir)
    finally:
        d.setVar("WORKDIR", workdir)

do_rootfs[recrdeptask] += "do_create_spdx do_create_runtime_spdx"

ROOTFS_POSTUNINSTALL_COMMAND =+ "image_combine_spdx ; "

do_populate_sdk[recrdeptask] += "do_create_spdx do_create_runtime_spdx"
POPULATE_SDK_POST_HOST_COMMAND:append:task-populate-sdk = " sdk_host_combine_spdx; "
POPULATE_SDK_POST_TARGET_COMMAND:append:task-populate-sdk = " sdk_target_combine_spdx; "

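# Combine the SPDX documents of every package installed in the image into a
# single <image>.spdx.json, a .spdx.tar.zst archive of all referenced
# documents, and a .spdx.index.json, then create IMAGE_LINK_NAME symlinks.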
python image_combine_spdx() {
    import os
    import oe.sbom
    from pathlib import Path
    from oe.rootfs import image_list_installed_packages

    image_name = d.getVar("IMAGE_NAME")
    image_link_name = d.getVar("IMAGE_LINK_NAME")
    imgdeploydir = Path(d.getVar("IMGDEPLOYDIR"))
    img_spdxid = oe.sbom.get_image_spdxid(image_name)
    packages = image_list_installed_packages(d)

    combine_spdx(d, image_name, imgdeploydir, img_spdxid, packages)

    def make_image_link(target_path, suffix):
        if image_link_name:
            link = imgdeploydir / (image_link_name + suffix)
            if link != target_path:
                link.symlink_to(os.path.relpath(target_path, link.parent))

    image_spdx_path = imgdeploydir / (image_name + ".spdx.json")
    make_image_link(image_spdx_path, ".spdx.json")
    spdx_tar_path = imgdeploydir / (image_name + ".spdx.tar.zst")
    make_image_link(spdx_tar_path, ".spdx.tar.zst")
    spdx_index_path = imgdeploydir / (image_name + ".spdx.index.json")
    make_image_link(spdx_index_path, ".spdx.index.json")
}

python sdk_host_combine_spdx() {
    sdk_combine_spdx(d, "host")
}

python sdk_target_combine_spdx() {
    sdk_combine_spdx(d, "target")
}

def sdk_combine_spdx(d, sdk_type):
    import oe.sbom
    from pathlib import Path
    from oe.sdk import sdk_list_installed_packages

    sdk_name = d.getVar("SDK_NAME") + "-" + sdk_type
    sdk_deploydir = Path(d.getVar("SDKDEPLOYDIR"))
    sdk_spdxid = oe.sbom.get_sdk_spdxid(sdk_name)
    sdk_packages = sdk_list_installed_packages(d, sdk_type == "target")
    combine_spdx(d, sdk_name, sdk_deploydir, sdk_spdxid, sdk_packages)

def combine_spdx(d, rootfs_name, rootfs_deploydir, rootfs_spdxid, packages):
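    """
    Create the top-level SPDX document for a rootfs or SDK: add a CONTAINS
    relationship to every installed package's SPDX package, reference the
    matching runtime documents, write the combined document, bundle all
    referenced documents into a .spdx.tar.zst archive, and emit an index of
    the archive contents as .spdx.index.json.
    """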
    import os
    import oe.spdx
    import oe.sbom
    import io
    import json
    from datetime import timezone, datetime
    from pathlib import Path
    import tarfile
    import bb.compress.zstd

    creation_time = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    source_date_epoch = d.getVar("SOURCE_DATE_EPOCH")

    doc = oe.spdx.SPDXDocument()
    doc.name = rootfs_name
    doc.documentNamespace = get_doc_namespace(d, doc)
    doc.creationInfo.created = creation_time
    doc.creationInfo.comment = "This document was created by analyzing the source of the Yocto recipe during the build."
    doc.creationInfo.licenseListVersion = d.getVar("SPDX_LICENSE_DATA")["licenseListVersion"]
    doc.creationInfo.creators.append("Tool: OpenEmbedded Core create-spdx.bbclass")
    doc.creationInfo.creators.append("Organization: %s" % d.getVar("SPDX_ORG"))
    doc.creationInfo.creators.append("Person: N/A ()")

    image = oe.spdx.SPDXPackage()
    image.name = d.getVar("PN")
    image.versionInfo = d.getVar("PV")
    image.SPDXID = rootfs_spdxid
    image.supplier = d.getVar("SPDX_SUPPLIER")

    doc.packages.append(image)

    for name in sorted(packages.keys()):
        pkg_spdx_path = deploy_dir_spdx / "packages" / (name + ".spdx.json")
        pkg_doc, pkg_doc_sha1 = oe.sbom.read_doc(pkg_spdx_path)

        for p in pkg_doc.packages:
            if p.name == name:
                pkg_ref = oe.spdx.SPDXExternalDocumentRef()
                pkg_ref.externalDocumentId = "DocumentRef-%s" % pkg_doc.name
                pkg_ref.spdxDocument = pkg_doc.documentNamespace
                pkg_ref.checksum.algorithm = "SHA1"
                pkg_ref.checksum.checksumValue = pkg_doc_sha1

                doc.externalDocumentRefs.append(pkg_ref)
                doc.add_relationship(image, "CONTAINS", "%s:%s" % (pkg_ref.externalDocumentId, p.SPDXID))
                break
        else:
            bb.fatal("Unable to find package with name '%s' in SPDX file %s" % (name, pkg_spdx_path))

        runtime_spdx_path = deploy_dir_spdx / "runtime" / ("runtime-" + name + ".spdx.json")
        runtime_doc, runtime_doc_sha1 = oe.sbom.read_doc(runtime_spdx_path)

        runtime_ref = oe.spdx.SPDXExternalDocumentRef()
        runtime_ref.externalDocumentId = "DocumentRef-%s" % runtime_doc.name
        runtime_ref.spdxDocument = runtime_doc.documentNamespace
        runtime_ref.checksum.algorithm = "SHA1"
        runtime_ref.checksum.checksumValue = runtime_doc_sha1

        # "OTHER" isn't ideal here, but I can't find a relationship that makes sense
        doc.externalDocumentRefs.append(runtime_ref)
        doc.add_relationship(
            image,
            "OTHER",
            "%s:%s" % (runtime_ref.externalDocumentId, runtime_doc.SPDXID),
            comment="Runtime dependencies for %s" % name
        )

    image_spdx_path = rootfs_deploydir / (rootfs_name + ".spdx.json")

    with image_spdx_path.open("wb") as f:
        doc.to_json(f, sort_keys=True, indent=get_json_indent(d))

    num_threads = int(d.getVar("BB_NUMBER_THREADS"))

    visited_docs = set()

    index = {"documents": []}

    spdx_tar_path = rootfs_deploydir / (rootfs_name + ".spdx.tar.zst")
    with bb.compress.zstd.open(spdx_tar_path, "w", num_threads=num_threads) as f:
        with tarfile.open(fileobj=f, mode="w|") as tar:
            def collect_spdx_document(path):
                nonlocal tar
                nonlocal deploy_dir_spdx
                nonlocal source_date_epoch
                nonlocal index

                if path in visited_docs:
                    return

                visited_docs.add(path)

                with path.open("rb") as f:
                    doc, sha1 = oe.sbom.read_doc(f)
                    f.seek(0)

                    if doc.documentNamespace in visited_docs:
                        return

                    bb.note("Adding SPDX document %s" % path)
                    visited_docs.add(doc.documentNamespace)
                    info = tar.gettarinfo(fileobj=f)

                    info.name = doc.name + ".spdx.json"
                    info.uid = 0
                    info.gid = 0
                    info.uname = "root"
                    info.gname = "root"

                    if source_date_epoch is not None and info.mtime > int(source_date_epoch):
                        info.mtime = int(source_date_epoch)

                    tar.addfile(info, f)

                    index["documents"].append({
                        "filename": info.name,
                        "documentNamespace": doc.documentNamespace,
                        "sha1": sha1,
                    })

                for ref in doc.externalDocumentRefs:
                    ref_path = deploy_dir_spdx / "by-namespace" / ref.spdxDocument.replace("/", "_")
                    collect_spdx_document(ref_path)

            collect_spdx_document(image_spdx_path)

            index["documents"].sort(key=lambda x: x["filename"])

            index_str = io.BytesIO(json.dumps(
                index,
                sort_keys=True,
                indent=get_json_indent(d),
            ).encode("utf-8"))

            info = tarfile.TarInfo()
            info.name = "index.json"
            info.size = len(index_str.getvalue())
            info.uid = 0
            info.gid = 0
            info.uname = "root"
            info.gname = "root"

            tar.addfile(info, fileobj=index_str)

    spdx_index_path = rootfs_deploydir / (rootfs_name + ".spdx.index.json")
    with spdx_index_path.open("w") as f:
        json.dump(index, f, sort_keys=True, indent=get_json_indent(d))