1#
2# SPDX-License-Identifier: MIT
3#
4# Copyright 2019-2020 by Garmin Ltd. or its subsidiaries
5
6from oeqa.selftest.case import OESelftestTestCase
7from oeqa.utils.commands import runCmd, bitbake, get_bb_var, get_bb_vars
8import bb.utils
9import functools
10import multiprocessing
11import textwrap
12import json
13import unittest
14import tempfile
15import shutil
16import stat
17import os
18import datetime
19
20# For sample packages, see:
21# https://autobuilder.yocto.io/pub/repro-fail/oe-reproducible-20201127-0t7wr_oo/
22# https://autobuilder.yocto.io/pub/repro-fail/oe-reproducible-20201127-4s9ejwyp/
23# https://autobuilder.yocto.io/pub/repro-fail/oe-reproducible-20201127-haiwdlbr/
24# https://autobuilder.yocto.io/pub/repro-fail/oe-reproducible-20201127-hwds3mcl/
25# https://autobuilder.yocto.io/pub/repro-fail/oe-reproducible-20201203-sua0pzvc/
26# (both packages/ and packages-excluded/)
27
28# ruby-ri-docs, meson:
# https://autobuilder.yocto.io/pub/repro-fail/oe-reproducible-20210215-0_td9la2/packages/diff-html/
# Package file-name prefixes whose differences are tolerated. is_excluded()
# tests them in list order with str.startswith() and returns the first hit,
# so the more specific entries are listed ahead of the generic 'go-' prefix.
exclude_packages = [
    'glide',
    'go-helloworld',
    'go-runtime',
    'go_',
    'go-',
    'ruby-ri-docs',
]
38
def is_excluded(package):
    """Return the first exclusion prefix that matches *package*, else None.

    Only the final path component of ``package`` is examined. It is tested
    against the entries of ``exclude_packages`` in list order with
    ``str.startswith``; the first matching entry is returned.
    """
    basename = os.path.basename(package)
    matching = (prefix for prefix in exclude_packages if basename.startswith(prefix))
    return next(matching, None)
45
# Status strings stored in CompareResult.status by compare_file().
MISSING, DIFFERENT, SAME = 'MISSING', 'DIFFERENT', 'SAME'
49
@functools.total_ordering
class CompareResult(object):
    """Outcome of comparing one reference file against its test counterpart.

    Instances compare and sort by the pair ``(status, test)``; ``reference``
    takes no part in the comparison. ``functools.total_ordering`` derives the
    remaining rich comparisons from ``__eq__`` and ``__lt__``.

    Because ``__eq__`` is defined without ``__hash__``, instances are
    unhashable, which suits a mutable record.
    """

    def __init__(self):
        # Path of the reference file; None until assigned.
        self.reference = None
        # Path of the file under test; None until assigned.
        self.test = None
        # Comparison outcome; 'UNKNOWN' until a comparison assigns a status.
        self.status = 'UNKNOWN'

    def _key(self):
        # Single definition of the comparison key shared by __eq__ and __lt__.
        return (self.status, self.test)

    def __eq__(self, other):
        # Returning NotImplemented (instead of touching other.status) lets
        # Python fall back to its default handling for foreign types rather
        # than failing with AttributeError.
        if not isinstance(other, CompareResult):
            return NotImplemented
        return self._key() == other._key()

    def __lt__(self, other):
        if not isinstance(other, CompareResult):
            return NotImplemented
        return self._key() < other._key()

    def __repr__(self):
        return '%s(status=%r, reference=%r, test=%r)' % (
            type(self).__name__, self.status, self.reference, self.test)
62
class PackageCompareResults(object):
    """Collects CompareResult objects and buckets them by status."""

    def __init__(self):
        # Every result passed to add_result(), whatever its status.
        self.total = []
        # Results whose status is MISSING.
        self.missing = []
        # DIFFERENT results with no matching exclusion prefix.
        self.different = []
        # DIFFERENT results whose reference matched an exclusion prefix.
        self.different_excluded = []
        # Results with any other status.
        self.same = []
        # Exclusion prefixes that matched at least one DIFFERENT result.
        self.active_exclusions = set()

    def add_result(self, r):
        """Record *r* in ``total`` and in the bucket matching its status."""
        self.total.append(r)

        if r.status == MISSING:
            self.missing.append(r)
            return

        if r.status != DIFFERENT:
            # Anything that is neither MISSING nor DIFFERENT lands here.
            self.same.append(r)
            return

        matched_prefix = is_excluded(r.reference)
        if not matched_prefix:
            self.different.append(r)
            return

        self.different_excluded.append(r)
        self.active_exclusions.add(matched_prefix)

    def sort(self):
        """Sort every result list in place."""
        buckets = (
            self.total,
            self.missing,
            self.different,
            self.different_excluded,
            self.same,
        )
        for bucket in buckets:
            bucket.sort()

    def __str__(self):
        counts = (
            len(self.same),
            len(self.different),
            len(self.different_excluded),
            len(self.missing),
            len(self.total),
        )
        summary = 'same=%i different=%i different_excluded=%i missing=%i total=%i\nunused_exclusions=%s'
        return summary % (counts + (self.unused_exclusions(),))

    def unused_exclusions(self):
        """Return, sorted, the exclusion prefixes that never matched."""
        unused = set(exclude_packages).difference(self.active_exclusions)
        return sorted(unused)
98
def compare_file(reference, test, diffutils_sysroot):
    """Compare one test file against its reference and return a CompareResult.

    The result's status is MISSING when ``reference`` does not exist (no
    command is run in that case). Otherwise ``cmp --quiet`` is run through
    runCmd using ``diffutils_sysroot`` as the native sysroot: any non-zero
    exit status yields DIFFERENT, a zero exit status yields SAME.
    """
    outcome = CompareResult()
    outcome.reference, outcome.test = reference, test

    if os.path.exists(reference):
        cmp_run = runCmd(['cmp', '--quiet', reference, test], native_sysroot=diffutils_sysroot, ignore_status=True, sync=False)
        outcome.status = DIFFERENT if cmp_run.status else SAME
    else:
        outcome.status = MISSING

    return outcome
116
def run_diffoscope(a_dir, b_dir, html_dir, **kwargs):
    """Run diffoscope on ``a_dir`` and ``b_dir``, writing HTML to ``html_dir``.

    Any extra keyword arguments are forwarded unchanged to runCmd, whose
    return value is handed back to the caller.
    """
    command = [
        'diffoscope',
        '--no-default-limits',
        '--exclude-directory-metadata', 'yes',
        '--html-dir', html_dir,
        a_dir,
        b_dir,
    ]
    return runCmd(command, **kwargs)
120
class DiffoscopeTests(OESelftestTestCase):
    """Sanity checks for the diffoscope-native tool."""

    # Fixture directory named "diffoscope" that sits next to this file.
    diffoscope_test_files = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "diffoscope",
    )

    def test_diffoscope(self):
        bitbake("diffoscope-native -c addto_recipe_sysroot")
        native_sysroot = get_bb_var("RECIPE_SYSROOT_NATIVE", "diffoscope-native")
        fixtures = self.diffoscope_test_files

        # Comparing a directory with itself must not make diffoscope report
        # an error (a general check that diffoscope is working).
        with tempfile.TemporaryDirectory() as html_out:
            run_diffoscope('A', 'A', html_out, native_sysroot=native_sysroot, cwd=fixtures)

        # Comparing two different directories must give a non-zero status and
        # leave an index.html behind in the HTML output directory.
        with tempfile.TemporaryDirectory() as html_out:
            outcome = run_diffoscope('A', 'B', html_out, native_sysroot=native_sysroot, ignore_status=True, cwd=fixtures)
            index_page = os.path.join(html_out, 'index.html')

            self.assertNotEqual(outcome.status, 0, msg="diffoscope was successful when an error was expected")
            self.assertTrue(os.path.exists(index_page), "HTML index not found!")
142
class ReproducibleTests(OESelftestTestCase):
    """Build the configured targets twice and compare the deployed packages.

    Build "reproducibleA" may pull from sstate (see ``build_from_sstate``);
    build "reproducibleB" always uses a private sstate directory inside its
    freshly removed TMPDIR and no mirrors. The package files each build
    deploys are then compared per package class.
    """
    # Test the reproducibility of whatever is built between sstate_targets and targets

    package_classes = ['deb', 'ipk', 'rpm']

    # targets are the things we want to test the reproducibility of
    targets = ['core-image-minimal', 'core-image-sato', 'core-image-full-cmdline', 'core-image-weston', 'world']
    # sstate targets are things to pull from sstate to potentially cut build/debugging time
    sstate_targets = []
    # Directory in which differing packages are saved for debugging, taken
    # from the environment; False when the variable is not set.
    save_results = os.environ.get('OEQA_DEBUGGING_SAVED_OUTPUT', False)

    # This variable controls if one of the test builds is allowed to pull from
    # an sstate cache/mirror. The other build is always done clean as a point of
    # comparison.
    # If you know that your sstate archives are reproducible, enabling this
    # will test that and also make the test run faster. If your sstate is not
    # reproducible, disable this in your derived test class
    build_from_sstate = True

    def setUpLocal(self):
        """Cache the needed bitbake variables and reset the extra results."""
        super().setUpLocal()
        needed_vars = ['TOPDIR', 'TARGET_PREFIX', 'BB_NUMBER_THREADS']
        bb_vars = get_bb_vars(needed_vars)
        # Exposed as self.topdir, self.target_prefix, self.bb_number_threads.
        for v in needed_vars:
            setattr(self, v.lower(), bb_vars[v])

        self.extraresults = {}
        self.extraresults.setdefault('reproducible.rawlogs', {})['log'] = ''
        self.extraresults.setdefault('reproducible', {}).setdefault('files', {})

    def append_to_log(self, msg):
        """Append *msg* verbatim to the raw log kept in the extra results."""
        self.extraresults['reproducible.rawlogs']['log'] += msg

    def compare_packages(self, reference_dir, test_dir, diffutils_sysroot):
        """Compare every file under *test_dir* with its twin in *reference_dir*.

        The comparisons are farmed out to a process pool sized from
        BB_NUMBER_THREADS. Returns a sorted PackageCompareResults.
        """
        result = PackageCompareResults()

        # multiprocessing.Pool rejects processes=0 with ValueError, so an
        # unset, empty or zero BB_NUMBER_THREADS falls back to None, which
        # lets the pool choose its own default size.
        pool_size = int(self.bb_number_threads or 0) or None

        old_cwd = os.getcwd()
        try:
            # Walk relative to test_dir so that each 'root' can be joined
            # onto both the reference and the test directory.
            os.chdir(test_dir)
            with multiprocessing.Pool(processes=pool_size) as p:
                for root, _dirs, files in os.walk('.'):
                    pending = [
                        p.apply_async(compare_file, (
                            os.path.join(reference_dir, root, f),
                            os.path.join(test_dir, root, f),
                            diffutils_sysroot))
                        for f in files
                    ]

                    for job in pending:
                        result.add_result(job.get())

        finally:
            os.chdir(old_cwd)

        result.sort()
        return result

    def write_package_list(self, package_class, name, packages):
        """Store the reference/test paths of *packages* in the extra results."""
        self.extraresults['reproducible']['files'].setdefault(package_class, {})[name] = [
                {'reference': p.reference, 'test': p.test} for p in packages]

    def copy_file(self, source, dest):
        """Copy *source* to *dest*, creating the destination directory first."""
        bb.utils.mkdirhier(os.path.dirname(dest))
        shutil.copyfile(source, dest)

    def do_test_build(self, name, use_sstate):
        """Build the targets in TOPDIR/<name>/tmp and return the deploy dirs.

        Any existing TMPDIR for *name* is removed first. When *use_sstate*
        is false, ``sstate_targets`` (if any) are prebuilt with sstate still
        allowed, then the main build runs with a private sstate directory
        and no mirrors.

        Returns the result of get_bb_vars() for the DEPLOY_DIR_<CLASS>
        variables, read under the final configuration before the build.
        """
        capture_vars = ['DEPLOY_DIR_' + c.upper() for c in self.package_classes]

        tmpdir = os.path.join(self.topdir, name, 'tmp')
        if os.path.exists(tmpdir):
            bb.utils.remove(tmpdir, recurse=True)

        config = textwrap.dedent('''\
            INHERIT += "reproducible_build"
            PACKAGE_CLASSES = "{package_classes}"
            INHIBIT_PACKAGE_STRIP = "1"
            TMPDIR = "{tmpdir}"
            LICENSE_FLAGS_WHITELIST = "commercial"
            DISTRO_FEATURES:append = ' systemd pam'
            USERADDEXTENSION = "useradd-staticids"
            USERADD_ERROR_DYNAMIC = "skip"
            USERADD_UID_TABLES += "files/static-passwd"
            USERADD_GID_TABLES += "files/static-group"
            ''').format(package_classes=' '.join('package_%s' % c for c in self.package_classes),
                        tmpdir=tmpdir)

        if not use_sstate:
            if self.sstate_targets:
                self.logger.info("Building prebuild for %s (sstate allowed)...", name)
                self.write_config(config)
                bitbake(' '.join(self.sstate_targets))

            # This config fragment will disable using shared and the sstate
            # mirror, forcing a complete build from scratch
            config += textwrap.dedent('''\
                SSTATE_DIR = "${TMPDIR}/sstate"
                SSTATE_MIRRORS = ""
                ''')

        self.logger.info("Building %s (sstate%s allowed)...", name, '' if use_sstate else ' NOT')
        self.write_config(config)
        d = get_bb_vars(capture_vars)
        # targets used to be called images
        bitbake(' '.join(getattr(self, 'images', self.targets)))
        return d

    def test_reproducible_builds(self):
        # NOTE: deliberately no docstring here; unittest would report its
        # first line as the test description.
        def strip_topdir(s):
            # Drop the TOPDIR prefix; the remainder keeps its leading '/'.
            if s.startswith(self.topdir):
                return s[len(self.topdir):]
            return s

        # Build native utilities
        self.write_config('')
        bitbake("diffoscope-native diffutils-native jquery-native -c addto_recipe_sysroot")
        diffutils_sysroot = get_bb_var("RECIPE_SYSROOT_NATIVE", "diffutils-native")
        diffoscope_sysroot = get_bb_var("RECIPE_SYSROOT_NATIVE", "diffoscope-native")
        jquery_sysroot = get_bb_var("RECIPE_SYSROOT_NATIVE", "jquery-native")

        if self.save_results:
            os.makedirs(self.save_results, exist_ok=True)
            datestr = datetime.datetime.now().strftime('%Y%m%d')
            save_dir = tempfile.mkdtemp(prefix='oe-reproducible-%s-' % datestr, dir=self.save_results)
            # mkdtemp creates the directory owner-only; open it up to 0755.
            os.chmod(save_dir, stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
            self.logger.info('Non-reproducible packages will be copied to %s', save_dir)

        vars_A = self.do_test_build('reproducibleA', self.build_from_sstate)

        vars_B = self.do_test_build('reproducibleB', False)

        # NOTE: The temp directories from the reproducible build are purposely
        # kept after the build so it can be diffed for debugging.

        fails = []

        for c in self.package_classes:
            with self.subTest(package_class=c):
                package_class = 'package_' + c

                deploy_A = vars_A['DEPLOY_DIR_' + c.upper()]
                deploy_B = vars_B['DEPLOY_DIR_' + c.upper()]

                self.logger.info('Checking %s packages for differences...', c)
                result = self.compare_packages(deploy_A, deploy_B, diffutils_sysroot)

                self.logger.info('Reproducibility summary for %s: %s', c, result)

                # Terminate every entry with a newline so that the entries of
                # successive package classes do not run together on one line.
                self.append_to_log(''.join("%s: %s\n" % (r.status, r.test) for r in result.total))

                self.write_package_list(package_class, 'missing', result.missing)
                self.write_package_list(package_class, 'different', result.different)
                self.write_package_list(package_class, 'different_excluded', result.different_excluded)
                self.write_package_list(package_class, 'same', result.same)

                if self.save_results:
                    # Plain '/'.join (not os.path.join) because strip_topdir()
                    # returns a path that still starts with '/'.
                    for pkg in result.different:
                        self.copy_file(pkg.reference, '/'.join([save_dir, 'packages', strip_topdir(pkg.reference)]))
                        self.copy_file(pkg.test, '/'.join([save_dir, 'packages', strip_topdir(pkg.test)]))

                    for pkg in result.different_excluded:
                        self.copy_file(pkg.reference, '/'.join([save_dir, 'packages-excluded', strip_topdir(pkg.reference)]))
                        self.copy_file(pkg.test, '/'.join([save_dir, 'packages-excluded', strip_topdir(pkg.test)]))

                if result.missing or result.different:
                    fails.append("The following %s packages are missing or different and not in exclusion list: %s" %
                            (c, '\n'.join(r.test for r in (result.missing + result.different))))

        # Clean up empty directories
        if self.save_results:
            if not os.listdir(save_dir):
                os.rmdir(save_dir)
            else:
                self.logger.info('Running diffoscope')
                package_dir = os.path.join(save_dir, 'packages')
                package_html_dir = os.path.join(package_dir, 'diff-html')

                # Copy jquery to improve the diffoscope output usability
                self.copy_file(os.path.join(jquery_sysroot, 'usr/share/javascript/jquery/jquery.min.js'), os.path.join(package_html_dir, 'jquery.js'))

                run_diffoscope('reproducibleA', 'reproducibleB', package_html_dir,
                        native_sysroot=diffoscope_sysroot, ignore_status=True, cwd=package_dir)

        if fails:
            self.fail('\n'.join(fails))
329
330