xref: /openbmc/openbmc/poky/bitbake/lib/bb/siggen.py (revision eff27476)
1#
2# SPDX-License-Identifier: GPL-2.0-only
3#
4
5import hashlib
6import logging
7import os
8import re
9import tempfile
10import pickle
11import bb.data
12import difflib
13import simplediff
14import json
15import bb.compress.zstd
16from bb.checksum import FileChecksumCache
17from bb import runqueue
18import hashserv
19import hashserv.client
20
21logger = logging.getLogger('BitBake.SigGen')
22hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')
23
24class SetEncoder(json.JSONEncoder):
25    def default(self, obj):
26        if isinstance(obj, set):
27            return dict(_set_object=list(sorted(obj)))
28        return json.JSONEncoder.default(self, obj)
29
30def SetDecoder(dct):
31    if '_set_object' in dct:
32        return set(dct['_set_object'])
33    return dct
34
35def init(d):
36    siggens = [obj for obj in globals().values()
37                      if type(obj) is type and issubclass(obj, SignatureGenerator)]
38
39    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
40    for sg in siggens:
41        if desired == sg.name:
42            return sg(d)
43            break
44    else:
45        logger.error("Invalid signature generator '%s', using default 'noop'\n"
46                     "Available generators: %s", desired,
47                     ', '.join(obj.name for obj in siggens))
48        return SignatureGenerator(d)
49
50class SignatureGenerator(object):
51    """
52    """
53    name = "noop"
54
55    # If the derived class supports multiconfig datacaches, set this to True
56    # The default is False for backward compatibility with derived signature
57    # generators that do not understand multiconfig caches
58    supports_multiconfig_datacaches = False
59
60    def __init__(self, data):
61        self.basehash = {}
62        self.taskhash = {}
63        self.unihash = {}
64        self.runtaskdeps = {}
65        self.file_checksum_values = {}
66        self.taints = {}
67        self.unitaskhashes = {}
68        self.tidtopn = {}
69        self.setscenetasks = set()
70
71    def finalise(self, fn, d, varient):
72        return
73
74    def postparsing_clean_cache(self):
75        return
76
77    def get_unihash(self, tid):
78        return self.taskhash[tid]
79
80    def prep_taskhash(self, tid, deps, dataCaches):
81        return
82
83    def get_taskhash(self, tid, deps, dataCaches):
84        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
85        return self.taskhash[tid]
86
87    def writeout_file_checksum_cache(self):
88        """Write/update the file checksum cache onto disk"""
89        return
90
91    def stampfile(self, stampbase, file_name, taskname, extrainfo):
92        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')
93
94    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
95        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')
96
97    def dump_sigtask(self, fn, task, stampbase, runtime):
98        return
99
100    def invalidate_task(self, task, d, fn):
101        bb.build.del_stamp(task, d, fn)
102
103    def dump_sigs(self, dataCache, options):
104        return
105
106    def get_taskdata(self):
107        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)
108
109    def set_taskdata(self, data):
110        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data
111
112    def reset(self, data):
113        self.__init__(data)
114
115    def get_taskhashes(self):
116        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn
117
118    def set_taskhashes(self, hashes):
119        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes
120
121    def save_unitaskhashes(self):
122        return
123
124    def set_setscene_tasks(self, setscene_tasks):
125        return
126
127    @classmethod
128    def get_data_caches(cls, dataCaches, mc):
129        """
130        This function returns the datacaches that should be passed to signature
131        generator functions. If the signature generator supports multiconfig
132        caches, the entire dictionary of data caches is sent, otherwise a
133        special proxy is sent that support both index access to all
134        multiconfigs, and also direct access for the default multiconfig.
135
136        The proxy class allows code in this class itself to always use
137        multiconfig aware code (to ease maintenance), but derived classes that
138        are unaware of multiconfig data caches can still access the default
139        multiconfig as expected.
140
141        Do not override this function in derived classes; it will be removed in
142        the future when support for multiconfig data caches is mandatory
143        """
144        class DataCacheProxy(object):
145            def __init__(self):
146                pass
147
148            def __getitem__(self, key):
149                return dataCaches[key]
150
151            def __getattr__(self, name):
152                return getattr(dataCaches[mc], name)
153
154        if cls.supports_multiconfig_datacaches:
155            return dataCaches
156
157        return DataCacheProxy()
158
159class SignatureGeneratorBasic(SignatureGenerator):
160    """
161    """
162    name = "basic"
163
164    def __init__(self, data):
165        self.basehash = {}
166        self.taskhash = {}
167        self.unihash = {}
168        self.taskdeps = {}
169        self.runtaskdeps = {}
170        self.file_checksum_values = {}
171        self.taints = {}
172        self.gendeps = {}
173        self.lookupcache = {}
174        self.setscenetasks = set()
175        self.basewhitelist = set((data.getVar("BB_HASHBASE_WHITELIST") or "").split())
176        self.taskwhitelist = None
177        self.init_rundepcheck(data)
178        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
179        if checksum_cache_file:
180            self.checksum_cache = FileChecksumCache()
181            self.checksum_cache.init_cache(data, checksum_cache_file)
182        else:
183            self.checksum_cache = None
184
185        self.unihash_cache = bb.cache.SimpleCache("3")
186        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
187        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
188        self.tidtopn = {}
189
190    def init_rundepcheck(self, data):
191        self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None
192        if self.taskwhitelist:
193            self.twl = re.compile(self.taskwhitelist)
194        else:
195            self.twl = None
196
197    def _build_data(self, fn, d):
198
199        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
200        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basewhitelist)
201
202        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basewhitelist, fn)
203
204        for task in tasklist:
205            tid = fn + ":" + task
206            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
207                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
208                bb.error("The following commands may help:")
209                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
210                # Make sure sigdata is dumped before run printdiff
211                bb.error("%s -Snone" % cmd)
212                bb.error("Then:")
213                bb.error("%s -Sprintdiff\n" % cmd)
214            self.basehash[tid] = basehash[tid]
215
216        self.taskdeps[fn] = taskdeps
217        self.gendeps[fn] = gendeps
218        self.lookupcache[fn] = lookupcache
219
220        return taskdeps
221
222    def set_setscene_tasks(self, setscene_tasks):
223        self.setscenetasks = set(setscene_tasks)
224
225    def finalise(self, fn, d, variant):
226
227        mc = d.getVar("__BBMULTICONFIG", False) or ""
228        if variant or mc:
229            fn = bb.cache.realfn2virtual(fn, variant, mc)
230
231        try:
232            taskdeps = self._build_data(fn, d)
233        except bb.parse.SkipRecipe:
234            raise
235        except:
236            bb.warn("Error during finalise of %s" % fn)
237            raise
238
239        #Slow but can be useful for debugging mismatched basehashes
240        #for task in self.taskdeps[fn]:
241        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)
242
243        for task in taskdeps:
244            d.setVar("BB_BASEHASH:task-%s" % task, self.basehash[fn + ":" + task])
245
246    def postparsing_clean_cache(self):
247        #
248        # After parsing we can remove some things from memory to reduce our memory footprint
249        #
250        self.gendeps = {}
251        self.lookupcache = {}
252        self.taskdeps = {}
253
254    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
255        # Return True if we should keep the dependency, False to drop it
256        # We only manipulate the dependencies for packages not in the whitelist
257        if self.twl and not self.twl.search(recipename):
258            # then process the actual dependencies
259            if self.twl.search(depname):
260                return False
261        return True
262
263    def read_taint(self, fn, task, stampbase):
264        taint = None
265        try:
266            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
267                taint = taintf.read()
268        except IOError:
269            pass
270        return taint
271
272    def prep_taskhash(self, tid, deps, dataCaches):
273
274        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)
275
276        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
277        self.runtaskdeps[tid] = []
278        self.file_checksum_values[tid] = []
279        recipename = dataCaches[mc].pkg_fn[fn]
280
281        self.tidtopn[tid] = recipename
282
283        for dep in sorted(deps, key=clean_basepath):
284            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
285            depname = dataCaches[depmc].pkg_fn[depmcfn]
286            if not self.supports_multiconfig_datacaches and mc != depmc:
287                # If the signature generator doesn't understand multiconfig
288                # data caches, any dependency not in the same multiconfig must
289                # be skipped for backward compatibility
290                continue
291            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
292                continue
293            if dep not in self.taskhash:
294                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
295            self.runtaskdeps[tid].append(dep)
296
297        if task in dataCaches[mc].file_checksums[fn]:
298            if self.checksum_cache:
299                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
300            else:
301                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
302            for (f,cs) in checksums:
303                self.file_checksum_values[tid].append((f,cs))
304
305        taskdep = dataCaches[mc].task_deps[fn]
306        if 'nostamp' in taskdep and task in taskdep['nostamp']:
307            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
308            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
309                # Don't reset taint value upon every call
310                pass
311            else:
312                import uuid
313                taint = str(uuid.uuid4())
314                self.taints[tid] = "nostamp:" + taint
315
316        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
317        if taint:
318            self.taints[tid] = taint
319            logger.warning("%s is tainted from a forced run" % tid)
320
321        return
322
323    def get_taskhash(self, tid, deps, dataCaches):
324
325        data = self.basehash[tid]
326        for dep in self.runtaskdeps[tid]:
327            data = data + self.get_unihash(dep)
328
329        for (f, cs) in self.file_checksum_values[tid]:
330            if cs:
331                data = data + cs
332
333        if tid in self.taints:
334            if self.taints[tid].startswith("nostamp:"):
335                data = data + self.taints[tid][8:]
336            else:
337                data = data + self.taints[tid]
338
339        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
340        self.taskhash[tid] = h
341        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
342        return h
343
344    def writeout_file_checksum_cache(self):
345        """Write/update the file checksum cache onto disk"""
346        if self.checksum_cache:
347            self.checksum_cache.save_extras()
348            self.checksum_cache.save_merge()
349        else:
350            bb.fetch2.fetcher_parse_save()
351            bb.fetch2.fetcher_parse_done()
352
353    def save_unitaskhashes(self):
354        self.unihash_cache.save(self.unitaskhashes)
355
356    def dump_sigtask(self, fn, task, stampbase, runtime):
357
358        tid = fn + ":" + task
359        referencestamp = stampbase
360        if isinstance(runtime, str) and runtime.startswith("customfile"):
361            sigfile = stampbase
362            referencestamp = runtime[11:]
363        elif runtime and tid in self.taskhash:
364            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
365        else:
366            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]
367
368        with bb.utils.umask(0o002):
369            bb.utils.mkdirhier(os.path.dirname(sigfile))
370
371        data = {}
372        data['task'] = task
373        data['basewhitelist'] = self.basewhitelist
374        data['taskwhitelist'] = self.taskwhitelist
375        data['taskdeps'] = self.taskdeps[fn][task]
376        data['basehash'] = self.basehash[tid]
377        data['gendeps'] = {}
378        data['varvals'] = {}
379        data['varvals'][task] = self.lookupcache[fn][task]
380        for dep in self.taskdeps[fn][task]:
381            if dep in self.basewhitelist:
382                continue
383            data['gendeps'][dep] = self.gendeps[fn][dep]
384            data['varvals'][dep] = self.lookupcache[fn][dep]
385
386        if runtime and tid in self.taskhash:
387            data['runtaskdeps'] = self.runtaskdeps[tid]
388            data['file_checksum_values'] = [(os.path.basename(f), cs) for f,cs in self.file_checksum_values[tid]]
389            data['runtaskhashes'] = {}
390            for dep in data['runtaskdeps']:
391                data['runtaskhashes'][dep] = self.get_unihash(dep)
392            data['taskhash'] = self.taskhash[tid]
393            data['unihash'] = self.get_unihash(tid)
394
395        taint = self.read_taint(fn, task, referencestamp)
396        if taint:
397            data['taint'] = taint
398
399        if runtime and tid in self.taints:
400            if 'nostamp:' in self.taints[tid]:
401                data['taint'] = self.taints[tid]
402
403        computed_basehash = calc_basehash(data)
404        if computed_basehash != self.basehash[tid]:
405            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
406        if runtime and tid in self.taskhash:
407            computed_taskhash = calc_taskhash(data)
408            if computed_taskhash != self.taskhash[tid]:
409                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
410                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)
411
412        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
413        try:
414            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
415                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
416                f.flush()
417            os.chmod(tmpfile, 0o664)
418            bb.utils.rename(tmpfile, sigfile)
419        except (OSError, IOError) as err:
420            try:
421                os.unlink(tmpfile)
422            except OSError:
423                pass
424            raise err
425
426    def dump_sigfn(self, fn, dataCaches, options):
427        if fn in self.taskdeps:
428            for task in self.taskdeps[fn]:
429                tid = fn + ":" + task
430                mc = bb.runqueue.mc_from_tid(tid)
431                if tid not in self.taskhash:
432                    continue
433                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
434                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
435                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
436                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)
437
438class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
439    name = "basichash"
440
441    def get_stampfile_hash(self, tid):
442        if tid in self.taskhash:
443            return self.taskhash[tid]
444
445        # If task is not in basehash, then error
446        return self.basehash[tid]
447
448    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
449        if taskname != "do_setscene" and taskname.endswith("_setscene"):
450            tid = fn + ":" + taskname[:-9]
451        else:
452            tid = fn + ":" + taskname
453        if clean:
454            h = "*"
455        else:
456            h = self.get_stampfile_hash(tid)
457
458        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')
459
460    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
461        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)
462
463    def invalidate_task(self, task, d, fn):
464        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
465        bb.build.write_taint(task, d, fn)
466
467class SignatureGeneratorUniHashMixIn(object):
468    def __init__(self, data):
469        self.extramethod = {}
470        super().__init__(data)
471
472    def get_taskdata(self):
473        return (self.server, self.method, self.extramethod) + super().get_taskdata()
474
475    def set_taskdata(self, data):
476        self.server, self.method, self.extramethod = data[:3]
477        super().set_taskdata(data[3:])
478
479    def client(self):
480        if getattr(self, '_client', None) is None:
481            self._client = hashserv.create_client(self.server)
482        return self._client
483
484    def get_stampfile_hash(self, tid):
485        if tid in self.taskhash:
486            # If a unique hash is reported, use it as the stampfile hash. This
487            # ensures that if a task won't be re-run if the taskhash changes,
488            # but it would result in the same output hash
489            unihash = self._get_unihash(tid)
490            if unihash is not None:
491                return unihash
492
493        return super().get_stampfile_hash(tid)
494
495    def set_unihash(self, tid, unihash):
496        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
497        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
498        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
499        self.unihash[tid] = unihash
500
501    def _get_unihash(self, tid, checkkey=None):
502        if tid not in self.tidtopn:
503            return None
504        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
505        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
506        if key not in self.unitaskhashes:
507            return None
508        if not checkkey:
509            checkkey = self.taskhash[tid]
510        (key, unihash) = self.unitaskhashes[key]
511        if key != checkkey:
512            return None
513        return unihash
514
515    def get_unihash(self, tid):
516        taskhash = self.taskhash[tid]
517
518        # If its not a setscene task we can return
519        if self.setscenetasks and tid not in self.setscenetasks:
520            self.unihash[tid] = None
521            return taskhash
522
523        # TODO: This cache can grow unbounded. It probably only needs to keep
524        # for each task
525        unihash =  self._get_unihash(tid)
526        if unihash is not None:
527            self.unihash[tid] = unihash
528            return unihash
529
530        # In the absence of being able to discover a unique hash from the
531        # server, make it be equivalent to the taskhash. The unique "hash" only
532        # really needs to be a unique string (not even necessarily a hash), but
533        # making it match the taskhash has a few advantages:
534        #
535        # 1) All of the sstate code that assumes hashes can be the same
536        # 2) It provides maximal compatibility with builders that don't use
537        #    an equivalency server
538        # 3) The value is easy for multiple independent builders to derive the
539        #    same unique hash from the same input. This means that if the
540        #    independent builders find the same taskhash, but it isn't reported
541        #    to the server, there is a better chance that they will agree on
542        #    the unique hash.
543        unihash = taskhash
544
545        try:
546            method = self.method
547            if tid in self.extramethod:
548                method = method + self.extramethod[tid]
549            data = self.client().get_unihash(method, self.taskhash[tid])
550            if data:
551                unihash = data
552                # A unique hash equal to the taskhash is not very interesting,
553                # so it is reported it at debug level 2. If they differ, that
554                # is much more interesting, so it is reported at debug level 1
555                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
556            else:
557                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
558        except ConnectionError as e:
559            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
560
561        self.set_unihash(tid, unihash)
562        self.unihash[tid] = unihash
563        return unihash
564
565    def report_unihash(self, path, task, d):
566        import importlib
567
568        taskhash = d.getVar('BB_TASKHASH')
569        unihash = d.getVar('BB_UNIHASH')
570        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
571        tempdir = d.getVar('T')
572        fn = d.getVar('BB_FILENAME')
573        tid = fn + ':do_' + task
574        key = tid + ':' + taskhash
575
576        if self.setscenetasks and tid not in self.setscenetasks:
577            return
578
579        # This can happen if locked sigs are in action. Detect and just abort
580        if taskhash != self.taskhash[tid]:
581            return
582
583        # Sanity checks
584        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
585        if cache_unihash is None:
586            bb.fatal('%s not in unihash cache. Please report this error' % key)
587
588        if cache_unihash != unihash:
589            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))
590
591        sigfile = None
592        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
593        sigfile_link = "depsig.do_%s" % task
594
595        try:
596            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')
597
598            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}
599
600            if "." in self.method:
601                (module, method) = self.method.rsplit('.', 1)
602                locs['method'] = getattr(importlib.import_module(module), method)
603                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
604            else:
605                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)
606
607            try:
608                extra_data = {}
609
610                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
611                if owner:
612                    extra_data['owner'] = owner
613
614                if report_taskdata:
615                    sigfile.seek(0)
616
617                    extra_data['PN'] = d.getVar('PN')
618                    extra_data['PV'] = d.getVar('PV')
619                    extra_data['PR'] = d.getVar('PR')
620                    extra_data['task'] = task
621                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')
622
623                method = self.method
624                if tid in self.extramethod:
625                    method = method + self.extramethod[tid]
626
627                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
628                new_unihash = data['unihash']
629
630                if new_unihash != unihash:
631                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
632                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
633                    self.set_unihash(tid, new_unihash)
634                    d.setVar('BB_UNIHASH', new_unihash)
635                else:
636                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
637            except ConnectionError as e:
638                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
639        finally:
640            if sigfile:
641                sigfile.close()
642
643                sigfile_link_path = os.path.join(tempdir, sigfile_link)
644                bb.utils.remove(sigfile_link_path)
645
646                try:
647                    os.symlink(sigfile_name, sigfile_link_path)
648                except OSError:
649                    pass
650
651    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
652        try:
653            extra_data = {}
654            method = self.method
655            if tid in self.extramethod:
656                method = method + self.extramethod[tid]
657
658            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
659            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))
660
661            if data is None:
662                bb.warn("Server unable to handle unihash report")
663                return False
664
665            finalunihash = data['unihash']
666
667            if finalunihash == current_unihash:
668                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
669            elif finalunihash == wanted_unihash:
670                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
671                self.set_unihash(tid, finalunihash)
672                return True
673            else:
674                # TODO: What to do here?
675                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))
676
677        except ConnectionError as e:
678            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
679
680        return False
681
682#
683# Dummy class used for bitbake-selftest
684#
685class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
686    name = "TestEquivHash"
687    def init_rundepcheck(self, data):
688        super().init_rundepcheck(data)
689        self.server = data.getVar('BB_HASHSERVE')
690        self.method = "sstate_output_hash"
691
692#
693# Dummy class used for bitbake-selftest
694#
695class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
696    name = "TestMulticonfigDepends"
697    supports_multiconfig_datacaches = True
698
699def dump_this_task(outfile, d):
700    import bb.parse
701    fn = d.getVar("BB_FILENAME")
702    task = "do_" + d.getVar("BB_CURRENTTASK")
703    referencestamp = bb.build.stamp_internal(task, d, None, True)
704    bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)
705
706def init_colors(enable_color):
707    """Initialise colour dict for passing to compare_sigfiles()"""
708    # First set up the colours
709    colors = {'color_title':   '\033[1m',
710              'color_default': '\033[0m',
711              'color_add':     '\033[0;32m',
712              'color_remove':  '\033[0;31m',
713             }
714    # Leave all keys present but clear the values
715    if not enable_color:
716        for k in colors.keys():
717            colors[k] = ''
718    return colors
719
720def worddiff_str(oldstr, newstr, colors=None):
721    if not colors:
722        colors = init_colors(False)
723    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
724    ret = []
725    for change, value in diff:
726        value = ' '.join(value)
727        if change == '=':
728            ret.append(value)
729        elif change == '+':
730            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
731            ret.append(item)
732        elif change == '-':
733            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
734            ret.append(item)
735    whitespace_note = ''
736    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
737        whitespace_note = ' (whitespace changed)'
738    return '"%s"%s' % (' '.join(ret), whitespace_note)
739
740def list_inline_diff(oldlist, newlist, colors=None):
741    if not colors:
742        colors = init_colors(False)
743    diff = simplediff.diff(oldlist, newlist)
744    ret = []
745    for change, value in diff:
746        value = ' '.join(value)
747        if change == '=':
748            ret.append("'%s'" % value)
749        elif change == '+':
750            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
751            ret.append(item)
752        elif change == '-':
753            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
754            ret.append(item)
755    return '[%s]' % (', '.join(ret))
756
757def clean_basepath(basepath):
758    basepath, dir, recipe_task = basepath.rsplit("/", 2)
759    cleaned = dir + '/' + recipe_task
760
761    if basepath[0] == '/':
762        return cleaned
763
764    if basepath.startswith("mc:") and basepath.count(':') >= 2:
765        mc, mc_name, basepath = basepath.split(":", 2)
766        mc_suffix = ':mc:' + mc_name
767    else:
768        mc_suffix = ''
769
770    # mc stuff now removed from basepath. Whatever was next, if present will be the first
771    # suffix. ':/', recipe path start, marks the end of this. Something like
772    # 'virtual:a[:b[:c]]:/path...' (b and c being optional)
773    if basepath[0] != '/':
774        cleaned += ':' + basepath.split(':/', 1)[0]
775
776    return cleaned + mc_suffix
777
778def clean_basepaths(a):
779    b = {}
780    for x in a:
781        b[clean_basepath(x)] = a[x]
782    return b
783
784def clean_basepaths_list(a):
785    b = []
786    for x in a:
787        b.append(clean_basepath(x))
788    return b
789
790def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
791    output = []
792
793    colors = init_colors(color)
794    def color_format(formatstr, **values):
795        """
796        Return colour formatted string.
797        NOTE: call with the format string, not an already formatted string
798        containing values (otherwise you could have trouble with { and }
799        characters)
800        """
801        if not formatstr.endswith('{color_default}'):
802            formatstr += '{color_default}'
803        # In newer python 3 versions you can pass both of these directly,
804        # but we only require 3.4 at the moment
805        formatparams = {}
806        formatparams.update(colors)
807        formatparams.update(values)
808        return formatstr.format(**formatparams)
809
810    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
811        a_data = json.load(f, object_hook=SetDecoder)
812    with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
813        b_data = json.load(f, object_hook=SetDecoder)
814
815    def dict_diff(a, b, whitelist=set()):
816        sa = set(a.keys())
817        sb = set(b.keys())
818        common = sa & sb
819        changed = set()
820        for i in common:
821            if a[i] != b[i] and i not in whitelist:
822                changed.add(i)
823        added = sb - sa
824        removed = sa - sb
825        return changed, added, removed
826
827    def file_checksums_diff(a, b):
828        from collections import Counter
829
830        # Convert lists back to tuples
831        a = [(f[0], f[1]) for f in a]
832        b = [(f[0], f[1]) for f in b]
833
834        # Compare lists, ensuring we can handle duplicate filenames if they exist
835        removedcount = Counter(a)
836        removedcount.subtract(b)
837        addedcount = Counter(b)
838        addedcount.subtract(a)
839        added = []
840        for x in b:
841            if addedcount[x] > 0:
842                addedcount[x] -= 1
843                added.append(x)
844        removed = []
845        changed = []
846        for x in a:
847            if removedcount[x] > 0:
848                removedcount[x] -= 1
849                for y in added:
850                    if y[0] == x[0]:
851                        changed.append((x[0], x[1], y[1]))
852                        added.remove(y)
853                        break
854                else:
855                    removed.append(x)
856        added = [x[0] for x in added]
857        removed = [x[0] for x in removed]
858        return changed, added, removed
859
860    if 'basewhitelist' in a_data and a_data['basewhitelist'] != b_data['basewhitelist']:
861        output.append(color_format("{color_title}basewhitelist changed{color_default} from '%s' to '%s'") % (a_data['basewhitelist'], b_data['basewhitelist']))
862        if a_data['basewhitelist'] and b_data['basewhitelist']:
863            output.append("changed items: %s" % a_data['basewhitelist'].symmetric_difference(b_data['basewhitelist']))
864
865    if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']:
866        output.append(color_format("{color_title}taskwhitelist changed{color_default} from '%s' to '%s'") % (a_data['taskwhitelist'], b_data['taskwhitelist']))
867        if a_data['taskwhitelist'] and b_data['taskwhitelist']:
868            output.append("changed items: %s" % a_data['taskwhitelist'].symmetric_difference(b_data['taskwhitelist']))
869
870    if a_data['taskdeps'] != b_data['taskdeps']:
871        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))
872
873    if a_data['basehash'] != b_data['basehash'] and not collapsed:
874        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))
875
876    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basewhitelist'] & b_data['basewhitelist'])
877    if changed:
878        for dep in sorted(changed):
879            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
880            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
881                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
882    if added:
883        for dep in sorted(added):
884            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
885    if removed:
886        for dep in sorted(removed):
887            output.append(color_format("{color_title}Dependency on Variable %s was removed") % (dep))
888
889
890    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
891    if changed:
892        for dep in sorted(changed):
893            oldval = a_data['varvals'][dep]
894            newval = b_data['varvals'][dep]
895            if newval and oldval and ('\n' in oldval or '\n' in newval):
896                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
897                # Cut off the first two lines, since we aren't interested in
898                # the old/new filename (they are blank anyway in this case)
899                difflines = list(diff)[2:]
900                if color:
901                    # Add colour to diff output
902                    for i, line in enumerate(difflines):
903                        if line.startswith('+'):
904                            line = color_format('{color_add}{line}', line=line)
905                            difflines[i] = line
906                        elif line.startswith('-'):
907                            line = color_format('{color_remove}{line}', line=line)
908                            difflines[i] = line
909                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
910            elif newval and oldval and (' ' in oldval or ' ' in newval):
911                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
912            else:
913                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))
914
915    if not 'file_checksum_values' in a_data:
916         a_data['file_checksum_values'] = []
917    if not 'file_checksum_values' in b_data:
918         b_data['file_checksum_values'] = []
919
920    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
921    if changed:
922        for f, old, new in changed:
923            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
924    if added:
925        for f in added:
926            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
927    if removed:
928        for f in removed:
929            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))
930
931    if not 'runtaskdeps' in a_data:
932         a_data['runtaskdeps'] = {}
933    if not 'runtaskdeps' in b_data:
934         b_data['runtaskdeps'] = {}
935
936    if not collapsed:
937        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
938            changed = ["Number of task dependencies changed"]
939        else:
940            changed = []
941            for idx, task in enumerate(a_data['runtaskdeps']):
942                a = a_data['runtaskdeps'][idx]
943                b = b_data['runtaskdeps'][idx]
944                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
945                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))
946
947        if changed:
948            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
949            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
950            if clean_a != clean_b:
951                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
952            else:
953                output.append(color_format("{color_title}runtaskdeps changed:"))
954            output.append("\n".join(changed))
955
956
957    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
958        a = a_data['runtaskhashes']
959        b = b_data['runtaskhashes']
960        changed, added, removed = dict_diff(a, b)
961        if added:
962            for dep in sorted(added):
963                bdep_found = False
964                if removed:
965                    for bdep in removed:
966                        if b[dep] == a[bdep]:
967                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
968                            bdep_found = True
969                if not bdep_found:
970                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep]))
971        if removed:
972            for dep in sorted(removed):
973                adep_found = False
974                if added:
975                    for adep in added:
976                        if b[adep] == a[dep]:
977                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
978                            adep_found = True
979                if not adep_found:
980                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep]))
981        if changed:
982            for dep in sorted(changed):
983                if not collapsed:
984                    output.append(color_format("{color_title}Hash for dependent task %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep]))
985                if callable(recursecb):
986                    recout = recursecb(dep, a[dep], b[dep])
987                    if recout:
988                        if collapsed:
989                            output.extend(recout)
990                        else:
991                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
992                            # that hash since in all likelyhood, they're the same changes this task also saw.
993                            output = [output[-1]] + recout
994
995    a_taint = a_data.get('taint', None)
996    b_taint = b_data.get('taint', None)
997    if a_taint != b_taint:
998        if a_taint and a_taint.startswith('nostamp:'):
999            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
1000        if b_taint and b_taint.startswith('nostamp:'):
1001            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
1002        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))
1003
1004    return output
1005
1006
1007def calc_basehash(sigdata):
1008    task = sigdata['task']
1009    basedata = sigdata['varvals'][task]
1010
1011    if basedata is None:
1012        basedata = ''
1013
1014    alldeps = sigdata['taskdeps']
1015    for dep in alldeps:
1016        basedata = basedata + dep
1017        val = sigdata['varvals'][dep]
1018        if val is not None:
1019            basedata = basedata + str(val)
1020
1021    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()
1022
1023def calc_taskhash(sigdata):
1024    data = sigdata['basehash']
1025
1026    for dep in sigdata['runtaskdeps']:
1027        data = data + sigdata['runtaskhashes'][dep]
1028
1029    for c in sigdata['file_checksum_values']:
1030        if c[1]:
1031            data = data + c[1]
1032
1033    if 'taint' in sigdata:
1034        if 'nostamp:' in sigdata['taint']:
1035            data = data + sigdata['taint'][8:]
1036        else:
1037            data = data + sigdata['taint']
1038
1039    return hashlib.sha256(data.encode("utf-8")).hexdigest()
1040
1041
1042def dump_sigfile(a):
1043    output = []
1044
1045    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
1046        a_data = json.load(f, object_hook=SetDecoder)
1047
1048    output.append("basewhitelist: %s" % (a_data['basewhitelist']))
1049
1050    output.append("taskwhitelist: %s" % (a_data['taskwhitelist']))
1051
1052    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))
1053
1054    output.append("basehash: %s" % (a_data['basehash']))
1055
1056    for dep in a_data['gendeps']:
1057        output.append("List of dependencies for variable %s is %s" % (dep, a_data['gendeps'][dep]))
1058
1059    for dep in a_data['varvals']:
1060        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))
1061
1062    if 'runtaskdeps' in a_data:
1063        output.append("Tasks this task depends on: %s" % (a_data['runtaskdeps']))
1064
1065    if 'file_checksum_values' in a_data:
1066        output.append("This task depends on the checksums of files: %s" % (a_data['file_checksum_values']))
1067
1068    if 'runtaskhashes' in a_data:
1069        for dep in a_data['runtaskhashes']:
1070            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))
1071
1072    if 'taint' in a_data:
1073        if a_data['taint'].startswith('nostamp:'):
1074            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
1075        else:
1076            msg = a_data['taint']
1077        output.append("Tainted (by forced/invalidated task): %s" % msg)
1078
1079    if 'task' in a_data:
1080        computed_basehash = calc_basehash(a_data)
1081        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
1082    else:
1083        output.append("Unable to compute base hash")
1084
1085    computed_taskhash = calc_taskhash(a_data)
1086    output.append("Computed task hash is %s" % computed_taskhash)
1087
1088    return output
1089