xref: /openbmc/openbmc/poky/bitbake/lib/bb/siggen.py (revision 595f6308)
#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
import json
import bb.compress.zstd
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv
import hashserv.client

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

class SetEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, set):
            return dict(_set_object=list(sorted(obj)))
        return json.JSONEncoder.default(self, obj)

def SetDecoder(dct):
    if '_set_object' in dct:
        return set(dct['_set_object'])
    return dct

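# Illustrative round-trip (example only): sets survive a dump/load cycle via
# SetEncoder/SetDecoder, being serialised as a sorted "_set_object" list:
#   json.loads(json.dumps({"deps": {"b", "a"}}, cls=SetEncoder),
#              object_hook=SetDecoder)
#   # -> {'deps': {'a', 'b'}}
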
def init(d):
    siggens = [obj for obj in globals().values()
               if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)

    logger.error("Invalid signature generator '%s', using default 'noop'\n"
                 "Available generators: %s", desired,
                 ', '.join(obj.name for obj in siggens))
    return SignatureGenerator(d)

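# Example (illustrative): the active generator is selected by name via the
# BB_SIGNATURE_HANDLER variable, e.g. in a conf file ("basichash" is one of
# the generators defined below):
#   BB_SIGNATURE_HANDLER = "basichash"
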
class SignatureGenerator(object):
    """
    The default "noop" signature generator. It defines the interface the
    other generators implement but computes only placeholder hashes.
    """
    name = "noop"

    # If the derived class supports multiconfig datacaches, set this to True
    # The default is False for backward compatibility with derived signature
    # generators that do not understand multiconfig caches
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, variant):
        return

    def postparsing_clean_cache(self):
        return

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, d, fn):
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to signature
        generator functions. If the signature generator supports multiconfig
        caches, the entire dictionary of data caches is sent; otherwise a
        special proxy is sent that supports both index access to all
        multiconfigs and direct attribute access to the default multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed
        in the future when support for multiconfig data caches is mandatory.
        """
        class DataCacheProxy(object):
            def __init__(self):
                pass

            def __getitem__(self, key):
                return dataCaches[key]

            def __getattr__(self, name):
                return getattr(dataCaches[mc], name)

        if cls.supports_multiconfig_datacaches:
            return dataCaches

        return DataCacheProxy()

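# Illustrative use of the proxy above (names assumed): for a generator with
# supports_multiconfig_datacaches = False, both of these forms resolve to the
# default multiconfig's cache:
#   caches = SignatureGenerator.get_data_caches(dataCaches, mc)
#   caches[mc].pkg_fn   # index access works for any multiconfig key
#   caches.pkg_fn       # attribute access is proxied to dataCaches[mc]
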
class SignatureGeneratorBasic(SignatureGenerator):
    """
    The "basic" signature generator: computes base hashes from variable
    dependency data and task hashes from runtime dependency data.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.setscenetasks = set()
        self.basewhitelist = set((data.getVar("BB_HASHBASE_WHITELIST") or "").split())
        self.taskwhitelist = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None
        if self.taskwhitelist:
            self.twl = re.compile(self.taskwhitelist)
        else:
            self.twl = None

    def _build_data(self, fn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basewhitelist)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basewhitelist, fn)

        for task in tasklist:
            tid = fn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % fn)
            raise

        # Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH:task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the whitelist
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):

        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[fn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.supports_multiconfig_datacaches and mc != depmc:
                # If the signature generator doesn't understand multiconfig
                # data caches, any dependency not in the same multiconfig must
                # be skipped for backward compatibility
                continue
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            for (f, cs) in checksums:
                self.file_checksum_values[tid].append((f, cs))

        taskdep = dataCaches[mc].task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):

        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            data = data + self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                if "/./" in f:
                    data = data + "./" + f.split("/./")[1]
                data = data + cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data = data + self.taints[tid][8:]
            else:
                data = data + self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
        return h

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def dump_sigtask(self, fn, task, stampbase, runtime):

        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basewhitelist'] = self.basewhitelist
        data['taskwhitelist'] = self.taskwhitelist
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basewhitelist:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = []
            for f, cs in self.file_checksum_values[tid]:
                if "/./" in f:
                    data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
                else:
                    data['file_checksum_values'].append((os.path.basename(f), cs))
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
                f.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

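    # dump_sigtask() above writes zstd-compressed JSON (via bb.compress.zstd);
    # dump_sigfile() and compare_sigfiles() at the bottom of this module read
    # the same format back using SetDecoder.
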
    def dump_sigfn(self, fn, dataCaches, options):
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If the task is not in basehash either, this raises a KeyError,
        # which is deliberate
        return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        if taskname != "do_setscene" and taskname.endswith("_setscene"):
            tid = fn + ":" + taskname[:-9]
        else:
            tid = fn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

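    # Illustrative result (values hypothetical): with taskname "do_compile",
    # hash "abc123" and empty extrainfo, stampfile() yields
    # "<stampbase>.do_compile.abc123" (trailing dots are stripped).
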
    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)

class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures that a task won't be re-run if the taskhash changes but
            # would result in the same output hash
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash

    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task we can return
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # the most recent entry for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        #    continues to work
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) It is easy for multiple independent builders to derive the
        #    same unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash

    def report_unihash(self, path, task, d):
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just return
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass

    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
    name = "TestMulticonfigDepends"
    supports_multiconfig_datacaches = True

def dump_this_task(outfile, d):
    import bb.parse
    fn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.build.stamp_internal(task, d, None, True)
    bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title':   '\033[1m',
              'color_default': '\033[0m',
              'color_add':     '\033[0;32m',
              'color_remove':  '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)

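# Illustrative output (colors disabled; exact form depends on simplediff):
#   worddiff_str("the quick fox", "the slow fox")
#   # -> '"the [-quick-] {+slow+} fox"'
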
def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

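# Illustrative output (colors disabled):
#   list_inline_diff(['a', 'b'], ['a', 'c'])
#   # -> "['a', -b, +c]"
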
def clean_basepath(basepath):
    basepath, dir, recipe_task = basepath.rsplit("/", 2)
    cleaned = dir + '/' + recipe_task

    if basepath[0] == '/':
        return cleaned

    if basepath.startswith("mc:") and basepath.count(':') >= 2:
        mc, mc_name, basepath = basepath.split(":", 2)
        mc_suffix = ':mc:' + mc_name
    else:
        mc_suffix = ''

    # mc stuff now removed from basepath. Whatever came next, if present, will
    # be the first suffix. ':/', the start of the recipe path, marks the end of
    # it. Something like 'virtual:a[:b[:c]]:/path...' (b and c being optional)
    if basepath[0] != '/':
        cleaned += ':' + basepath.split(':/', 1)[0]

    return cleaned + mc_suffix

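# Illustrative examples (paths hypothetical):
#   clean_basepath("/full/path/recipes/foo_1.0.bb:do_compile")
#   # -> "recipes/foo_1.0.bb:do_compile"
#   clean_basepath("virtual:native:/full/path/recipes/foo_1.0.bb:do_compile")
#   # -> "recipes/foo_1.0.bb:do_compile:virtual:native"
#   clean_basepath("mc:mc1:/full/path/recipes/foo_1.0.bb:do_compile")
#   # -> "recipes/foo_1.0.bb:do_compile:mc:mc1"
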
def clean_basepaths(a):
    b = {}
    for x in a:
        b[clean_basepath(x)] = a[x]
    return b

def clean_basepaths_list(a):
    b = []
    for x in a:
        b.append(clean_basepath(x))
    return b

def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return a colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer Python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)
    with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
        b_data = json.load(f, object_hook=SetDecoder)

    def dict_diff(a, b, whitelist=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in whitelist:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

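    # Illustrative result:
    #   dict_diff({'A': 1, 'B': 2}, {'B': 3, 'C': 4})
    #   # -> ({'B'}, {'C'}, {'A'})   i.e. (changed, added, removed)
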
    def file_checksums_diff(a, b):
        from collections import Counter

        # Convert lists back to tuples
        a = [(f[0], f[1]) for f in a]
        b = [(f[0], f[1]) for f in b]

        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

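    # Illustrative result (filenames hypothetical):
    #   file_checksums_diff([('f', '1'), ('g', '2')], [('f', '9'), ('h', '3')])
    #   # -> ([('f', '1', '9')], ['h'], ['g'])   i.e. (changed, added, removed)
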
    if 'basewhitelist' in a_data and a_data['basewhitelist'] != b_data['basewhitelist']:
        output.append(color_format("{color_title}basewhitelist changed{color_default} from '%s' to '%s'") % (a_data['basewhitelist'], b_data['basewhitelist']))
        if a_data['basewhitelist'] and b_data['basewhitelist']:
            output.append("changed items: %s" % a_data['basewhitelist'].symmetric_difference(b_data['basewhitelist']))

    if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']:
        output.append(color_format("{color_title}taskwhitelist changed{color_default} from '%s' to '%s'") % (a_data['taskwhitelist'], b_data['taskwhitelist']))
        if a_data['taskwhitelist'] and b_data['taskwhitelist']:
            output.append("changed items: %s" % a_data['taskwhitelist'].symmetric_difference(b_data['taskwhitelist']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basewhitelist'] & b_data['basewhitelist'])
    if changed:
        for dep in sorted(changed):
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in sorted(added):
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in sorted(removed):
            output.append(color_format("{color_title}Dependency on variable %s was removed") % (dep))


    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in sorted(changed):
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if 'file_checksum_values' not in a_data:
        a_data['file_checksum_values'] = []
    if 'file_checksum_values' not in b_data:
        b_data['file_checksum_values'] = []

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if 'runtaskdeps' not in a_data:
        a_data['runtaskdeps'] = {}
    if 'runtaskdeps' not in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = a_data['runtaskhashes']
        b = b_data['runtaskhashes']
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in sorted(added):
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep]))
        if removed:
            for dep in sorted(removed):
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep]))
        if changed:
            for dep in sorted(changed):
                if not collapsed:
                    output.append(color_format("{color_title}Hash for dependent task %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
                            output = [output[-1]] + recout

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output

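# Illustrative use (file names hypothetical): compare two sigdata files as
# written by dump_sigtask() and print the human-readable differences:
#   for line in compare_sigfiles("foo.do_compile.sigdata.oldhash",
#                                "foo.do_compile.sigdata.newhash"):
#       print(line)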

def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in alldeps:
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

def calc_taskhash(sigdata):
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            if "./" in c[0]:
                data = data + c[0]
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()


def dump_sigfile(a):
    output = []

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)

    output.append("basewhitelist: %s" % (sorted(a_data['basewhitelist'])))

    output.append("taskwhitelist: %s" % (sorted(a_data['taskwhitelist'] or [])))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in sorted(a_data['gendeps']):
        output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))

    for dep in sorted(a_data['varvals']):
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))

    if 'runtaskhashes' in a_data:
        for dep in sorted(a_data['runtaskhashes']):
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output
1098