#
# Copyright BitBake Contributors
#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
import json
import bb.compress.zstd
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv
import hashserv.client

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

class SetEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, set):
            return dict(_set_object=sorted(obj))
        return json.JSONEncoder.default(self, obj)

def SetDecoder(dct):
    if '_set_object' in dct:
        return set(dct['_set_object'])
    return dct

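# How the two halves fit together: SetEncoder turns a set into a sorted,
# JSON-friendly dict and SetDecoder restores it on load. A minimal
# round-trip sketch (illustrative only, not executed here):
#
#   encoded = json.dumps({"deps": {"b", "a"}}, cls=SetEncoder)
#   decoded = json.loads(encoded, object_hook=SetDecoder)
#   assert decoded["deps"] == {"a", "b"}
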
def init(d):
    siggens = [obj for obj in globals().values()
                      if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
    else:
        logger.error("Invalid signature generator '%s', using default 'noop'\n"
                     "Available generators: %s", desired,
                     ', '.join(obj.name for obj in siggens))
        return SignatureGenerator(d)

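# The handler is chosen by name from the metadata. A hedged usage sketch,
# assuming a stock setup where the "basichash" generator defined below is
# wanted (set in e.g. a conf file):
#
#   BB_SIGNATURE_HANDLER = "basichash"
#
# Any value that matches no generator's .name falls through to the noop
# SignatureGenerator via the for/else above.
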
class SignatureGenerator(object):
    """
    A noop signature generator. Defines the interface that all signature
    generators implement; task hashes are derived from the task identifier
    alone, so no real signature handling takes place.
    """
    name = "noop"

    # If the derived class supports multiconfig datacaches, set this to True
    # The default is False for backward compatibility with derived signature
    # generators that do not understand multiconfig caches
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, variant):
        return

    def postparsing_clean_cache(self):
        return

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

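    # Stamp names are plain string concatenations with empty trailing
    # fields stripped. A hypothetical illustration (paths invented):
    #
    #   stampfile("/build/stamps/foo-1.0-r0", fn, "do_compile", "")
    #     -> "/build/stamps/foo-1.0-r0.do_compile"
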
    def dump_sigtask(self, fn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, d, fn):
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def copy_unitaskhashes(self, targetdir):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to signature
        generator functions. If the signature generator supports multiconfig
        caches, the entire dictionary of data caches is sent, otherwise a
        special proxy is sent that supports both index access to all
        multiconfigs, and also direct access for the default multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed in
        the future when support for multiconfig data caches is mandatory.
        """
        class DataCacheProxy(object):
            def __init__(self):
                pass

            def __getitem__(self, key):
                return dataCaches[key]

            def __getattr__(self, name):
                return getattr(dataCaches[mc], name)

        if cls.supports_multiconfig_datacaches:
            return dataCaches

        return DataCacheProxy()

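    # Behaviour sketch for the proxy (hypothetical names): given
    # dataCaches = {"": default_cache, "mc1": other_cache} and mc = "",
    #
    #   proxy = SignatureGenerator.get_data_caches(dataCaches, "")
    #   proxy["mc1"].pkg_fn   # index access  -> other_cache.pkg_fn
    #   proxy.pkg_fn          # attribute access -> default_cache.pkg_fn
    #
    # so multiconfig-unaware subclasses keep working unchanged.
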
    def exit(self):
        return

class SignatureGeneratorBasic(SignatureGenerator):
    """
    The "basic" signature generator: computes task signatures from the
    variable and file-checksum dependencies of each task.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.setscenetasks = set()
        self.basehash_ignore_vars = set((data.getVar("BB_BASEHASH_IGNORE_VARS") or "").split())
        self.taskhash_ignore_tasks = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        self.taskhash_ignore_tasks = data.getVar("BB_TASKHASH_IGNORE_TASKS") or None
        if self.taskhash_ignore_tasks:
            self.twl = re.compile(self.taskhash_ignore_tasks)
        else:
            self.twl = None

    def _build_data(self, fn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basehash_ignore_vars)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basehash_ignore_vars, fn)

        for task in tasklist:
            tid = fn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % fn)
            raise

        # Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH:task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the ignore
        # list
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):

        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[fn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.supports_multiconfig_datacaches and mc != depmc:
                # If the signature generator doesn't understand multiconfig
                # data caches, any dependency not in the same multiconfig must
                # be skipped for backward compatibility
                continue
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            for (f,cs) in checksums:
                self.file_checksum_values[tid].append((f,cs))

        taskdep = dataCaches[mc].task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):

        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            data = data + self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                if "/./" in f:
                    data = data + "./" + f.split("/./")[1]
                data = data + cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data = data + self.taints[tid][8:]
            else:
                data = data + self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
        return h

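    # Shape of the hashed string, as a sketch with hypothetical values:
    #
    #   data = basehash
    #        + unihash(dep1) + unihash(dep2) + ...   # in runtaskdeps order
    #        + "./rel/path" + checksum + ...         # per checksummed file
    #        + taint                                 # if the task is tainted
    #   taskhash = sha256(data)
    #
    # Note the "nostamp:" prefix is stripped from nostamp taints before
    # hashing, so only the per-run uuid feeds the hash.
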
    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def copy_unitaskhashes(self, targetdir):
        self.unihash_cache.copyfile(targetdir)

    def dump_sigtask(self, fn, task, stampbase, runtime):

        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basehash_ignore_vars'] = self.basehash_ignore_vars
        data['taskhash_ignore_tasks'] = self.taskhash_ignore_tasks
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basehash_ignore_vars:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = []
            for f,cs in self.file_checksum_values[tid]:
                if "/./" in f:
                    data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
                else:
                    data['file_checksum_values'].append((os.path.basename(f), cs))
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = bb.utils.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
                f.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

    def dump_sigfn(self, fn, dataCaches, options):
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If the task is not in basehash either, the resulting KeyError is intentional
        return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        if taskname != "do_setscene" and taskname.endswith("_setscene"):
            tid = fn + ":" + taskname[:-9]
        else:
            tid = fn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

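    # Unlike the noop base class, the hash becomes part of the stamp name.
    # Hypothetical illustration (path invented, hash shortened):
    #
    #   stampfile("/build/stamps/foo-1.0-r0", fn, "do_compile", "")
    #     -> "/build/stamps/foo-1.0-r0.do_compile.3f9c..."
    #
    # and with clean=True the hash field becomes "*", giving a glob that
    # matches the stamp for any hash (see stampcleanmask below).
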
    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)

class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def reset(self, data):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().reset(data)

    def exit(self):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().exit()

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures that a task won't be re-run if the taskhash changes but
            # would still result in the same output hash
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash

    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task, we can return the taskhash directly
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # the most recent entry for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes the hashes can be the same
        #    continues to work unmodified
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) It is easy for multiple independent builders to derive the same
        #    unique hash from the same input. This means that if independent
        #    builders find the same taskhash, but it isn't reported to the
        #    server, there is a better chance that they will agree on the
        #    unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash

    def report_unihash(self, path, task, d):
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just exit
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass

    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
    name = "TestMulticonfigDepends"
    supports_multiconfig_datacaches = True

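# The selftest classes above are activated the same way as any handler,
# e.g. BB_SIGNATURE_HANDLER = "TestEquivHash" with BB_HASHSERVE pointing
# at a hash equivalence server (values here are illustrative).
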
def dump_this_task(outfile, d):
    import bb.parse
    fn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.build.stamp_internal(task, d, None, True)
    bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title':   '\033[1m',
              'color_default': '\033[0m',
              'color_add':     '\033[0;32m',
              'color_remove':  '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)

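# Example output with colours disabled, assuming simplediff emits removals
# before additions (wdiff-style markers):
#
#   worddiff_str("a b c", "a d c")  ->  '"a [-b-] {+d+} c"'
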
def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

def clean_basepath(basepath):
    basepath, dir, recipe_task = basepath.rsplit("/", 2)
    cleaned = dir + '/' + recipe_task

    if basepath[0] == '/':
        return cleaned

    if basepath.startswith("mc:") and basepath.count(':') >= 2:
        mc, mc_name, basepath = basepath.split(":", 2)
        mc_suffix = ':mc:' + mc_name
    else:
        mc_suffix = ''

    # The mc prefix has now been removed from basepath. Whatever came next,
    # if present, is the first suffix; ':/' (the start of the recipe path)
    # marks its end. Something like 'virtual:a[:b[:c]]:/path...' (b and c
    # being optional)
    if basepath[0] != '/':
        cleaned += ':' + basepath.split(':/', 1)[0]

    return cleaned + mc_suffix

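# Worked trace with a hypothetical dependency path:
#
#   clean_basepath("virtual:native:/work/meta/recipes/foo/foo_1.0.bb:do_fetch")
#
# rsplit("/", 2) leaves basepath = "virtual:native:/work/meta/recipes" and
# cleaned = "foo/foo_1.0.bb:do_fetch"; the "virtual:native" suffix is then
# re-appended, giving "foo/foo_1.0.bb:do_fetch:virtual:native".
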
def clean_basepaths(a):
    b = {}
    for x in a:
        b[clean_basepath(x)] = a[x]
    return b

def clean_basepaths_list(a):
    b = []
    for x in a:
        b.append(clean_basepath(x))
    return b

# Handle renamed fields
def handle_renames(data):
    if 'basewhitelist' in data:
        data['basehash_ignore_vars'] = data['basewhitelist']
        del data['basewhitelist']
    if 'taskwhitelist' in data:
        data['taskhash_ignore_tasks'] = data['taskwhitelist']
        del data['taskwhitelist']


def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)
    with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
        b_data = json.load(f, object_hook=SetDecoder)

    for data in [a_data, b_data]:
        handle_renames(data)

    def dict_diff(a, b, ignored_vars=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in ignored_vars:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        from collections import Counter

        # Convert lists back to tuples
        a = [(f[0], f[1]) for f in a]
        b = [(f[0], f[1]) for f in b]

        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    if 'basehash_ignore_vars' in a_data and a_data['basehash_ignore_vars'] != b_data['basehash_ignore_vars']:
        output.append(color_format("{color_title}basehash_ignore_vars changed{color_default} from '%s' to '%s'") % (a_data['basehash_ignore_vars'], b_data['basehash_ignore_vars']))
        if a_data['basehash_ignore_vars'] and b_data['basehash_ignore_vars']:
            output.append("changed items: %s" % a_data['basehash_ignore_vars'].symmetric_difference(b_data['basehash_ignore_vars']))

    if 'taskhash_ignore_tasks' in a_data and a_data['taskhash_ignore_tasks'] != b_data['taskhash_ignore_tasks']:
        output.append(color_format("{color_title}taskhash_ignore_tasks changed{color_default} from '%s' to '%s'") % (a_data['taskhash_ignore_tasks'], b_data['taskhash_ignore_tasks']))
        if a_data['taskhash_ignore_tasks'] and b_data['taskhash_ignore_tasks']:
            output.append("changed items: %s" % a_data['taskhash_ignore_tasks'].symmetric_difference(b_data['taskhash_ignore_tasks']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basehash_ignore_vars'] & b_data['basehash_ignore_vars'])
    if changed:
        for dep in sorted(changed):
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in sorted(added):
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in sorted(removed):
            output.append(color_format("{color_title}Dependency on variable %s was removed") % (dep))


    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in sorted(changed):
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if 'file_checksum_values' not in a_data:
        a_data['file_checksum_values'] = []
    if 'file_checksum_values' not in b_data:
        b_data['file_checksum_values'] = []

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if 'runtaskdeps' not in a_data:
        a_data['runtaskdeps'] = {}
    if 'runtaskdeps' not in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = clean_basepaths(a_data['runtaskhashes'])
        b = clean_basepaths(b_data['runtaskhashes'])
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in sorted(added):
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (dep, b[dep]))
        if removed:
            for dep in sorted(removed):
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (dep, a[dep]))
        if changed:
            for dep in sorted(changed):
                if not collapsed:
                    output.append(color_format("{color_title}Hash for task dependency %s changed{color_default} from %s to %s") % (dep, a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
                            output = [output[-1]] + recout
                            break

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output


def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in alldeps:
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

def calc_taskhash(sigdata):
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            if "./" in c[0]:
                data = data + c[0]
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()

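# These helpers recompute the hashes from a dumped sigdata dict using the
# same recipe as SignatureGeneratorBasic. A hedged offline-verification
# sketch (the path is hypothetical; the assert should hold for a freshly
# dumped, consistent sigdata file):
#
#   with bb.compress.zstd.open("/tmp/foo.do_compile.sigdata.abc", "rt",
#                              encoding="utf-8", num_threads=1) as f:
#       sig = json.load(f, object_hook=SetDecoder)
#   assert calc_taskhash(sig) == sig['taskhash']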

def dump_sigfile(a):
    output = []

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)

    handle_renames(a_data)

    output.append("basehash_ignore_vars: %s" % (sorted(a_data['basehash_ignore_vars'])))

    output.append("taskhash_ignore_tasks: %s" % (sorted(a_data['taskhash_ignore_tasks'] or [])))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in sorted(a_data['gendeps']):
        output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))

    for dep in sorted(a_data['varvals']):
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))

    if 'runtaskhashes' in a_data:
        for dep in sorted(a_data['runtaskhashes']):
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output