xref: /openbmc/openbmc/poky/bitbake/lib/bb/siggen.py (revision 78b72798)
#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
import json
import bb.compress.zstd
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv
import hashserv.client

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

class SetEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, set):
            return dict(_set_object=list(sorted(obj)))
        return json.JSONEncoder.default(self, obj)

def SetDecoder(dct):
    if '_set_object' in dct:
        return set(dct['_set_object'])
    return dct
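
# Illustrative round-trip (not part of bitbake): SetEncoder/SetDecoder give
# sets a stable, sorted JSON representation so signature dumps stay
# deterministic:
#
#   >>> json.dumps({"b", "a"}, cls=SetEncoder)
#   '{"_set_object": ["a", "b"]}'
#   >>> json.loads('{"_set_object": ["a", "b"]}', object_hook=SetDecoder)
#   {'a', 'b'}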

def init(d):
    siggens = [obj for obj in globals().values()
                      if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
    else:
        logger.error("Invalid signature generator '%s', using default 'noop'\n"
                     "Available generators: %s", desired,
                     ', '.join(obj.name for obj in siggens))
        return SignatureGenerator(d)
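
# The generator is selected by name through BB_SIGNATURE_HANDLER, e.g. in a
# configuration file (illustrative; "basichash" is defined further below):
#
#   BB_SIGNATURE_HANDLER = "basichash"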

class SignatureGenerator(object):
    """
    A signature generator computes the hashes used to identify task output.
    This default "noop" implementation simply hashes the task identifier and
    writes no signature data to disk.
    """
    name = "noop"

    # If the derived class supports multiconfig datacaches, set this to True
    # The default is False for backward compatibility with derived signature
    # generators that do not understand multiconfig caches
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, variant):
        return

    def postparsing_clean_cache(self):
        return

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, d, fn):
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def copy_unitaskhashes(self, targetdir):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to signature
        generator functions. If the signature generator supports multiconfig
        caches, the entire dictionary of data caches is sent, otherwise a
        special proxy is sent that supports both index access to all
        multiconfigs, and also direct access for the default multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed in
        the future when support for multiconfig data caches is mandatory.
        """
        class DataCacheProxy(object):
            def __init__(self):
                pass

            def __getitem__(self, key):
                return dataCaches[key]

            def __getattr__(self, name):
                return getattr(dataCaches[mc], name)

        if cls.supports_multiconfig_datacaches:
            return dataCaches

        return DataCacheProxy()
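
    # Illustrative proxy behaviour (names hypothetical, not executed here):
    #
    #   dc = SignatureGenerator.get_data_caches(dataCaches, '')
    #   dc.pkg_fn is dataCaches[''].pkg_fn    # attribute access -> default mc
    #   dc['mc1'] is dataCaches['mc1']        # index access -> any multiconfig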

    def exit(self):
        return

class SignatureGeneratorBasic(SignatureGenerator):
    """
    Signature generator built on the variable and task dependency data
    produced by bb.data, with optional file checksum caching.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.setscenetasks = set()
        self.basehash_ignore_vars = set((data.getVar("BB_BASEHASH_IGNORE_VARS") or "").split())
        self.taskhash_ignore_tasks = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        self.taskhash_ignore_tasks = data.getVar("BB_TASKHASH_IGNORE_TASKS") or None
        if self.taskhash_ignore_tasks:
            self.twl = re.compile(self.taskhash_ignore_tasks)
        else:
            self.twl = None

    def _build_data(self, fn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basehash_ignore_vars)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basehash_ignore_vars, fn)

        for task in tasklist:
            tid = fn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % fn)
            raise

        # Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH:task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the ignore
        # list
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint
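
    # The taint file sits alongside the task's stamp; e.g. a forced run such
    # as "bitbake -f -c compile <recipe>" leaves a UUID in a file named like
    # (path illustrative) "<stampbase>.do_compile.taint", which read_taint()
    # picks up here.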

    def prep_taskhash(self, tid, deps, dataCaches):

        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[fn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.supports_multiconfig_datacaches and mc != depmc:
                # If the signature generator doesn't understand multiconfig
                # data caches, any dependency not in the same multiconfig must
                # be skipped for backward compatibility
                continue
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            for (f,cs) in checksums:
                self.file_checksum_values[tid].append((f,cs))

        taskdep = dataCaches[mc].task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):

        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            data = data + self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                if "/./" in f:
                    data = data + "./" + f.split("/./")[1]
                data = data + cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data = data + self.taints[tid][8:]
            else:
                data = data + self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
        return h
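
    # In other words (sketch, not executed): the task hash is
    #
    #   sha256(basehash + unihash(dep1) + ... + file checksums + taint)
    #
    # so a change in any dependency's output, any watched file, or a taint
    # yields a new taskhash. Note the "nostamp:" prefix itself is excluded
    # via the [8:] slice; only the random UUID is mixed in.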

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def copy_unitaskhashes(self, targetdir):
        self.unihash_cache.copyfile(targetdir)

    def dump_sigtask(self, fn, task, stampbase, runtime):

        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basehash_ignore_vars'] = self.basehash_ignore_vars
        data['taskhash_ignore_tasks'] = self.taskhash_ignore_tasks
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basehash_ignore_vars:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = []
            for f,cs in self.file_checksum_values[tid]:
                if "/./" in f:
                    data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
                else:
                    data['file_checksum_values'].append((os.path.basename(f), cs))
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
                f.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err
441
442    def dump_sigfn(self, fn, dataCaches, options):
443        if fn in self.taskdeps:
444            for task in self.taskdeps[fn]:
445                tid = fn + ":" + task
446                mc = bb.runqueue.mc_from_tid(tid)
447                if tid not in self.taskhash:
448                    continue
449                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
450                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
451                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
452                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If the tid is not in basehash either, this raises a KeyError, which
        # indicates a logic error in the caller
        return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        if taskname != "do_setscene" and taskname.endswith("_setscene"):
            tid = fn + ":" + taskname[:-9]
        else:
            tid = fn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)
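
    # Stamp names embed the hash, so a changed hash automatically invalidates
    # the old stamp; the clean mask swaps the hash for "*" so stale stamps can
    # be matched by glob. Illustrative (stampbase hypothetical):
    #
    #   stampfile("STAMPS/foo-1.0-r0", fn, "do_compile", "")
    #       -> "STAMPS/foo-1.0-r0.do_compile.<hash>"
    #   stampcleanmask("STAMPS/foo-1.0-r0", fn, "do_compile", "")
    #       -> "STAMPS/foo-1.0-r0.do_compile.*"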

    def invalidate_task(self, task, d, fn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)

class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def reset(self, data):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().reset(data)

    def exit(self):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().exit()

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures that a task won't be re-run if its taskhash changes but
            # the change would result in the same output hash
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash
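
    # The unitaskhashes cache is keyed on "mc:pn:taskname" and stores the last
    # (taskhash, unihash) pair seen for that task, e.g. (values illustrative):
    #
    #   self.unitaskhashes[":bash:do_compile"] = ("3f2a...", "9c81...")
    #
    # The stored taskhash acts as a validity check: entries whose taskhash no
    # longer matches the current one are treated as misses.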

    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task we can return
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # the most recent (taskhash, unihash) pair for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) The value is easy for multiple independent builders to derive the
        #    same unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash

    def report_unihash(self, path, task, d):
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just exit
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass

    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
    name = "TestMulticonfigDepends"
    supports_multiconfig_datacaches = True

def dump_this_task(outfile, d):
    import bb.parse
    fn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.build.stamp_internal(task, d, None, True)
    bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)
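
# The "customfile:<referencestamp>" runtime value passed here makes
# dump_sigtask() write straight to outfile while still reading any taint from
# the real stamp; see the runtime.startswith("customfile") branch above.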

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title':   '\033[1m',
              'color_default': '\033[0m',
              'color_add':     '\033[0;32m',
              'color_remove':  '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)
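
# Word-level diff in wdiff-like notation; without colours the output looks
# roughly like this (illustrative):
#
#   >>> worddiff_str('the quick fox', 'the slow fox')
#   '"the [-quick-] {+slow+} fox"'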

def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

def clean_basepath(basepath):
    basepath, dir, recipe_task = basepath.rsplit("/", 2)
    cleaned = dir + '/' + recipe_task

    if basepath[0] == '/':
        return cleaned

    if basepath.startswith("mc:") and basepath.count(':') >= 2:
        mc, mc_name, basepath = basepath.split(":", 2)
        mc_suffix = ':mc:' + mc_name
    else:
        mc_suffix = ''

    # mc stuff now removed from basepath. Whatever came next, if present, will
    # be the first suffix. ':/', the start of the recipe path, marks the end of
    # this. Something like 'virtual:a[:b[:c]]:/path...' (b and c being optional)
    if basepath[0] != '/':
        cleaned += ':' + basepath.split(':/', 1)[0]

    return cleaned + mc_suffix
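
# Examples of the normalisation (paths illustrative):
#
#   /work/meta/busybox/busybox_1.35.bb:do_fetch
#       -> busybox/busybox_1.35.bb:do_fetch
#   virtual:native:/work/meta/busybox/busybox_1.35.bb:do_fetch
#       -> busybox/busybox_1.35.bb:do_fetch:virtual:native
#   mc:mc1:/work/meta/busybox/busybox_1.35.bb:do_fetch
#       -> busybox/busybox_1.35.bb:do_fetch:mc:mc1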

def clean_basepaths(a):
    b = {}
    for x in a:
        b[clean_basepath(x)] = a[x]
    return b

def clean_basepaths_list(a):
    b = []
    for x in a:
        b.append(clean_basepath(x))
    return b

# Handle renamed fields
def handle_renames(data):
    if 'basewhitelist' in data:
        data['basehash_ignore_vars'] = data['basewhitelist']
        del data['basewhitelist']
    if 'taskwhitelist' in data:
        data['taskhash_ignore_tasks'] = data['taskwhitelist']
        del data['taskwhitelist']


def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)
    with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
        b_data = json.load(f, object_hook=SetDecoder)

    for data in [a_data, b_data]:
        handle_renames(data)

    def dict_diff(a, b, ignored_vars=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in ignored_vars:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        from collections import Counter

        # Convert lists back to tuples
        a = [(f[0], f[1]) for f in a]
        b = [(f[0], f[1]) for f in b]

        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    if 'basehash_ignore_vars' in a_data and a_data['basehash_ignore_vars'] != b_data['basehash_ignore_vars']:
        output.append(color_format("{color_title}basehash_ignore_vars changed{color_default} from '%s' to '%s'") % (a_data['basehash_ignore_vars'], b_data['basehash_ignore_vars']))
        if a_data['basehash_ignore_vars'] and b_data['basehash_ignore_vars']:
            output.append("changed items: %s" % a_data['basehash_ignore_vars'].symmetric_difference(b_data['basehash_ignore_vars']))

    if 'taskhash_ignore_tasks' in a_data and a_data['taskhash_ignore_tasks'] != b_data['taskhash_ignore_tasks']:
        output.append(color_format("{color_title}taskhash_ignore_tasks changed{color_default} from '%s' to '%s'") % (a_data['taskhash_ignore_tasks'], b_data['taskhash_ignore_tasks']))
        if a_data['taskhash_ignore_tasks'] and b_data['taskhash_ignore_tasks']:
            output.append("changed items: %s" % a_data['taskhash_ignore_tasks'].symmetric_difference(b_data['taskhash_ignore_tasks']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basehash_ignore_vars'] & b_data['basehash_ignore_vars'])
    if changed:
        for dep in sorted(changed):
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in sorted(added):
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in sorted(removed):
            output.append(color_format("{color_title}Dependency on variable %s was removed") % (dep))


    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in sorted(changed):
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if 'file_checksum_values' not in a_data:
        a_data['file_checksum_values'] = []
    if 'file_checksum_values' not in b_data:
        b_data['file_checksum_values'] = []

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if 'runtaskdeps' not in a_data:
        a_data['runtaskdeps'] = {}
    if 'runtaskdeps' not in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = clean_basepaths(a_data['runtaskhashes'])
        b = clean_basepaths(b_data['runtaskhashes'])
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in sorted(added):
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (dep, b[dep]))
        if removed:
            for dep in sorted(removed):
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (dep, a[dep]))
        if changed:
            for dep in sorted(changed):
                if not collapsed:
                    output.append(color_format("{color_title}Hash for task dependency %s changed{color_default} from %s to %s") % (dep, a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
                            output = [output[-1]] + recout
                            break

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output
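
# compare_sigfiles() is the engine behind bitbake-diffsigs/-S printdiff style
# output; a minimal (illustrative) use diffs two sigdata/siginfo files:
#
#   for line in compare_sigfiles(old_sigfile, new_sigfile, color=True):
#       print(line)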


def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in alldeps:
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

def calc_taskhash(sigdata):
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            if "./" in c[0]:
                data = data + c[0]
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()
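
# These helpers recompute hashes from a loaded sigfile, which is how
# dump_sigtask() cross-checks its own output. A sketch of verifying a dump
# (filename hypothetical):
#
#   with bb.compress.zstd.open(sigfile, "rt", encoding="utf-8") as f:
#       sigdata = json.load(f, object_hook=SetDecoder)
#   assert calc_basehash(sigdata) == sigdata['basehash']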


def dump_sigfile(a):
    output = []

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)

    handle_renames(a_data)

    output.append("basehash_ignore_vars: %s" % (sorted(a_data['basehash_ignore_vars'])))

    output.append("taskhash_ignore_tasks: %s" % (sorted(a_data['taskhash_ignore_tasks'] or [])))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in sorted(a_data['gendeps']):
        output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))

    for dep in sorted(a_data['varvals']):
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))

    if 'runtaskhashes' in a_data:
        for dep in sorted(a_data['runtaskhashes']):
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output