#
# Copyright BitBake Contributors
#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
import json
import types
import bb.compress.zstd
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv
import hashserv.client

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

class SetEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, (set, frozenset)):
            return dict(_set_object=sorted(obj))
        return json.JSONEncoder.default(self, obj)

def SetDecoder(dct):
    if '_set_object' in dct:
        return frozenset(dct['_set_object'])
    return dct

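# Example: round-tripping a set through JSON with SetEncoder/SetDecoder
# (illustrative values; element order in the frozenset repr may vary):
#
#   >>> s = json.dumps({"deps": {"b", "a"}}, cls=SetEncoder)
#   >>> s
#   '{"deps": {"_set_object": ["a", "b"]}}'
#   >>> json.loads(s, object_hook=SetDecoder)
#   {'deps': frozenset({'a', 'b'})}
#
# Note the asymmetry: both sets and frozensets encode the same way, but
# decoding always produces a frozenset.
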
def init(d):
    siggens = [obj for obj in globals().values()
                      if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
    else:
        logger.error("Invalid signature generator '%s', using default 'noop'\n"
                     "Available generators: %s", desired,
                     ', '.join(obj.name for obj in siggens))
        return SignatureGenerator(d)

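# Usage sketch for init(), given a parsed datastore `d` (illustrative):
#
#   >>> d.setVar("BB_SIGNATURE_HANDLER", "basichash")
#   >>> siggen = init(d)
#   >>> siggen.name
#   'basichash'
#
# An unknown handler name logs an error and falls back to the noop
# SignatureGenerator base class.
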
class SignatureGenerator(object):
    """
    A signature generator computes the per-task hashes BitBake uses for
    stamp files and shared state. This base implementation is the "noop"
    generator: it keeps the bookkeeping structures but produces only
    trivial hashes.
    """
    name = "noop"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, variant):
        return

    def postparsing_clean_cache(self):
        return

    def setup_datacache(self, datacaches):
        self.datacaches = datacaches

    def setup_datacache_from_datastore(self, mcfn, d):
        # In task context we have no cache, so set up the internal data
        # structures from the fully parsed datastore provided

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        tasks = d.getVar('__BBTASKS', False)

        self.datacaches = {}
        self.datacaches[mc] = types.SimpleNamespace()
        setattr(self.datacaches[mc], "stamp", {})
        self.datacaches[mc].stamp[mcfn] = d.getVar('STAMP')
        setattr(self.datacaches[mc], "stamp_extrainfo", {})
        self.datacaches[mc].stamp_extrainfo[mcfn] = {}
        for t in tasks:
            flag = d.getVarFlag(t, "stamp-extra-info")
            if flag:
                self.datacaches[mc].stamp_extrainfo[mcfn][t] = flag

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile_base(self, mcfn):
        mc = bb.runqueue.mc_from_tid(mcfn)
        return self.datacaches[mc].stamp[mcfn]

    def stampfile_mcfn(self, taskname, mcfn, extrainfo=True):
        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]
        if not stamp:
            return

        stamp_extrainfo = ""
        if extrainfo:
            taskflagname = taskname
            if taskname.endswith("_setscene"):
                taskflagname = taskname.replace("_setscene", "")
            stamp_extrainfo = self.datacaches[mc].stamp_extrainfo[mcfn].get(taskflagname) or ""

        return self.stampfile(stamp, mcfn, taskname, stamp_extrainfo)

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

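    # For the noop generator the stamp name is just the base stamp plus the
    # task name; with an illustrative stamp base (the file_name argument is
    # unused here):
    #
    #   >>> SignatureGenerator(d).stampfile("/stamps/zlib-1.3-r0", mcfn, "do_compile", "")
    #   '/stamps/zlib-1.3-r0.do_compile'
    #
    # The rstrip('.') drops the trailing separator when extrainfo is empty.
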
    def stampcleanmask_mcfn(self, taskname, mcfn):
        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]
        if not stamp:
            return []

        taskflagname = taskname
        if taskname.endswith("_setscene"):
            taskflagname = taskname.replace("_setscene", "")
        stamp_extrainfo = self.datacaches[mc].stamp_extrainfo[mcfn].get(taskflagname) or ""

        return self.stampcleanmask(stamp, mcfn, taskname, stamp_extrainfo)

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, mcfn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, mcfn):
        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]
        bb.utils.remove(stamp)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def copy_unitaskhashes(self, targetdir):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    def exit(self):
        return

def build_pnid(mc, pn, taskname):
    if mc:
        return "mc:" + mc + ":" + pn + ":" + taskname
    return pn + ":" + taskname

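# build_pnid() simply joins the identifier parts, e.g. (illustrative):
#
#   >>> build_pnid("", "zlib", "do_configure")
#   'zlib:do_configure'
#   >>> build_pnid("mymc", "zlib", "do_configure")
#   'mc:mymc:zlib:do_configure'
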
class SignatureGeneratorBasic(SignatureGenerator):
    """
    The "basic" generator: computes base hashes from the parsed variable
    dependency data and full task hashes from the base hash plus runtime
    dependencies, file checksums and taints.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.setscenetasks = set()
        self.basehash_ignore_vars = set((data.getVar("BB_BASEHASH_IGNORE_VARS") or "").split())
        self.taskhash_ignore_tasks = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        self.taskhash_ignore_tasks = data.getVar("BB_TASKHASH_IGNORE_TASKS") or None
        if self.taskhash_ignore_tasks:
            self.twl = re.compile(self.taskhash_ignore_tasks)
        else:
            self.twl = None

    def _build_data(self, mcfn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basehash_ignore_vars)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basehash_ignore_vars, mcfn)

        for task in tasklist:
            tid = mcfn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        return taskdeps, gendeps, lookupcache

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        mcfn = fn
        if variant or mc:
            mcfn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps, gendeps, lookupcache = self._build_data(mcfn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % mcfn)
            raise

        # Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[mcfn]:
        #    self.dump_sigtask(mcfn, task, d.getVar("STAMP"), False)

        basehashes = {}
        for task in taskdeps:
            basehashes[task] = self.basehash[mcfn + ":" + task]

        d.setVar("__siggen_basehashes", basehashes)
        d.setVar("__siggen_gendeps", gendeps)
        d.setVar("__siggen_varvals", lookupcache)
        d.setVar("__siggen_taskdeps", taskdeps)

    def setup_datacache_from_datastore(self, mcfn, d):
        super().setup_datacache_from_datastore(mcfn, d)

        mc = bb.runqueue.mc_from_tid(mcfn)
        for attr in ["siggen_varvals", "siggen_taskdeps", "siggen_gendeps"]:
            if not hasattr(self.datacaches[mc], attr):
                setattr(self.datacaches[mc], attr, {})
        self.datacaches[mc].siggen_varvals[mcfn] = d.getVar("__siggen_varvals")
        self.datacaches[mc].siggen_taskdeps[mcfn] = d.getVar("__siggen_taskdeps")
        self.datacaches[mc].siggen_gendeps[mcfn] = d.getVar("__siggen_gendeps")

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the ignore
        # list
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

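    # Sketch of how BB_TASKHASH_IGNORE_TASKS drives rundep_check(); the
    # pattern and recipe names below are purely illustrative. With
    # BB_TASKHASH_IGNORE_TASKS = "^gcc-", a recipe that does not match the
    # pattern drops exactly the dependencies that do match:
    #
    #   rundep_check(fn, "zlib", task, dep, "gcc-runtime", caches) -> False
    #   rundep_check(fn, "zlib", task, dep, "openssl", caches)     -> True
    #
    # A recipe that itself matches ("gcc-cross") keeps all of its
    # dependencies, because the first self.twl.search() test short-circuits.
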
    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):

        (mc, _, task, mcfn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[mcfn]

        self.tidtopn[tid] = recipename
        # save hashfn for deps into siginfo?
        for dep in deps:
            (depmc, _, deptask, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            dep_pn = dataCaches[depmc].pkg_fn[depmcfn]

            if not self.rundep_check(mcfn, recipename, task, dep, dep_pn, dataCaches):
                continue

            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)

            dep_pnid = build_pnid(depmc, dep_pn, deptask)
            self.runtaskdeps[tid].append((dep_pnid, dep))

        if task in dataCaches[mc].file_checksums[mcfn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[mcfn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[mcfn][task], recipename, self.localdirsexclude)
            for (f,cs) in checksums:
                self.file_checksum_values[tid].append((f,cs))

        taskdep = dataCaches[mc].task_deps[mcfn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(mcfn, task, dataCaches[mc].stamp[mcfn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):

        data = self.basehash[tid]
        for dep in sorted(self.runtaskdeps[tid]):
            data += self.get_unihash(dep[1])

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                if "/./" in f:
                    data += "./" + f.split("/./")[1]
                data += cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data += self.taints[tid][8:]
            else:
                data += self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
        return h

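    # Schematic of the taskhash composition above (not literal code):
    #
    #   data = basehash[tid]
    #        + unihash(dep) for each runtime dependency, sorted by pnid
    #        + ("./<subpath>" if the file path contains "/./") + checksum
    #          for each file checksum entry
    #        + taint (with the "nostamp:" prefix stripped for nostamp taints)
    #   taskhash = sha256(data)
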
    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def copy_unitaskhashes(self, targetdir):
        self.unihash_cache.copyfile(targetdir)

    def dump_sigtask(self, mcfn, task, stampbase, runtime):
        tid = mcfn + ":" + task
        mc = bb.runqueue.mc_from_tid(mcfn)
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basehash_ignore_vars'] = self.basehash_ignore_vars
        data['taskhash_ignore_tasks'] = self.taskhash_ignore_tasks
        data['taskdeps'] = self.datacaches[mc].siggen_taskdeps[mcfn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.datacaches[mc].siggen_varvals[mcfn][task]
        for dep in self.datacaches[mc].siggen_taskdeps[mcfn][task]:
            if dep in self.basehash_ignore_vars:
                continue
            data['gendeps'][dep] = self.datacaches[mc].siggen_gendeps[mcfn][dep]
            data['varvals'][dep] = self.datacaches[mc].siggen_varvals[mcfn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = [dep[0] for dep in sorted(self.runtaskdeps[tid])]
            data['file_checksum_values'] = []
            for f,cs in self.file_checksum_values[tid]:
                if "/./" in f:
                    data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
                else:
                    data['file_checksum_values'].append((os.path.basename(f), cs))
            data['runtaskhashes'] = {}
            for dep in self.runtaskdeps[tid]:
                data['runtaskhashes'][dep[0]] = self.get_unihash(dep[1])
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(mcfn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = bb.utils.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
                f.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If task is not in basehash, then error
        return self.basehash[tid]

    def stampfile(self, stampbase, mcfn, taskname, extrainfo, clean=False):
        if taskname.endswith("_setscene"):
            tid = mcfn + ":" + taskname[:-9]
        else:
            tid = mcfn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, mcfn, taskname, extrainfo):
        return self.stampfile(stampbase, mcfn, taskname, extrainfo, clean=True)

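    # Compared to the base class, the hash becomes part of the stamp name,
    # e.g. (hash shortened for illustration):
    #
    #   /stamps/zlib-1.3-r0.do_compile.3a7bd3e2360a...
    #
    # and stampcleanmask() substitutes "*" for the hash so callers can glob
    # away stamps left over from any previous hash of the same task.
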
    def invalidate_task(self, task, mcfn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (mcfn, task))

        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]

        taintfn = stamp + '.' + task + '.taint'

        import uuid
        bb.utils.mkdirhier(os.path.dirname(taintfn))
        # The specific content of the taint file is not really important,
        # we just need it to be random, so a random UUID is used
        with open(taintfn, 'w') as taintf:
            taintf.write(str(uuid.uuid4()))

class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def reset(self, data):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().reset(data)

    def exit(self):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().exit()

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures that a task won't be re-run if the taskhash changes but
            # would still result in the same output hash
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash

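    # The unitaskhashes cache is keyed by "<mc>:<pn>:<taskname>" and stores a
    # (taskhash, unihash) pair; the cached unihash is only returned when the
    # stored taskhash matches the current one (or the explicit checkkey).
    # Illustrative entry, with an empty multiconfig and shortened hashes:
    #
    #   self.unitaskhashes[":zlib:do_compile"] = ("8c3f9a...", "51b0e2...")
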
    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task we can return the taskhash directly
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # the most recent entry for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        #    continues to work
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) It is easy for multiple independent builders to derive the same
        #    unique hash from the same input. This means that if independent
        #    builders find the same taskhash, but it isn't reported to the
        #    server, there is a better chance that they will agree on the
        #    unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.bbdebug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash

    def report_unihash(self, path, task, d):
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        mcfn = d.getVar('BB_FILENAME')
        tid = mcfn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just exit
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(mcfn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass

    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

def dump_this_task(outfile, d):
    import bb.parse
    mcfn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.parse.siggen.stampfile_base(mcfn)
    bb.parse.siggen.dump_sigtask(mcfn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title':   '\033[1m',
              'color_default': '\033[0m',
              'color_add':     '\033[0;32m',
              'color_remove':  '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)

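# Illustrative worddiff_str() result without colour, assuming simplediff
# emits the '-' run before the '+' run for a substitution:
#
#   >>> worddiff_str("a b c", "a x c")
#   '"a [-b-] {+x+} c"'
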
def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

# Handle renamed fields
def handle_renames(data):
    if 'basewhitelist' in data:
        data['basehash_ignore_vars'] = data['basewhitelist']
        del data['basewhitelist']
    if 'taskwhitelist' in data:
        data['taskhash_ignore_tasks'] = data['taskwhitelist']
        del data['taskwhitelist']


def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)
    with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
        b_data = json.load(f, object_hook=SetDecoder)

    for data in [a_data, b_data]:
        handle_renames(data)

    def dict_diff(a, b, ignored_vars=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in ignored_vars:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        from collections import Counter

        # Convert lists back to tuples
        a = [(f[0], f[1]) for f in a]
        b = [(f[0], f[1]) for f in b]

        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

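    # Illustrative file_checksums_diff() result: a checksum change on the
    # same path is reported as changed rather than as added plus removed.
    #
    #   a = [("conf/x.cfg", "aaa1"), ("files/y.patch", "bbb2")]
    #   b = [("conf/x.cfg", "ccc3")]
    #   -> changed = [("conf/x.cfg", "aaa1", "ccc3")]
    #      added   = []
    #      removed = ["files/y.patch"]
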
    if 'basehash_ignore_vars' in a_data and a_data['basehash_ignore_vars'] != b_data['basehash_ignore_vars']:
        output.append(color_format("{color_title}basehash_ignore_vars changed{color_default} from '%s' to '%s'") % (a_data['basehash_ignore_vars'], b_data['basehash_ignore_vars']))
        if a_data['basehash_ignore_vars'] and b_data['basehash_ignore_vars']:
            output.append("changed items: %s" % a_data['basehash_ignore_vars'].symmetric_difference(b_data['basehash_ignore_vars']))

    if 'taskhash_ignore_tasks' in a_data and a_data['taskhash_ignore_tasks'] != b_data['taskhash_ignore_tasks']:
        output.append(color_format("{color_title}taskhash_ignore_tasks changed{color_default} from '%s' to '%s'") % (a_data['taskhash_ignore_tasks'], b_data['taskhash_ignore_tasks']))
        if a_data['taskhash_ignore_tasks'] and b_data['taskhash_ignore_tasks']:
            output.append("changed items: %s" % a_data['taskhash_ignore_tasks'].symmetric_difference(b_data['taskhash_ignore_tasks']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basehash_ignore_vars'] & b_data['basehash_ignore_vars'])
    if changed:
        for dep in sorted(changed):
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in sorted(added):
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in sorted(removed):
            output.append(color_format("{color_title}Dependency on variable %s was removed") % (dep))


    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in sorted(changed):
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if 'file_checksum_values' not in a_data:
        a_data['file_checksum_values'] = []
    if 'file_checksum_values' not in b_data:
        b_data['file_checksum_values'] = []

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if 'runtaskdeps' not in a_data:
        a_data['runtaskdeps'] = {}
    if 'runtaskdeps' not in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (a, a_data['runtaskhashes'][a], b, b_data['runtaskhashes'][b]))

        if changed:
            clean_a = a_data['runtaskdeps']
            clean_b = b_data['runtaskdeps']
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = a_data['runtaskhashes']
        b = b_data['runtaskhashes']
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in sorted(added):
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (dep, b[dep]))
        if removed:
            for dep in sorted(removed):
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (dep, a[dep]))
        if changed:
            for dep in sorted(changed):
                if not collapsed:
                    output.append(color_format("{color_title}Hash for task dependency %s changed{color_default} from %s to %s") % (dep, a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
                            output = [output[-1]] + recout
                            break

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output


def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in sorted(alldeps):
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

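# calc_basehash() mirrors the parse-time construction: the task's own
# variable value, then each sorted dependency name immediately followed by
# its value (unset values contribute only the name), hashed with sha256.
# Schematically:
#
#   basedata = varvals[task] + dep1 + varvals[dep1] + dep2 + varvals[dep2] + ...
#   basehash = sha256(basedata)
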
def calc_taskhash(sigdata):
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            if "./" in c[0]:
                data = data + c[0]
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()


def dump_sigfile(a):
    output = []

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)

    handle_renames(a_data)

    output.append("basehash_ignore_vars: %s" % (sorted(a_data['basehash_ignore_vars'])))

    output.append("taskhash_ignore_tasks: %s" % (sorted(a_data['taskhash_ignore_tasks'] or [])))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in sorted(a_data['gendeps']):
        output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))

    for dep in sorted(a_data['varvals']):
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))

    if 'runtaskhashes' in a_data:
        for dep in sorted(a_data['runtaskhashes']):
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s; base hash from file is %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output