# xref: /openbmc/openbmc/poky/bitbake/lib/bb/siggen.py (revision 5082cc7f)
#
# Copyright BitBake Contributors
#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import uuid
import bb.data
import difflib
import simplediff
import json
import types
import bb.compress.zstd
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv
import hashserv.client

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

class SetEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, (set, frozenset)):
            # Serialise sets as a sorted list inside a marker dict so that the
            # output is deterministic and can be decoded again by SetDecoder
            return dict(_set_object=sorted(obj))
        return json.JSONEncoder.default(self, obj)

def SetDecoder(dct):
    if '_set_object' in dct:
        return frozenset(dct['_set_object'])
    return dct
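
# Illustrative round-trip (hypothetical data): sets are encoded as a sorted
# list under a "_set_object" marker key and decoded back to a frozenset:
#   encoded = json.dumps({"deps": {"b", "a"}}, cls=SetEncoder)
#   # -> '{"deps": {"_set_object": ["a", "b"]}}'
#   decoded = json.loads(encoded, object_hook=SetDecoder)
#   # decoded["deps"] == frozenset({"a", "b"})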

def init(d):
    siggens = [obj for obj in globals().values()
                      if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
    else:
        logger.error("Invalid signature generator '%s', using default 'noop'\n"
                     "Available generators: %s", desired,
                     ', '.join(obj.name for obj in siggens))
        return SignatureGenerator(d)

class SignatureGenerator(object):
    """
    A "noop" signature generator which implements the generator interface but
    performs no real signature handling; concrete generators subclass this.
    """
    name = "noop"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, variant):
        return

    def postparsing_clean_cache(self):
        return

    def setup_datacache(self, datacaches):
        self.datacaches = datacaches

    def setup_datacache_from_datastore(self, mcfn, d):
        # In task context we have no cache so set up internal data structures
        # from the fully parsed data store provided

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        tasks = d.getVar('__BBTASKS', False)

        self.datacaches = {}
        self.datacaches[mc] = types.SimpleNamespace()
        setattr(self.datacaches[mc], "stamp", {})
        self.datacaches[mc].stamp[mcfn] = d.getVar('STAMP')
        setattr(self.datacaches[mc], "stamp_extrainfo", {})
        self.datacaches[mc].stamp_extrainfo[mcfn] = {}
        for t in tasks:
            flag = d.getVarFlag(t, "stamp-extra-info")
            if flag:
                self.datacaches[mc].stamp_extrainfo[mcfn][t] = flag

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile_base(self, mcfn):
        mc = bb.runqueue.mc_from_tid(mcfn)
        return self.datacaches[mc].stamp[mcfn]

    def stampfile_mcfn(self, taskname, mcfn, extrainfo=True):
        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]
        if not stamp:
            return

        stamp_extrainfo = ""
        if extrainfo:
            taskflagname = taskname
            if taskname.endswith("_setscene"):
                taskflagname = taskname.replace("_setscene", "")
            stamp_extrainfo = self.datacaches[mc].stamp_extrainfo[mcfn].get(taskflagname) or ""

        return self.stampfile(stamp, mcfn, taskname, stamp_extrainfo)

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')
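
    # Illustrative example (hypothetical stampbase): with empty extrainfo,
    #   stampfile("/stamps/foo-1.0-r0", mcfn, "do_compile", "")
    # returns "/stamps/foo-1.0-r0.do_compile" once rstrip('.') removes the
    # trailing separator.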

    def stampcleanmask_mcfn(self, taskname, mcfn):
        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]
        if not stamp:
            return []

        taskflagname = taskname
        if taskname.endswith("_setscene"):
            taskflagname = taskname.replace("_setscene", "")
        stamp_extrainfo = self.datacaches[mc].stamp_extrainfo[mcfn].get(taskflagname) or ""

        return self.stampcleanmask(stamp, mcfn, taskname, stamp_extrainfo)

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, mcfn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, mcfn):
        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]
        bb.utils.remove(stamp)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def copy_unitaskhashes(self, targetdir):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    def exit(self):
        return

def build_pnid(mc, pn, taskname):
    if mc:
        return "mc:" + mc + ":" + pn + ":" + taskname
    return pn + ":" + taskname
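
# For example (hypothetical names):
#   build_pnid("", "zlib", "do_compile")    -> "zlib:do_compile"
#   build_pnid("mc1", "zlib", "do_compile") -> "mc:mc1:zlib:do_compile"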

class SignatureGeneratorBasic(SignatureGenerator):
    """
    Signature generator which computes real base and task hashes from the
    variable and task dependency data produced at parse time.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.setscenetasks = set()
        self.basehash_ignore_vars = set((data.getVar("BB_BASEHASH_IGNORE_VARS") or "").split())
        self.taskhash_ignore_tasks = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        self.taskhash_ignore_tasks = data.getVar("BB_TASKHASH_IGNORE_TASKS") or None
        if self.taskhash_ignore_tasks:
            self.twl = re.compile(self.taskhash_ignore_tasks)
        else:
            self.twl = None

    def _build_data(self, mcfn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basehash_ignore_vars)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basehash_ignore_vars, mcfn)

        for task in tasklist:
            tid = mcfn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        return taskdeps, gendeps, lookupcache

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        mcfn = fn
        if variant or mc:
            mcfn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps, gendeps, lookupcache = self._build_data(mcfn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % mcfn)
            raise

        basehashes = {}
        for task in taskdeps:
            basehashes[task] = self.basehash[mcfn + ":" + task]

        d.setVar("__siggen_basehashes", basehashes)
        d.setVar("__siggen_gendeps", gendeps)
        d.setVar("__siggen_varvals", lookupcache)
        d.setVar("__siggen_taskdeps", taskdeps)

        # Slow but can be useful for debugging mismatched basehashes
        #self.setup_datacache_from_datastore(mcfn, d)
        #for task in taskdeps:
        #    self.dump_sigtask(mcfn, task, d.getVar("STAMP"), False)

    def setup_datacache_from_datastore(self, mcfn, d):
        super().setup_datacache_from_datastore(mcfn, d)

        mc = bb.runqueue.mc_from_tid(mcfn)
        for attr in ["siggen_varvals", "siggen_taskdeps", "siggen_gendeps"]:
            if not hasattr(self.datacaches[mc], attr):
                setattr(self.datacaches[mc], attr, {})
        self.datacaches[mc].siggen_varvals[mcfn] = d.getVar("__siggen_varvals")
        self.datacaches[mc].siggen_taskdeps[mcfn] = d.getVar("__siggen_taskdeps")
        self.datacaches[mc].siggen_gendeps[mcfn] = d.getVar("__siggen_gendeps")

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the ignore
        # list
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):

        (mc, _, task, mcfn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[mcfn]

        self.tidtopn[tid] = recipename
        # save hashfn for deps into siginfo?
        for dep in deps:
            (depmc, _, deptask, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            dep_pn = dataCaches[depmc].pkg_fn[depmcfn]

            if not self.rundep_check(mcfn, recipename, task, dep, dep_pn, dataCaches):
                continue

            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)

            dep_pnid = build_pnid(depmc, dep_pn, deptask)
            self.runtaskdeps[tid].append((dep_pnid, dep))

        if task in dataCaches[mc].file_checksums[mcfn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[mcfn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[mcfn][task], recipename, self.localdirsexclude)
            for (f, cs) in checksums:
                self.file_checksum_values[tid].append((f, cs))

        taskdep = dataCaches[mc].task_deps[mcfn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset the taint value upon every call
                pass
            else:
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(mcfn, task, dataCaches[mc].stamp[mcfn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):

        data = self.basehash[tid]
        for dep in sorted(self.runtaskdeps[tid]):
            data += self.get_unihash(dep[1])

        for (f, cs) in sorted(self.file_checksum_values[tid], key=clean_checksum_file_path):
            if cs:
                if "/./" in f:
                    data += "./" + f.split("/./")[1]
                data += cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data += self.taints[tid][8:]
            else:
                data += self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
        return h

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def copy_unitaskhashes(self, targetdir):
        self.unihash_cache.copyfile(targetdir)

    def dump_sigtask(self, mcfn, task, stampbase, runtime):
        tid = mcfn + ":" + task
        mc = bb.runqueue.mc_from_tid(mcfn)
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]  # strip the leading "customfile:"
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basehash_ignore_vars'] = self.basehash_ignore_vars
        data['taskhash_ignore_tasks'] = self.taskhash_ignore_tasks
        data['taskdeps'] = self.datacaches[mc].siggen_taskdeps[mcfn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.datacaches[mc].siggen_varvals[mcfn][task]
        for dep in self.datacaches[mc].siggen_taskdeps[mcfn][task]:
            if dep in self.basehash_ignore_vars:
                continue
            data['gendeps'][dep] = self.datacaches[mc].siggen_gendeps[mcfn][dep]
            data['varvals'][dep] = self.datacaches[mc].siggen_varvals[mcfn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = [dep[0] for dep in sorted(self.runtaskdeps[tid])]
            data['file_checksum_values'] = []
            for f, cs in sorted(self.file_checksum_values[tid], key=clean_checksum_file_path):
                if "/./" in f:
                    data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
                else:
                    data['file_checksum_values'].append((os.path.basename(f), cs))
            data['runtaskhashes'] = {}
            for dep in self.runtaskdeps[tid]:
                data['runtaskhashes'][dep[0]] = self.get_unihash(dep[1])
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(mcfn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = bb.utils.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
                f.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err
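
    # Illustrative sigfile names (hypothetical task and hashes): a runtime dump
    # is written to "<stampbase>.do_compile.sigdata.<unihash>", while a
    # parse-time dump is written to "<stampbase>.do_compile.sigbasedata.<basehash>".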

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If the task is not in basehash either, let the KeyError propagate
        return self.basehash[tid]

    def stampfile(self, stampbase, mcfn, taskname, extrainfo, clean=False):
        if taskname.endswith("_setscene"):
            tid = mcfn + ":" + taskname[:-9]
        else:
            tid = mcfn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')
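
    # Illustrative example (hypothetical stampbase and hash): the task hash is
    # embedded in the stamp name, so a hash change invalidates the stamp:
    #   stampfile("/stamps/foo-1.0-r0", mcfn, "do_compile", "")
    #   -> "/stamps/foo-1.0-r0.do_compile.3f2a..."
    # With clean=True the hash field becomes the wildcard "*", which is what
    # stampcleanmask() uses to match stamps with any hash.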

    def stampcleanmask(self, stampbase, mcfn, taskname, extrainfo):
        return self.stampfile(stampbase, mcfn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, mcfn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (mcfn, task))

        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]

        taintfn = stamp + '.' + task + '.taint'

        bb.utils.mkdirhier(os.path.dirname(taintfn))
        # The specific content of the taint file is not really important,
        # we just need it to be random, so a random UUID is used
        with open(taintfn, 'w') as taintf:
            taintf.write(str(uuid.uuid4()))

class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def reset(self, data):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().reset(data)

    def exit(self):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().exit()

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures that a task won't be re-run if its taskhash changes but
            # would still result in the same output hash
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (stored_taskhash, unihash) = self.unitaskhashes[key]
        if stored_taskhash != checkkey:
            return None
        return unihash
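
    # Illustrative cache shape (hypothetical values): unitaskhashes maps
    # "mc:pn:taskname" keys to (taskhash, unihash) pairs, e.g.
    #   self.unitaskhashes[":zlib:do_compile"] == ("ab12...", "cd34...")
    # A cached unihash is only returned while the stored taskhash still
    # matches the current one.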

    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task we can return the taskhash directly
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # the most recent entry for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        #    continues to work
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) It is easy for multiple independent builders to derive the same
        #    unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.bbdebug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash

    def report_unihash(self, path, task, d):
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        mcfn = d.getVar('BB_FILENAME')
        tid = mcfn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just exit
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(mcfn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass

    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

def clean_checksum_file_path(file_checksum_tuple):
    f, cs = file_checksum_tuple
    if "/./" in f:
        return "./" + f.split("/./")[1]
    return f
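
# For example (hypothetical path):
#   clean_checksum_file_path(("/work/recipe/./files/fix.patch", "abcd"))
#   -> "./files/fix.patch"
# so the sort key (and the path hashed in get_taskhash) is independent of
# everything before the "/./" marker.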

def dump_this_task(outfile, d):
    import bb.parse
    mcfn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.parse.siggen.stampfile_base(mcfn)
    bb.parse.siggen.dump_sigtask(mcfn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title':   '\033[1m',
              'color_default': '\033[0m',
              'color_add':     '\033[0;32m',
              'color_remove':  '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)
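
# Illustrative output (colours disabled): removed words are wrapped in [-...-]
# and added words in {+...+}, e.g.
#   worddiff_str("a b c", "a d c") -> '"a [-b-] {+d+} c"'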

def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

# Handle renamed fields
def handle_renames(data):
    if 'basewhitelist' in data:
        data['basehash_ignore_vars'] = data['basewhitelist']
        del data['basewhitelist']
    if 'taskwhitelist' in data:
        data['taskhash_ignore_tasks'] = data['taskwhitelist']
        del data['taskwhitelist']


def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)
    with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
        b_data = json.load(f, object_hook=SetDecoder)

    for data in [a_data, b_data]:
        handle_renames(data)

    def dict_diff(a, b, ignored_vars=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in ignored_vars:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        from collections import Counter

        # Convert lists back to tuples
        a = [(f[0], f[1]) for f in a]
        b = [(f[0], f[1]) for f in b]

        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    if 'basehash_ignore_vars' in a_data and a_data['basehash_ignore_vars'] != b_data['basehash_ignore_vars']:
        output.append(color_format("{color_title}basehash_ignore_vars changed{color_default} from '%s' to '%s'") % (a_data['basehash_ignore_vars'], b_data['basehash_ignore_vars']))
        if a_data['basehash_ignore_vars'] and b_data['basehash_ignore_vars']:
            output.append("changed items: %s" % a_data['basehash_ignore_vars'].symmetric_difference(b_data['basehash_ignore_vars']))

    if 'taskhash_ignore_tasks' in a_data and a_data['taskhash_ignore_tasks'] != b_data['taskhash_ignore_tasks']:
        output.append(color_format("{color_title}taskhash_ignore_tasks changed{color_default} from '%s' to '%s'") % (a_data['taskhash_ignore_tasks'], b_data['taskhash_ignore_tasks']))
        if a_data['taskhash_ignore_tasks'] and b_data['taskhash_ignore_tasks']:
            output.append("changed items: %s" % a_data['taskhash_ignore_tasks'].symmetric_difference(b_data['taskhash_ignore_tasks']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basehash_ignore_vars'] & b_data['basehash_ignore_vars'])
    if changed:
        for dep in sorted(changed):
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in sorted(added):
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in sorted(removed):
            output.append(color_format("{color_title}Dependency on variable %s was removed") % (dep))


    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in sorted(changed):
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if 'file_checksum_values' not in a_data:
        a_data['file_checksum_values'] = []
    if 'file_checksum_values' not in b_data:
        b_data['file_checksum_values'] = []

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if 'runtaskdeps' not in a_data:
        a_data['runtaskdeps'] = {}
    if 'runtaskdeps' not in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (a, a_data['runtaskhashes'][a], b, b_data['runtaskhashes'][b]))

        if changed:
            clean_a = a_data['runtaskdeps']
            clean_b = b_data['runtaskdeps']
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = a_data['runtaskhashes']
        b = b_data['runtaskhashes']
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in sorted(added):
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (dep, b[dep]))
        if removed:
            for dep in sorted(removed):
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (dep, a[dep]))
        if changed:
            for dep in sorted(changed):
                if not collapsed:
                    output.append(color_format("{color_title}Hash for task dependency %s changed{color_default} from %s to %s") % (dep, a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
                            output = [output[-1]] + recout
                            break

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output


def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in sorted(alldeps):
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

def calc_taskhash(sigdata):
    # Recompute the task hash from dumped sigdata, mirroring the construction
    # in SignatureGeneratorBasic.get_taskhash()
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            if "./" in c[0]:
                data = data + c[0]
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()


def dump_sigfile(a):
    output = []

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)

    handle_renames(a_data)

    output.append("basehash_ignore_vars: %s" % (sorted(a_data['basehash_ignore_vars'])))

    output.append("taskhash_ignore_tasks: %s" % (sorted(a_data['taskhash_ignore_tasks'] or [])))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in sorted(a_data['gendeps']):
        output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))

    for dep in sorted(a_data['varvals']):
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))

    if 'runtaskhashes' in a_data:
        for dep in sorted(a_data['runtaskhashes']):
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output