xref: /openbmc/openbmc/poky/bitbake/lib/bb/siggen.py (revision 67ab205e)
#
# Copyright BitBake Contributors
#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
import json
import types
import bb.compress.zstd
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv
import hashserv.client

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

# find_siginfo and find_siginfo_version are set by the metadata siggen
# The minimum version of the find_siginfo function we need
find_siginfo_minversion = 2

def check_siggen_version(siggen):
    if not hasattr(siggen, "find_siginfo_version"):
        bb.fatal("Siggen from metadata (OE-Core?) is too old, please update it (no version found)")
    if siggen.find_siginfo_version < siggen.find_siginfo_minversion:
        bb.fatal("Siggen from metadata (OE-Core?) is too old, please update it (%s vs %s)" % (siggen.find_siginfo_version, siggen.find_siginfo_minversion))

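# JSON encoder which serialises set and frozenset objects as sorted lists so
# that the siginfo files written below are deterministic and diffable.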
class SetEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, (set, frozenset)):
            return dict(_set_object=list(sorted(obj)))
        return json.JSONEncoder.default(self, obj)

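# json object_hook, the inverse of SetEncoder: restore frozensets from their
# '_set_object' encoding when siginfo files are loaded.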
def SetDecoder(dct):
    if '_set_object' in dct:
        return frozenset(dct['_set_object'])
    return dct

def init(d):
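    """Instantiate the signature generator selected by BB_SIGNATURE_HANDLER, falling back to the noop generator."""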
    siggens = [obj for obj in globals().values()
                      if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
    else:
        logger.error("Invalid signature generator '%s', using default 'noop'\n"
                     "Available generators: %s", desired,
                     ', '.join(obj.name for obj in siggens))
        return SignatureGenerator(d)

class SignatureGenerator(object):
    """
    The default "noop" signature generator. It computes no meaningful task
    hashes and also defines the interface the other generators implement.
    """
    name = "noop"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, variant):
        return

    def postparsing_clean_cache(self):
        return

    def setup_datacache(self, datacaches):
        self.datacaches = datacaches

    def setup_datacache_from_datastore(self, mcfn, d):
        # In task context we have no cache, so set up internal data structures
        # from the fully parsed data store provided

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        tasks = d.getVar('__BBTASKS', False)

        self.datacaches = {}
        self.datacaches[mc] = types.SimpleNamespace()
        setattr(self.datacaches[mc], "stamp", {})
        self.datacaches[mc].stamp[mcfn] = d.getVar('STAMP')
        setattr(self.datacaches[mc], "stamp_extrainfo", {})
        self.datacaches[mc].stamp_extrainfo[mcfn] = {}
        for t in tasks:
            flag = d.getVarFlag(t, "stamp-extra-info")
            if flag:
                self.datacaches[mc].stamp_extrainfo[mcfn][t] = flag

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

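    # Noop implementation: derive a stable hash from the task identifier
    # alone, independent of the task's actual inputs.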
    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile_base(self, mcfn):
        mc = bb.runqueue.mc_from_tid(mcfn)
        return self.datacaches[mc].stamp[mcfn]

    def stampfile_mcfn(self, taskname, mcfn, extrainfo=True):
        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]
        if not stamp:
            return

        stamp_extrainfo = ""
        if extrainfo:
            taskflagname = taskname
            if taskname.endswith("_setscene"):
                taskflagname = taskname.replace("_setscene", "")
            stamp_extrainfo = self.datacaches[mc].stamp_extrainfo[mcfn].get(taskflagname) or ""

        return self.stampfile(stamp, mcfn, taskname, stamp_extrainfo)

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask_mcfn(self, taskname, mcfn):
        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]
        if not stamp:
            return []

        taskflagname = taskname
        if taskname.endswith("_setscene"):
            taskflagname = taskname.replace("_setscene", "")
        stamp_extrainfo = self.datacaches[mc].stamp_extrainfo[mcfn].get(taskflagname) or ""

        return self.stampcleanmask(stamp, mcfn, taskname, stamp_extrainfo)

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, mcfn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, mcfn):
        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]
        bb.utils.remove(stamp)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def copy_unitaskhashes(self, targetdir):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    def exit(self):
        return

def build_pnid(mc, pn, taskname):
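    """Return the "<pn>:<taskname>" task identifier, prefixed with "mc:<mc>:" for multiconfig builds."""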
    if mc:
        return "mc:" + mc + ":" + pn + ":" + taskname
    return pn + ":" + taskname

class SignatureGeneratorBasic(SignatureGenerator):
    """
    Signature generator which computes real base and task hashes from the
    parsed metadata, but does not include those hashes in stamp filenames.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.setscenetasks = set()
        self.basehash_ignore_vars = set((data.getVar("BB_BASEHASH_IGNORE_VARS") or "").split())
        self.taskhash_ignore_tasks = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
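        # BB_TASKHASH_IGNORE_TASKS is a regex matched against recipe names;
        # "twl" appears to be the historical "task whitelist" name for the
        # compiled form used by rundep_check() below.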
        self.taskhash_ignore_tasks = data.getVar("BB_TASKHASH_IGNORE_TASKS") or None
        if self.taskhash_ignore_tasks:
            self.twl = re.compile(self.taskhash_ignore_tasks)
        else:
            self.twl = None

    def _build_data(self, mcfn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basehash_ignore_vars)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basehash_ignore_vars, mcfn)

        for task in tasklist:
            tid = mcfn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        return taskdeps, gendeps, lookupcache

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        mcfn = fn
        if variant or mc:
            mcfn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps, gendeps, lookupcache = self._build_data(mcfn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % mcfn)
            raise

        basehashes = {}
        for task in taskdeps:
            basehashes[task] = self.basehash[mcfn + ":" + task]

        d.setVar("__siggen_basehashes", basehashes)
        d.setVar("__siggen_gendeps", gendeps)
        d.setVar("__siggen_varvals", lookupcache)
        d.setVar("__siggen_taskdeps", taskdeps)

        # Slow but can be useful for debugging mismatched basehashes
        #self.setup_datacache_from_datastore(mcfn, d)
        #for task in taskdeps:
        #    self.dump_sigtask(mcfn, task, d.getVar("STAMP"), False)

    def setup_datacache_from_datastore(self, mcfn, d):
        super().setup_datacache_from_datastore(mcfn, d)

        mc = bb.runqueue.mc_from_tid(mcfn)
        for attr in ["siggen_varvals", "siggen_taskdeps", "siggen_gendeps"]:
            if not hasattr(self.datacaches[mc], attr):
                setattr(self.datacaches[mc], attr, {})
        self.datacaches[mc].siggen_varvals[mcfn] = d.getVar("__siggen_varvals")
        self.datacaches[mc].siggen_taskdeps[mcfn] = d.getVar("__siggen_taskdeps")
        self.datacaches[mc].siggen_gendeps[mcfn] = d.getVar("__siggen_gendeps")

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the ignore
        # list
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):

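        # Gather everything get_taskhash() will need for this task: its base
        # hash, the filtered runtime dependencies, the tracked file checksums
        # and any taints.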
        (mc, _, task, mcfn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[mcfn]

        self.tidtopn[tid] = recipename
        # save hashfn for deps into siginfo?
        for dep in deps:
            (depmc, _, deptask, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            dep_pn = dataCaches[depmc].pkg_fn[depmcfn]

            if not self.rundep_check(mcfn, recipename, task, dep, dep_pn, dataCaches):
                continue

            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)

            dep_pnid = build_pnid(depmc, dep_pn, deptask)
            self.runtaskdeps[tid].append((dep_pnid, dep))

        if task in dataCaches[mc].file_checksums[mcfn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[mcfn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[mcfn][task], recipename, self.localdirsexclude)
            for (f, cs) in checksums:
                self.file_checksum_values[tid].append((f, cs))

        taskdep = dataCaches[mc].task_deps[mcfn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(mcfn, task, dataCaches[mc].stamp[mcfn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):

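        # The task hash is the base hash extended with the unihashes of all
        # runtime dependencies, the tracked file checksums and any taint.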
        data = self.basehash[tid]
        for dep in sorted(self.runtaskdeps[tid]):
            data += self.get_unihash(dep[1])

        for (f, cs) in sorted(self.file_checksum_values[tid], key=clean_checksum_file_path):
            if cs:
                if "/./" in f:
                    data += "./" + f.split("/./")[1]
                data += cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data += self.taints[tid][8:]
            else:
                data += self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
        return h

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def copy_unitaskhashes(self, targetdir):
        self.unihash_cache.copyfile(targetdir)

    def dump_sigtask(self, mcfn, task, stampbase, runtime):
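        # Write a .sigdata (runtime) or .sigbasedata (parse-time) siginfo file
        # recording all of the inputs to this task's hash, so that signature
        # mismatches can be debugged later.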
        tid = mcfn + ":" + task
        mc = bb.runqueue.mc_from_tid(mcfn)
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basehash_ignore_vars'] = self.basehash_ignore_vars
        data['taskhash_ignore_tasks'] = self.taskhash_ignore_tasks
        data['taskdeps'] = self.datacaches[mc].siggen_taskdeps[mcfn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.datacaches[mc].siggen_varvals[mcfn][task]
        for dep in self.datacaches[mc].siggen_taskdeps[mcfn][task]:
            if dep in self.basehash_ignore_vars:
                continue
            data['gendeps'][dep] = self.datacaches[mc].siggen_gendeps[mcfn][dep]
            data['varvals'][dep] = self.datacaches[mc].siggen_varvals[mcfn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = [dep[0] for dep in sorted(self.runtaskdeps[tid])]
            data['file_checksum_values'] = []
            for f, cs in sorted(self.file_checksum_values[tid], key=clean_checksum_file_path):
                if "/./" in f:
                    data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
                else:
                    data['file_checksum_values'].append((os.path.basename(f), cs))
            data['runtaskhashes'] = {}
            for dep in self.runtaskdeps[tid]:
                data['runtaskhashes'][dep[0]] = self.get_unihash(dep[1])
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(mcfn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = bb.utils.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
                f.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If the task is not in basehash either, a KeyError here is the
        # correct outcome
        return self.basehash[tid]

    def stampfile(self, stampbase, mcfn, taskname, extrainfo, clean=False):
        if taskname.endswith("_setscene"):
            tid = mcfn + ":" + taskname[:-9]
        else:
            tid = mcfn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, mcfn, taskname, extrainfo):
        return self.stampfile(stampbase, mcfn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, mcfn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (mcfn, task))

        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]

        taintfn = stamp + '.' + task + '.taint'

        import uuid
        bb.utils.mkdirhier(os.path.dirname(taintfn))
        # The specific content of the taint file is not really important,
        # we just need it to be random, so a random UUID is used
        with open(taintfn, 'w') as taintf:
            taintf.write(str(uuid.uuid4()))

class SignatureGeneratorUniHashMixIn(object):
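    """
    Mixin which exchanges task hashes with a hash equivalence server
    (hashserv): it queries the server for an existing unified hash
    ("unihash") for each taskhash and reports output hashes back, so that
    tasks producing identical output can share a single unihash.
    """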
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def reset(self, data):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().reset(data)

    def exit(self):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().exit()

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures that a task won't be re-run if its taskhash changes but
            # it would still produce the same output hash
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash

    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task we can return
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # the most recent entry for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) The value is easy for multiple independent builders to derive the
        #    same unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.bbdebug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash

    def report_unihash(self, path, task, d):
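        # Compute the output hash for a completed task and report the
        # (taskhash, outhash, unihash) triple to the hash equivalence server,
        # adopting any replacement unihash the server returns.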
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        mcfn = d.getVar('BB_FILENAME')
        tid = mcfn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just exit
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(mcfn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass

    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"

    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

def clean_checksum_file_path(file_checksum_tuple):
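    """Return a stable sort key for a (path, checksum) tuple, reducing paths containing "/./" to their "./"-relative part."""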
    f, cs = file_checksum_tuple
    if "/./" in f:
        return "./" + f.split("/./")[1]
    return f

def dump_this_task(outfile, d):
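    """Dump the signature data for the task currently being executed to outfile."""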
    import bb.parse
    mcfn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.parse.siggen.stampfile_base(mcfn)
    bb.parse.siggen.dump_sigtask(mcfn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title':   '\033[1m',
              'color_default': '\033[0m',
              'color_add':     '\033[0;32m',
              'color_remove':  '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
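    """Return a word-level inline diff of two strings, optionally colourised, noting whitespace-only changes."""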
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)

def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

# Handle renamed fields so that siginfo files written before the
# whitelist -> ignore-list renaming can still be compared
def handle_renames(data):
    if 'basewhitelist' in data:
        data['basehash_ignore_vars'] = data['basewhitelist']
        del data['basewhitelist']
    if 'taskwhitelist' in data:
        data['taskhash_ignore_tasks'] = data['taskwhitelist']
        del data['taskwhitelist']


def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
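    """
    Compare two siginfo files and return a list of human-readable strings
    describing the differences. recursecb, if given, is called for each
    changed task dependency so its differences can be reported too;
    collapsed=True condenses the output for recursive comparisons.
    """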
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)
    with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
        b_data = json.load(f, object_hook=SetDecoder)

    for data in [a_data, b_data]:
        handle_renames(data)

    def dict_diff(a, b, ignored_vars=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in ignored_vars:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        from collections import Counter

        # Convert lists back to tuples
        a = [(f[0], f[1]) for f in a]
        b = [(f[0], f[1]) for f in b]

        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    if 'basehash_ignore_vars' in a_data and a_data['basehash_ignore_vars'] != b_data['basehash_ignore_vars']:
        output.append(color_format("{color_title}basehash_ignore_vars changed{color_default} from '%s' to '%s'") % (a_data['basehash_ignore_vars'], b_data['basehash_ignore_vars']))
        if a_data['basehash_ignore_vars'] and b_data['basehash_ignore_vars']:
            output.append("changed items: %s" % a_data['basehash_ignore_vars'].symmetric_difference(b_data['basehash_ignore_vars']))

    if 'taskhash_ignore_tasks' in a_data and a_data['taskhash_ignore_tasks'] != b_data['taskhash_ignore_tasks']:
        output.append(color_format("{color_title}taskhash_ignore_tasks changed{color_default} from '%s' to '%s'") % (a_data['taskhash_ignore_tasks'], b_data['taskhash_ignore_tasks']))
        if a_data['taskhash_ignore_tasks'] and b_data['taskhash_ignore_tasks']:
            output.append("changed items: %s" % a_data['taskhash_ignore_tasks'].symmetric_difference(b_data['taskhash_ignore_tasks']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basehash_ignore_vars'] & b_data['basehash_ignore_vars'])
    if changed:
        for dep in sorted(changed):
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in sorted(added):
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in sorted(removed):
            output.append(color_format("{color_title}Dependency on variable %s was removed") % (dep))


    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in sorted(changed):
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if 'file_checksum_values' not in a_data:
        a_data['file_checksum_values'] = []
    if 'file_checksum_values' not in b_data:
        b_data['file_checksum_values'] = []

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if 'runtaskdeps' not in a_data:
        a_data['runtaskdeps'] = {}
    if 'runtaskdeps' not in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (a, a_data['runtaskhashes'][a], b, b_data['runtaskhashes'][b]))

        if changed:
            clean_a = a_data['runtaskdeps']
            clean_b = b_data['runtaskdeps']
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = a_data['runtaskhashes']
        b = b_data['runtaskhashes']
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in sorted(added):
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (dep, b[dep]))
        if removed:
            for dep in sorted(removed):
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (dep, a[dep]))
        if changed:
            for dep in sorted(changed):
                if not collapsed:
                    output.append(color_format("{color_title}Hash for task dependency %s changed{color_default} from %s to %s") % (dep, a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
                            output = [output[-1]] + recout
                            break

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output


def calc_basehash(sigdata):
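    """Recompute the base hash from the task's variable values and dependencies recorded in a siginfo dict."""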
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in sorted(alldeps):
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

def calc_taskhash(sigdata):
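    """Recompute the task hash from the dependency hashes, file checksums and taint recorded in a siginfo dict."""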
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            if "./" in c[0]:
                data = data + c[0]
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()


def dump_sigfile(a):
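    """Load siginfo file a and return a list of human-readable strings describing its recorded hash inputs."""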
    output = []

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)

    handle_renames(a_data)

    output.append("basehash_ignore_vars: %s" % (sorted(a_data['basehash_ignore_vars'])))

    output.append("taskhash_ignore_tasks: %s" % (sorted(a_data['taskhash_ignore_tasks'] or [])))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in sorted(a_data['gendeps']):
        output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))

    for dep in sorted(a_data['varvals']):
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))

    if 'runtaskhashes' in a_data:
        for dep in sorted(a_data['runtaskhashes']):
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output
1139