xref: /openbmc/openbmc/poky/bitbake/lib/bb/siggen.py (revision f3f93bb8)
1#
2# SPDX-License-Identifier: GPL-2.0-only
3#
4
5import hashlib
6import logging
7import os
8import re
9import tempfile
10import pickle
11import bb.data
12import difflib
13import simplediff
14from bb.checksum import FileChecksumCache
15from bb import runqueue
16import hashserv
17
18logger = logging.getLogger('BitBake.SigGen')
19
def init(d):
    """
    Instantiate the signature generator selected by BB_SIGNATURE_HANDLER.

    Every SignatureGenerator subclass found in this module's globals is a
    candidate; the one whose ``name`` attribute equals the configured value
    (default "noop") is constructed with ``d`` and returned. If no candidate
    matches, an error is logged and the base SignatureGenerator is returned.
    """
    siggens = [obj for obj in globals().values()
                      if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)

    # Only reached when no generator matched the requested name
    logger.error("Invalid signature generator '%s', using default 'noop'\n"
                 "Available generators: %s", desired,
                 ', '.join(obj.name for obj in siggens))
    return SignatureGenerator(d)
34
class SignatureGenerator(object):
    """
    Base ("noop") signature generator.

    Task hashes are simply the sha256 of the task id, and most hooks are
    no-ops intended to be overridden by subclasses.
    """
    name = "noop"

    def __init__(self, data):
        # 'data' is not used by the base implementation
        self.basehash = {}
        self.taskhash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.setscenetasks = {}

    def finalise(self, fn, d, varient):
        """Hook with no behaviour in the base class."""
        return None

    def get_unihash(self, tid):
        """Return the stored taskhash for tid (KeyError if it is unknown)."""
        return self.taskhash[tid]

    def get_taskhash(self, tid, deps, dataCache):
        """Store and return the sha256 hex digest of the task id itself."""
        digest = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        self.taskhash[tid] = digest
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return None

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        """Return "stampbase.taskname.extrainfo" with trailing dots stripped."""
        stamp = "%s.%s.%s" % (stampbase, taskname, extrainfo)
        return stamp.rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        """Return the clean mask, built exactly like the stamp file name."""
        mask = "%s.%s.%s" % (stampbase, taskname, extrainfo)
        return mask.rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        """Hook with no behaviour in the base class."""
        return None

    def invalidate_task(self, task, d, fn):
        """Invalidate the task by deleting its stamp via bb.build.del_stamp."""
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        """Hook with no behaviour in the base class."""
        return None

    def get_taskdata(self):
        """Return the generator state as a 7-tuple (see set_taskdata)."""
        return (
            self.runtaskdeps,
            self.taskhash,
            self.file_checksum_values,
            self.taints,
            self.basehash,
            self.unitaskhashes,
            self.setscenetasks,
        )

    def set_taskdata(self, data):
        """Restore state from a 7-tuple as produced by get_taskdata."""
        (self.runtaskdeps,
         self.taskhash,
         self.file_checksum_values,
         self.taints,
         self.basehash,
         self.unitaskhashes,
         self.setscenetasks) = data

    def reset(self, data):
        """Re-run __init__ on this instance with the given data."""
        self.__init__(data)

    def get_taskhashes(self):
        """Return the (taskhash, unitaskhashes) pair."""
        return self.taskhash, self.unitaskhashes

    def set_taskhashes(self, hashes):
        """Replace taskhash and unitaskhashes from a 2-item iterable."""
        taskhash, unitaskhashes = hashes
        self.taskhash = taskhash
        self.unitaskhashes = unitaskhashes

    def save_unitaskhashes(self):
        """Hook with no behaviour in the base class."""
        return None

    def set_setscene_tasks(self, setscene_tasks):
        """Hook with no behaviour in the base class."""
        return None
98
99class SignatureGeneratorBasic(SignatureGenerator):
100    """
101    """
102    name = "basic"
103
104    def __init__(self, data):
105        self.basehash = {}
106        self.taskhash = {}
107        self.taskdeps = {}
108        self.runtaskdeps = {}
109        self.file_checksum_values = {}
110        self.taints = {}
111        self.gendeps = {}
112        self.lookupcache = {}
113        self.setscenetasks = {}
114        self.basewhitelist = set((data.getVar("BB_HASHBASE_WHITELIST") or "").split())
115        self.taskwhitelist = None
116        self.init_rundepcheck(data)
117        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
118        if checksum_cache_file:
119            self.checksum_cache = FileChecksumCache()
120            self.checksum_cache.init_cache(data, checksum_cache_file)
121        else:
122            self.checksum_cache = None
123
124        self.unihash_cache = bb.cache.SimpleCache("1")
125        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
126
127    def init_rundepcheck(self, data):
128        self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None
129        if self.taskwhitelist:
130            self.twl = re.compile(self.taskwhitelist)
131        else:
132            self.twl = None
133
134    def _build_data(self, fn, d):
135
136        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
137        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d)
138
139        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basewhitelist, fn)
140
141        for task in tasklist:
142            tid = fn + ":" + task
143            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
144                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
145                bb.error("The following commands may help:")
146                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
147                # Make sure sigdata is dumped before run printdiff
148                bb.error("%s -Snone" % cmd)
149                bb.error("Then:")
150                bb.error("%s -Sprintdiff\n" % cmd)
151            self.basehash[tid] = basehash[tid]
152
153        self.taskdeps[fn] = taskdeps
154        self.gendeps[fn] = gendeps
155        self.lookupcache[fn] = lookupcache
156
157        return taskdeps
158
159    def set_setscene_tasks(self, setscene_tasks):
160        self.setscenetasks = setscene_tasks
161
162    def finalise(self, fn, d, variant):
163
164        mc = d.getVar("__BBMULTICONFIG", False) or ""
165        if variant or mc:
166            fn = bb.cache.realfn2virtual(fn, variant, mc)
167
168        try:
169            taskdeps = self._build_data(fn, d)
170        except bb.parse.SkipRecipe:
171            raise
172        except:
173            bb.warn("Error during finalise of %s" % fn)
174            raise
175
176        #Slow but can be useful for debugging mismatched basehashes
177        #for task in self.taskdeps[fn]:
178        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)
179
180        for task in taskdeps:
181            d.setVar("BB_BASEHASH_task-%s" % task, self.basehash[fn + ":" + task])
182
183    def rundep_check(self, fn, recipename, task, dep, depname, dataCache):
184        # Return True if we should keep the dependency, False to drop it
185        # We only manipulate the dependencies for packages not in the whitelist
186        if self.twl and not self.twl.search(recipename):
187            # then process the actual dependencies
188            if self.twl.search(depname):
189                return False
190        return True
191
192    def read_taint(self, fn, task, stampbase):
193        taint = None
194        try:
195            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
196                taint = taintf.read()
197        except IOError:
198            pass
199        return taint
200
    def get_taskhash(self, tid, deps, dataCache):
        """
        Compute, store and return the task hash for tid.

        The hash is the sha256 of a string built, in this order, from: the
        cached base hash, the unihash of every kept dependency (sorted by
        clean_basepath), the file checksums registered for the task, an
        implicit random taint for 'nostamp' tasks, and any taint read from
        the task's taint file.

        Side effects: fills self.basehash, self.runtaskdeps,
        self.file_checksum_values, self.taskhash and possibly self.taints
        for tid. Calls bb.fatal if a dependency has no taskhash yet.
        """

        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        data = dataCache.basetaskhash[tid]
        self.basehash[tid] = data
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCache.pkg_fn[fn]
        # The sort key fixes the order in which dependency hashes are appended
        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, deptaskname, depfn) = bb.runqueue.split_tid_mcfn(dep)
            # Dependencies from a different multiconfig do not contribute
            if mc != depmc:
                continue
            depname = dataCache.pkg_fn[depfn]
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCache):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            data = data + self.get_unihash(dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCache.file_checksums[fn]:
            # Use the checksum cache when one is configured, else ask the fetcher
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCache.file_checksums[fn][task], recipename)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCache.file_checksums[fn][task], recipename)
            for (f,cs) in checksums:
                # Every (file, checksum) pair is recorded, but only truthy
                # checksums are mixed into the hash input
                self.file_checksum_values[tid].append((f,cs))
                if cs:
                    data = data + cs

        taskdep = dataCache.task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            import uuid
            taint = str(uuid.uuid4())
            data = data + taint
            self.taints[tid] = "nostamp:" + taint

        # A taint file, if present, is appended too and replaces any
        # "nostamp:" entry stored in self.taints just above
        taint = self.read_taint(fn, task, dataCache.stamp[fn])
        if taint:
            data = data + taint
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task])
        return h
250
251    def writeout_file_checksum_cache(self):
252        """Write/update the file checksum cache onto disk"""
253        if self.checksum_cache:
254            self.checksum_cache.save_extras()
255            self.checksum_cache.save_merge()
256        else:
257            bb.fetch2.fetcher_parse_save()
258            bb.fetch2.fetcher_parse_done()
259
260    def save_unitaskhashes(self):
261        self.unihash_cache.save(self.unitaskhashes)
262
263    def dump_sigtask(self, fn, task, stampbase, runtime):
264
265        tid = fn + ":" + task
266        referencestamp = stampbase
267        if isinstance(runtime, str) and runtime.startswith("customfile"):
268            sigfile = stampbase
269            referencestamp = runtime[11:]
270        elif runtime and tid in self.taskhash:
271            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
272        else:
273            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]
274
275        bb.utils.mkdirhier(os.path.dirname(sigfile))
276
277        data = {}
278        data['task'] = task
279        data['basewhitelist'] = self.basewhitelist
280        data['taskwhitelist'] = self.taskwhitelist
281        data['taskdeps'] = self.taskdeps[fn][task]
282        data['basehash'] = self.basehash[tid]
283        data['gendeps'] = {}
284        data['varvals'] = {}
285        data['varvals'][task] = self.lookupcache[fn][task]
286        for dep in self.taskdeps[fn][task]:
287            if dep in self.basewhitelist:
288                continue
289            data['gendeps'][dep] = self.gendeps[fn][dep]
290            data['varvals'][dep] = self.lookupcache[fn][dep]
291
292        if runtime and tid in self.taskhash:
293            data['runtaskdeps'] = self.runtaskdeps[tid]
294            data['file_checksum_values'] = [(os.path.basename(f), cs) for f,cs in self.file_checksum_values[tid]]
295            data['runtaskhashes'] = {}
296            for dep in data['runtaskdeps']:
297                data['runtaskhashes'][dep] = self.get_unihash(dep)
298            data['taskhash'] = self.taskhash[tid]
299            data['unihash'] = self.get_unihash(tid)
300
301        taint = self.read_taint(fn, task, referencestamp)
302        if taint:
303            data['taint'] = taint
304
305        if runtime and tid in self.taints:
306            if 'nostamp:' in self.taints[tid]:
307                data['taint'] = self.taints[tid]
308
309        computed_basehash = calc_basehash(data)
310        if computed_basehash != self.basehash[tid]:
311            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
312        if runtime and tid in self.taskhash:
313            computed_taskhash = calc_taskhash(data)
314            if computed_taskhash != self.taskhash[tid]:
315                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
316                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)
317
318        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
319        try:
320            with os.fdopen(fd, "wb") as stream:
321                p = pickle.dump(data, stream, -1)
322                stream.flush()
323            os.chmod(tmpfile, 0o664)
324            os.rename(tmpfile, sigfile)
325        except (OSError, IOError) as err:
326            try:
327                os.unlink(tmpfile)
328            except OSError:
329                pass
330            raise err
331
332    def dump_sigfn(self, fn, dataCaches, options):
333        if fn in self.taskdeps:
334            for task in self.taskdeps[fn]:
335                tid = fn + ":" + task
336                mc = bb.runqueue.mc_from_tid(tid)
337                if tid not in self.taskhash:
338                    continue
339                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
340                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
341                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
342                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)
343
class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    """
    Variant of the basic generator that embeds the task's hash in stamp
    file names and invalidates tasks by writing a taint.
    """
    name = "basichash"

    def get_stampfile_hash(self, tid):
        """Return the taskhash for tid if known, otherwise its basehash."""
        # A tid missing from both tables raises KeyError from the basehash lookup
        hashes = self.taskhash if tid in self.taskhash else self.basehash
        return hashes[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        """
        Return "<stampbase>.<taskname>.<hash>.<extrainfo>" with trailing dots
        stripped. With clean=True the hash component is the wildcard "*".
        """
        suffix = "_setscene"
        # A "<task>_setscene" task shares the hash of "<task>"; the literal
        # task "do_setscene" is not treated as a setscene variant
        if taskname.endswith(suffix) and taskname != "do_setscene":
            hashed_task = taskname[:-len(suffix)]
        else:
            hashed_task = taskname
        tid = fn + ":" + hashed_task

        h = "*" if clean else self.get_stampfile_hash(tid)

        stamp = "%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)
        return stamp.rstrip('.')

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        """Return the stamp file name with a wildcard in place of the hash."""
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        """Force a rebuild of the task by writing a taint for it."""
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)
372
373class SignatureGeneratorUniHashMixIn(object):
374    def get_taskdata(self):
375        return (self.server, self.method) + super().get_taskdata()
376
377    def set_taskdata(self, data):
378        self.server, self.method = data[:2]
379        super().set_taskdata(data[2:])
380
381    def client(self):
382        if getattr(self, '_client', None) is None:
383            self._client = hashserv.create_client(self.server)
384        return self._client
385
386    def __get_task_unihash_key(self, tid):
387        # TODO: The key only *needs* to be the taskhash, the tid is just
388        # convenient
389        return '%s:%s' % (tid.rsplit("/", 1)[1], self.taskhash[tid])
390
391    def get_stampfile_hash(self, tid):
392        if tid in self.taskhash:
393            # If a unique hash is reported, use it as the stampfile hash. This
394            # ensures that if a task won't be re-run if the taskhash changes,
395            # but it would result in the same output hash
396            unihash = self.unitaskhashes.get(self.__get_task_unihash_key(tid), None)
397            if unihash is not None:
398                return unihash
399
400        return super().get_stampfile_hash(tid)
401
402    def set_unihash(self, tid, unihash):
403        self.unitaskhashes[self.__get_task_unihash_key(tid)] = unihash
404
405    def get_unihash(self, tid):
406        taskhash = self.taskhash[tid]
407
408        # If its not a setscene task we can return
409        if self.setscenetasks and tid not in self.setscenetasks:
410            return taskhash
411
412        key = self.__get_task_unihash_key(tid)
413
414        # TODO: This cache can grow unbounded. It probably only needs to keep
415        # for each task
416        unihash = self.unitaskhashes.get(key, None)
417        if unihash is not None:
418            return unihash
419
420        # In the absence of being able to discover a unique hash from the
421        # server, make it be equivalent to the taskhash. The unique "hash" only
422        # really needs to be a unique string (not even necessarily a hash), but
423        # making it match the taskhash has a few advantages:
424        #
425        # 1) All of the sstate code that assumes hashes can be the same
426        # 2) It provides maximal compatibility with builders that don't use
427        #    an equivalency server
428        # 3) The value is easy for multiple independent builders to derive the
429        #    same unique hash from the same input. This means that if the
430        #    independent builders find the same taskhash, but it isn't reported
431        #    to the server, there is a better chance that they will agree on
432        #    the unique hash.
433        unihash = taskhash
434
435        try:
436            data = self.client().get_unihash(self.method, self.taskhash[tid])
437            if data:
438                unihash = data
439                # A unique hash equal to the taskhash is not very interesting,
440                # so it is reported it at debug level 2. If they differ, that
441                # is much more interesting, so it is reported at debug level 1
442                bb.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
443            else:
444                bb.debug(2, 'No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
445        except hashserv.client.HashConnectionError as e:
446            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
447
448        self.unitaskhashes[key] = unihash
449        return unihash
450
    def report_unihash(self, path, task, d):
        """
        Compute the output hash for a finished task and report it to the
        hash equivalence server.

        path and task are passed through to the output hash method; task is
        the task name without its "do_" prefix (the prefix is added here).
        d supplies BB_TASKHASH, BB_UNIHASH, BB_FILENAME, T and the
        SSTATE_HASHEQUIV_* settings.

        Nothing is done when a setscene task set is known and this task is
        not in it. bb.fatal is called if the unihash cache has no entry for
        the task or disagrees with BB_UNIHASH. A server connection error is
        only warned about. If the server returns a different unihash, a
        bb.runqueue.taskUniHashUpdate event is fired.

        The data handed to the hash method is kept in
        "depsig.do_<task>.<pid>" under T, and "depsig.do_<task>" is
        re-pointed at it afterwards.
        """
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        # Same "<tid tail>:<taskhash>" layout as the unihash cache keys, but
        # built from BB_TASKHASH rather than self.taskhash
        key = tid.rsplit("/", 1)[1] + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # Sanity checks
        cache_unihash = self.unitaskhashes.get(key, None)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        # The PID keeps the file name unique per process
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            # A dotted method name is imported as "module.function"; a plain
            # name is evaluated as-is by better_eval
            # NOTE(review): self.method ends up in an evaluated expression, so
            # it must only ever come from trusted configuration
            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    # Rewind so that everything the hash method wrote to the
                    # sigfile can be read back and sent along
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                data = self.client().report_unihash(taskhash, self.method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    bb.debug(1, 'Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                else:
                    bb.debug(1, 'Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except hashserv.client.HashConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            # Runs on success and on failure, provided the file was opened
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                # Failure to create the symlink is deliberately ignored
                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass
526
527
528#
529# Dummy class used for bitbake-selftest
530#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    """
    Hash-equivalence generator that takes its server from BB_HASHSERVE and
    uses the "sstate_output_hash" method.
    """
    name = "TestEquivHash"

    def init_rundepcheck(self, data):
        """Run the parent setup, then configure the server and hash method."""
        super().init_rundepcheck(data)
        self.method = "sstate_output_hash"
        self.server = data.getVar('BB_HASHSERVE')
537
538
def dump_this_task(outfile, d):
    """
    Dump the signature data of the currently executing task to outfile.

    The recipe and task come from BB_FILENAME and BB_CURRENTTASK; the task's
    stamp is passed to dump_sigtask behind a "customfile:" marker.
    """
    import bb.parse
    recipe = d.getVar("BB_FILENAME")
    taskname = "do_" + d.getVar("BB_CURRENTTASK")
    stamp = bb.build.stamp_internal(taskname, d, None, True)
    runtime = "customfile:" + stamp
    bb.parse.siggen.dump_sigtask(recipe, taskname, outfile, runtime)
545
def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    palette = {
        'color_title':   '\033[1m',
        'color_default': '\033[0m',
        'color_add':     '\033[0;32m',
        'color_remove':  '\033[0;31m',
    }
    if enable_color:
        return palette
    # Colour disabled: keep every key present but map it to an empty string
    return dict.fromkeys(palette, '')
559
def worddiff_str(oldstr, newstr, colors=None):
    """
    Return a quoted, word-level inline diff of two strings.

    Both strings are split on single spaces. Added runs are rendered as
    {+...+} and removed runs as [-...-], wrapped in the given colour codes
    (no colour when colors is empty or None). A note is appended when the
    strings differ only in whitespace.
    """
    if not colors:
        colors = init_colors(False)

    pieces = []
    for change, words in simplediff.diff(oldstr.split(' '), newstr.split(' ')):
        text = ' '.join(words)
        if change == '=':
            pieces.append(text)
        elif change == '+':
            pieces.append('{color_add}{{+{value}+}}{color_default}'.format(value=text, **colors))
        elif change == '-':
            pieces.append('{color_remove}[-{value}-]{color_default}'.format(value=text, **colors))

    # Equal once all whitespace runs are collapsed, yet not identical
    if oldstr != newstr and oldstr.split() == newstr.split():
        whitespace_note = ' (whitespace changed)'
    else:
        whitespace_note = ''
    return '"%s"%s' % (' '.join(pieces), whitespace_note)
579
def list_inline_diff(oldlist, newlist, colors=None):
    """
    Return a bracketed inline diff of two lists.

    Unchanged runs are shown quoted; added and removed runs are prefixed
    with "+" and "-" and wrapped in the given colour codes (no colour when
    colors is empty or None).
    """
    if not colors:
        colors = init_colors(False)

    entries = []
    for change, items in simplediff.diff(oldlist, newlist):
        text = ' '.join(items)
        if change == '=':
            entries.append("'%s'" % text)
        elif change == '+':
            entries.append('{color_add}+{value}{color_default}'.format(value=text, **colors))
        elif change == '-':
            entries.append('{color_remove}-{value}{color_default}'.format(value=text, **colors))
    return '[%s]' % (', '.join(entries))
596
def clean_basepath(a):
    """
    Reduce a recipe path / task id to a short form.

    The result is the last two "/"-separated components of the path. For a
    "virtual:" name, everything before its last ":" is appended; for an
    "mc:<name>:" prefixed name, ":mc:<name>" is appended last.
    """
    mc = None
    if a.startswith("mc:"):
        _, mc, a = a.split(":", 2)

    components = a.rsplit("/", 2)
    pieces = [components[1] + '/' + components[2]]
    if a.startswith("virtual:"):
        pieces.append(a.rsplit(":", 1)[0])
    if mc:
        pieces.append("mc:" + mc)
    return ":".join(pieces)
607
def clean_basepaths(a):
    """Return a copy of mapping a with every key passed through clean_basepath."""
    return {clean_basepath(key): a[key] for key in a}
613
def clean_basepaths_list(a):
    """Return a list with clean_basepath applied to every item of a, in order."""
    return [clean_basepath(item) for item in a]
619
620def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
621    output = []
622
623    colors = init_colors(color)
624    def color_format(formatstr, **values):
625        """
626        Return colour formatted string.
627        NOTE: call with the format string, not an already formatted string
628        containing values (otherwise you could have trouble with { and }
629        characters)
630        """
631        if not formatstr.endswith('{color_default}'):
632            formatstr += '{color_default}'
633        # In newer python 3 versions you can pass both of these directly,
634        # but we only require 3.4 at the moment
635        formatparams = {}
636        formatparams.update(colors)
637        formatparams.update(values)
638        return formatstr.format(**formatparams)
639
640    with open(a, 'rb') as f:
641        p1 = pickle.Unpickler(f)
642        a_data = p1.load()
643    with open(b, 'rb') as f:
644        p2 = pickle.Unpickler(f)
645        b_data = p2.load()
646
647    def dict_diff(a, b, whitelist=set()):
648        sa = set(a.keys())
649        sb = set(b.keys())
650        common = sa & sb
651        changed = set()
652        for i in common:
653            if a[i] != b[i] and i not in whitelist:
654                changed.add(i)
655        added = sb - sa
656        removed = sa - sb
657        return changed, added, removed
658
659    def file_checksums_diff(a, b):
660        from collections import Counter
661        # Handle old siginfo format
662        if isinstance(a, dict):
663            a = [(os.path.basename(f), cs) for f, cs in a.items()]
664        if isinstance(b, dict):
665            b = [(os.path.basename(f), cs) for f, cs in b.items()]
666        # Compare lists, ensuring we can handle duplicate filenames if they exist
667        removedcount = Counter(a)
668        removedcount.subtract(b)
669        addedcount = Counter(b)
670        addedcount.subtract(a)
671        added = []
672        for x in b:
673            if addedcount[x] > 0:
674                addedcount[x] -= 1
675                added.append(x)
676        removed = []
677        changed = []
678        for x in a:
679            if removedcount[x] > 0:
680                removedcount[x] -= 1
681                for y in added:
682                    if y[0] == x[0]:
683                        changed.append((x[0], x[1], y[1]))
684                        added.remove(y)
685                        break
686                else:
687                    removed.append(x)
688        added = [x[0] for x in added]
689        removed = [x[0] for x in removed]
690        return changed, added, removed
691
692    if 'basewhitelist' in a_data and a_data['basewhitelist'] != b_data['basewhitelist']:
693        output.append(color_format("{color_title}basewhitelist changed{color_default} from '%s' to '%s'") % (a_data['basewhitelist'], b_data['basewhitelist']))
694        if a_data['basewhitelist'] and b_data['basewhitelist']:
695            output.append("changed items: %s" % a_data['basewhitelist'].symmetric_difference(b_data['basewhitelist']))
696
697    if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']:
698        output.append(color_format("{color_title}taskwhitelist changed{color_default} from '%s' to '%s'") % (a_data['taskwhitelist'], b_data['taskwhitelist']))
699        if a_data['taskwhitelist'] and b_data['taskwhitelist']:
700            output.append("changed items: %s" % a_data['taskwhitelist'].symmetric_difference(b_data['taskwhitelist']))
701
702    if a_data['taskdeps'] != b_data['taskdeps']:
703        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))
704
705    if a_data['basehash'] != b_data['basehash'] and not collapsed:
706        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))
707
708    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basewhitelist'] & b_data['basewhitelist'])
709    if changed:
710        for dep in changed:
711            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
712            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
713                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
714    if added:
715        for dep in added:
716            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
717    if removed:
718        for dep in removed:
719            output.append(color_format("{color_title}Dependency on Variable %s was removed") % (dep))
720
721
722    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
723    if changed:
724        for dep in changed:
725            oldval = a_data['varvals'][dep]
726            newval = b_data['varvals'][dep]
727            if newval and oldval and ('\n' in oldval or '\n' in newval):
728                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
729                # Cut off the first two lines, since we aren't interested in
730                # the old/new filename (they are blank anyway in this case)
731                difflines = list(diff)[2:]
732                if color:
733                    # Add colour to diff output
734                    for i, line in enumerate(difflines):
735                        if line.startswith('+'):
736                            line = color_format('{color_add}{line}', line=line)
737                            difflines[i] = line
738                        elif line.startswith('-'):
739                            line = color_format('{color_remove}{line}', line=line)
740                            difflines[i] = line
741                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
742            elif newval and oldval and (' ' in oldval or ' ' in newval):
743                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
744            else:
745                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))
746
747    if not 'file_checksum_values' in a_data:
748         a_data['file_checksum_values'] = {}
749    if not 'file_checksum_values' in b_data:
750         b_data['file_checksum_values'] = {}
751
752    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
753    if changed:
754        for f, old, new in changed:
755            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
756    if added:
757        for f in added:
758            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
759    if removed:
760        for f in removed:
761            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))
762
763    if not 'runtaskdeps' in a_data:
764         a_data['runtaskdeps'] = {}
765    if not 'runtaskdeps' in b_data:
766         b_data['runtaskdeps'] = {}
767
768    if not collapsed:
769        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
770            changed = ["Number of task dependencies changed"]
771        else:
772            changed = []
773            for idx, task in enumerate(a_data['runtaskdeps']):
774                a = a_data['runtaskdeps'][idx]
775                b = b_data['runtaskdeps'][idx]
776                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
777                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))
778
779        if changed:
780            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
781            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
782            if clean_a != clean_b:
783                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
784            else:
785                output.append(color_format("{color_title}runtaskdeps changed:"))
786            output.append("\n".join(changed))
787
788
789    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
790        a = a_data['runtaskhashes']
791        b = b_data['runtaskhashes']
792        changed, added, removed = dict_diff(a, b)
793        if added:
794            for dep in added:
795                bdep_found = False
796                if removed:
797                    for bdep in removed:
798                        if b[dep] == a[bdep]:
799                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
800                            bdep_found = True
801                if not bdep_found:
802                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep]))
803        if removed:
804            for dep in removed:
805                adep_found = False
806                if added:
807                    for adep in added:
808                        if b[adep] == a[dep]:
809                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
810                            adep_found = True
811                if not adep_found:
812                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep]))
813        if changed:
814            for dep in changed:
815                if not collapsed:
816                    output.append(color_format("{color_title}Hash for dependent task %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep]))
817                if callable(recursecb):
818                    recout = recursecb(dep, a[dep], b[dep])
819                    if recout:
820                        if collapsed:
821                            output.extend(recout)
822                        else:
823                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
824                            # that hash since in all likelihood, they're the same changes this task also saw.
825                            output = [output[-1]] + recout
826
827    a_taint = a_data.get('taint', None)
828    b_taint = b_data.get('taint', None)
829    if a_taint != b_taint:
830        if a_taint and a_taint.startswith('nostamp:'):
831            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
832        if b_taint and b_taint.startswith('nostamp:'):
833            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
834        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))
835
836    return output
837
838
def calc_basehash(sigdata):
    """
    Recompute the base hash of a task from loaded signature data.

    The hashed text starts with the value recorded in sigdata['varvals']
    for sigdata['task'] (an empty string if that value is None). Then, for
    each name in sigdata['taskdeps'], in iteration order, the name itself is
    appended followed by str() of its value from sigdata['varvals']; values
    that are None contribute only the name.

    Returns the SHA-256 hex digest of the resulting text encoded as UTF-8.
    """
    task_name = sigdata['task']
    varvals = sigdata['varvals']

    own_value = varvals[task_name]
    # A task without a recorded value is hashed as if its value were empty
    pieces = ['' if own_value is None else own_value]

    for name in sigdata['taskdeps']:
        pieces.append(name)
        value = varvals[name]
        if value is not None:
            pieces.append(str(value))

    return hashlib.sha256(''.join(pieces).encode("utf-8")).hexdigest()
854
def calc_taskhash(sigdata):
    """
    Recompute the task hash from loaded signature data.

    The hashed text is built, in this order, from:
      - sigdata['basehash']
      - for each entry of sigdata['runtaskdeps'], its hash looked up in
        sigdata['runtaskhashes']
      - the second element of each sigdata['file_checksum_values'] entry,
        skipping entries where that element is empty or None
      - sigdata['taint'] if present, with a leading 'nostamp:' marker removed

    'runtaskdeps' and 'file_checksum_values' are treated as empty when they
    are absent: dump_sigfile() treats both keys as optional but still calls
    this function unconditionally, which would otherwise raise KeyError.

    Returns the SHA-256 hex digest of the resulting text encoded as UTF-8.
    """
    parts = [sigdata['basehash']]

    parts.extend(sigdata['runtaskhashes'][dep]
                 for dep in sigdata.get('runtaskdeps', ()))

    parts.extend(entry[1]
                 for entry in sigdata.get('file_checksum_values', ())
                 if entry[1])

    if 'taint' in sigdata:
        taint = sigdata['taint']
        # Only strip the marker when it really is a prefix. A plain substring
        # test would chop the first characters off any taint that merely
        # contains 'nostamp:' somewhere else.
        if taint.startswith('nostamp:'):
            taint = taint[len('nostamp:'):]
        parts.append(taint)

    return hashlib.sha256(''.join(parts).encode("utf-8")).hexdigest()
872
873
def dump_sigfile(a):
    """
    Describe the contents of the pickled signature data file 'a'.

    The file is unpickled and its fields are rendered one per string: the
    whitelists, task dependencies, base hash, per-variable dependencies and
    values, then (when present) the runtime task dependencies, file
    checksums, dependent task hashes and taint. The base hash and task hash
    recomputed by calc_basehash() and calc_taskhash() are appended last.

    Returns the list of strings in that order.
    """
    # NOTE: unpickling can execute arbitrary code; 'a' must be a trusted file
    with open(a, 'rb') as sigfile:
        sig = pickle.Unpickler(sigfile).load()

    lines = [
        "basewhitelist: %s" % (sig['basewhitelist']),
        "taskwhitelist: %s" % (sig['taskwhitelist']),
        "Task dependencies: %s" % (sorted(sig['taskdeps'])),
        "basehash: %s" % (sig['basehash']),
    ]

    lines.extend("List of dependencies for variable %s is %s" % (var, deps)
                 for var, deps in sig['gendeps'].items())

    lines.extend("Variable %s value is %s" % (var, value)
                 for var, value in sig['varvals'].items())

    # The runtime fields are optional, so each is reported only when stored
    if 'runtaskdeps' in sig:
        lines.append("Tasks this task depends on: %s" % (sig['runtaskdeps']))

    if 'file_checksum_values' in sig:
        lines.append("This task depends on the checksums of files: %s" % (sig['file_checksum_values']))

    if 'runtaskhashes' in sig:
        lines.extend("Hash for dependent task %s is %s" % (dep, dephash)
                     for dep, dephash in sig['runtaskhashes'].items())

    if 'taint' in sig:
        taint = sig['taint']
        if taint.startswith('nostamp:'):
            # Spell out that the value following the marker is a uuid4
            taint = taint.replace('nostamp:', 'nostamp(uuid4):')
        lines.append("Tainted (by forced/invalidated task): %s" % taint)

    if 'task' not in sig:
        lines.append("Unable to compute base hash")
    else:
        lines.append("Computed base hash is %s and from file %s" % (calc_basehash(sig), sig['basehash']))

    lines.append("Computed task hash is %s" % calc_taskhash(sig))

    return lines
922