xref: /openbmc/openbmc/poky/bitbake/lib/bb/siggen.py (revision 08902b01)
#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
from bb.checksum import FileChecksumCache
from bb import runqueue

logger = logging.getLogger('BitBake.SigGen')

def init(d):
    siggens = [obj for obj in globals().values()
                      if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)

    logger.error("Invalid signature generator '%s', using default 'noop'\n"
                 "Available generators: %s", desired,
                 ', '.join(obj.name for obj in siggens))
    return SignatureGenerator(d)
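
# Editor's note: selection sketch. The handler is chosen by matching
# BB_SIGNATURE_HANDLER against each generator's `name` attribute, e.g. in a
# configuration file (value shown is illustrative):
#
#   BB_SIGNATURE_HANDLER = "basichash"
#
# Unknown names log an error and fall back to the no-op generator above.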

class SignatureGenerator(object):
    """
    A dummy "noop" signature generator: the default, and the fallback when an
    invalid BB_SIGNATURE_HANDLER is configured.
    """
    name = "noop"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}

    def finalise(self, fn, d, variant):
        return

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def get_taskhash(self, tid, deps, dataCache):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, d, fn):
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unitaskhashes

    def set_taskhashes(self, hashes):
        self.taskhash, self.unitaskhashes = hashes

    def save_unitaskhashes(self):
        return


class SignatureGeneratorBasic(SignatureGenerator):
    """
    The "basic" signature generator: hashes each task's variable dependencies,
    runtime task dependencies and file checksums into its task hash.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.basewhitelist = set((data.getVar("BB_HASHBASE_WHITELIST") or "").split())
        self.taskwhitelist = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("1")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})

    def init_rundepcheck(self, data):
        self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None
        if self.taskwhitelist:
            self.twl = re.compile(self.taskwhitelist)
        else:
            self.twl = None

    def _build_data(self, fn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basewhitelist, fn)

        for task in tasklist:
            tid = fn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % fn)
            raise

        # Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH_task-%s" % task, self.basehash[fn + ":" + task])

    def rundep_check(self, fn, recipename, task, dep, depname, dataCache):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the whitelist
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True
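
    # Editor's note: an illustrative example of the check above, assuming a
    # hypothetical BB_HASHTASK_WHITELIST = "^(native-.*)$": a recipe whose
    # name does not match the pattern drops any dependency whose name does
    # match, so hash churn in whitelisted providers does not change its task
    # hashes.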

    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def get_taskhash(self, tid, deps, dataCache):

        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        data = dataCache.basetaskhash[tid]
        self.basehash[tid] = data
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCache.pkg_fn[fn]
        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, deptaskname, depfn) = bb.runqueue.split_tid_mcfn(dep)
            if mc != depmc:
                continue
            depname = dataCache.pkg_fn[depfn]
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCache):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            data = data + self.get_unihash(dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCache.file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCache.file_checksums[fn][task], recipename)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCache.file_checksums[fn][task], recipename)
            for (f,cs) in checksums:
                self.file_checksum_values[tid].append((f,cs))
                if cs:
                    data = data + cs

        taskdep = dataCache.task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            import uuid
            taint = str(uuid.uuid4())
            data = data + taint
            self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCache.stamp[fn])
        if taint:
            data = data + taint
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task])
        return h
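
    # Editor's note: schematically, the string hashed above is
    #
    #   basehash(tid) + unihash(dep_1) + ... + unihash(dep_n)
    #                 + file_checksum_1 + ... + file_checksum_m
    #                 [+ nostamp or forced-run taint]
    #
    # calc_taskhash() at the bottom of this file recomputes the same value
    # from a dumped siginfo file.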

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def dump_sigtask(self, fn, task, stampbase, runtime):

        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.taskhash[tid]
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basewhitelist'] = self.basewhitelist
        data['taskwhitelist'] = self.taskwhitelist
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basewhitelist:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = [(os.path.basename(f), cs) for f,cs in self.file_checksum_values[tid]]
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

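        # Write the siginfo atomically: dump to a temporary file in the same
        # directory, then rename() it over the final name so that readers
        # never observe a partially written file.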
        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with os.fdopen(fd, "wb") as stream:
                pickle.dump(data, stream, -1)
                stream.flush()
            os.chmod(tmpfile, 0o664)
            os.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

    def dump_sigfn(self, fn, dataCaches, options):
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If the tid is not in basehash either, let the KeyError surface: it
        # indicates an error in the calling code
        return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        if taskname != "do_setscene" and taskname.endswith("_setscene"):
            tid = fn + ":" + taskname[:-9]
        else:
            tid = fn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')
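
    # Editor's note: with a hypothetical stampbase "/build/stamps/foo-1.0-r0",
    # taskname "do_compile", hash "abc123" and empty extrainfo, this returns
    # "/build/stamps/foo-1.0-r0.do_compile.abc123" (rstrip drops the dot left
    # by the empty extrainfo); with clean=True the hash is "*", giving a glob
    # suitable for matching stale stamps.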

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)

class SignatureGeneratorUniHashMixIn(object):
    def get_taskdata(self):
        return (self.server, self.method) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method = data[:2]
        super().set_taskdata(data[2:])

    def __get_task_unihash_key(self, tid):
        # TODO: The key only *needs* to be the taskhash, the tid is just
        # convenient
        return '%s:%s' % (tid, self.taskhash[tid])

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures that a task won't be re-run if its taskhash changes but
            # it would still produce the same output hash
            unihash = self.unitaskhashes.get(self.__get_task_unihash_key(tid), None)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        self.unitaskhashes[self.__get_task_unihash_key(tid)] = unihash

    def get_unihash(self, tid):
        import urllib.error
        import urllib.parse
        import urllib.request
        import json

        taskhash = self.taskhash[tid]

        key = self.__get_task_unihash_key(tid)

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # the most recent entry for each task
        unihash = self.unitaskhashes.get(key, None)
        if unihash is not None:
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) The value is easy for multiple independent builders to derive the
        #    same unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            url = '%s/v1/equivalent?%s' % (self.server,
                    urllib.parse.urlencode({'method': self.method, 'taskhash': self.taskhash[tid]}))

            request = urllib.request.Request(url)
            response = urllib.request.urlopen(request)
            data = response.read().decode('utf-8')

            json_data = json.loads(data)

            if json_data:
                unihash = json_data['unihash']
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                bb.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                bb.debug(2, 'No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except urllib.error.URLError as e:
            bb.warn('Failure contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        except (KeyError, json.JSONDecodeError) as e:
            bb.warn('Poorly formatted response from %s: %s' % (self.server, str(e)))

        self.unitaskhashes[key] = unihash
        return unihash
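
    # Editor's note: a sketch of the exchange above, assuming a server at a
    # hypothetical address "http://localhost:8686":
    #
    #   GET /v1/equivalent?method=<method>&taskhash=<sha256>
    #   -> {"unihash": "<sha256>", ...}   (or an empty body if unknown)
    #
    # Any unihash returned is cached and used in place of the taskhash, e.g.
    # for the stampfile hash in get_stampfile_hash() above.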

    def report_unihash(self, path, task, d):
        import urllib.error
        import urllib.request
        import json
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        key = fn + ':do_' + task + ':' + taskhash

        # Sanity checks
        cache_unihash = self.unitaskhashes.get(key, None)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                url = '%s/v1/equivalent' % self.server
                task_data = {
                    'taskhash': taskhash,
                    'method': self.method,
                    'outhash': outhash,
                    'unihash': unihash,
                    'owner': d.getVar('SSTATE_HASHEQUIV_OWNER')
                    }

                if report_taskdata:
                    sigfile.seek(0)

                    task_data['PN'] = d.getVar('PN')
                    task_data['PV'] = d.getVar('PV')
                    task_data['PR'] = d.getVar('PR')
                    task_data['task'] = task
                    task_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                headers = {'content-type': 'application/json'}

                request = urllib.request.Request(url, json.dumps(task_data).encode('utf-8'), headers)
                response = urllib.request.urlopen(request)
                data = response.read().decode('utf-8')

                json_data = json.loads(data)
                new_unihash = json_data['unihash']

                if new_unihash != unihash:
                    bb.debug(1, 'Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                else:
                    bb.debug(1, 'Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except urllib.error.URLError as e:
                bb.warn('Failure contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
            except (KeyError, json.JSONDecodeError) as e:
                bb.warn('Poorly formatted response from %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass


#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = "http://" + data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"
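
# Editor's note: BB_HASHSERVE is assumed here to be a bare "host:port" string,
# since the "http://" scheme is prepended above; the selftests are expected to
# point it at a locally started hash equivalence server.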


def dump_this_task(outfile, d):
    import bb.parse
    fn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.build.stamp_internal(task, d, None, True)
    bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title':   '\033[1m',
              'color_default': '\033[0m',
              'color_add':     '\033[0;32m',
              'color_remove':  '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)
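
# Editor's note: an illustrative call with colours disabled; the exact output
# format is inferred from the markers above:
#
#   worddiff_str("the quick brown fox", "the slow brown fox")
#   -> '"the [-quick-] {+slow+} brown fox"'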

def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

def clean_basepath(a):
    mc = None
    if a.startswith("mc:"):
        _, mc, a = a.split(":", 2)
    b = a.rsplit("/", 2)[1] + '/' + a.rsplit("/", 2)[2]
    if a.startswith("virtual:"):
        b = b + ":" + a.rsplit(":", 1)[0]
    if mc:
        b = b + ":mc:" + mc
    return b
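
# Editor's note: a hypothetical example of the shortening performed above:
#
#   clean_basepath("/srv/meta/recipes-core/foo/foo_1.0.bb:do_compile")
#   -> "foo/foo_1.0.bb:do_compile"
#
# i.e. only the last two path components survive, plus any virtual:/mc:
# decoration.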

def clean_basepaths(a):
    b = {}
    for x in a:
        b[clean_basepath(x)] = a[x]
    return b

def clean_basepaths_list(a):
    b = []
    for x in a:
        b.append(clean_basepath(x))
    return b

def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with open(a, 'rb') as f:
        p1 = pickle.Unpickler(f)
        a_data = p1.load()
    with open(b, 'rb') as f:
        p2 = pickle.Unpickler(f)
        b_data = p2.load()

    def dict_diff(a, b, whitelist=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in whitelist:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        from collections import Counter
        # Handle old siginfo format
        if isinstance(a, dict):
            a = [(os.path.basename(f), cs) for f, cs in a.items()]
        if isinstance(b, dict):
            b = [(os.path.basename(f), cs) for f, cs in b.items()]
        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    if 'basewhitelist' in a_data and a_data['basewhitelist'] != b_data['basewhitelist']:
        output.append(color_format("{color_title}basewhitelist changed{color_default} from '%s' to '%s'") % (a_data['basewhitelist'], b_data['basewhitelist']))
        if a_data['basewhitelist'] and b_data['basewhitelist']:
            output.append("changed items: %s" % a_data['basewhitelist'].symmetric_difference(b_data['basewhitelist']))

    if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']:
        output.append(color_format("{color_title}taskwhitelist changed{color_default} from '%s' to '%s'") % (a_data['taskwhitelist'], b_data['taskwhitelist']))
        if a_data['taskwhitelist'] and b_data['taskwhitelist']:
            output.append("changed items: %s" % a_data['taskwhitelist'].symmetric_difference(b_data['taskwhitelist']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basewhitelist'] & b_data['basewhitelist'])
    if changed:
        for dep in changed:
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in added:
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in removed:
            output.append(color_format("{color_title}Dependency on variable %s was removed") % (dep))


    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in changed:
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if 'file_checksum_values' not in a_data:
        a_data['file_checksum_values'] = {}
    if 'file_checksum_values' not in b_data:
        b_data['file_checksum_values'] = {}

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if 'runtaskdeps' not in a_data:
        a_data['runtaskdeps'] = {}
    if 'runtaskdeps' not in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = a_data['runtaskhashes']
        b = b_data['runtaskhashes']
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in added:
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep]))
        if removed:
            for dep in removed:
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep]))
        if changed:
            for dep in changed:
                if not collapsed:
                    output.append(color_format("{color_title}Hash for dependent task %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
830                            # that hash since in all likelyhood, they're the same changes this task also saw.
831                            output = [output[-1]] + recout
832
833    a_taint = a_data.get('taint', None)
834    b_taint = b_data.get('taint', None)
835    if a_taint != b_taint:
836        if a_taint and a_taint.startswith('nostamp:'):
837            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
838        if b_taint and b_taint.startswith('nostamp:'):
839            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
840        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))
841
842    return output
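
# Editor's note: a minimal usage sketch with hypothetical paths; this is
# essentially what bitbake-diffsigs does with two siginfo/sigdata files:
#
#   for line in compare_sigfiles("foo.do_compile.sigdata.OLD",
#                                "foo.do_compile.sigdata.NEW", color=True):
#       print(line)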


def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in alldeps:
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()
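
# Editor's note: schematically, the hashed string above is
#
#   varvals[task] + dep_1 + varvals[dep_1] + ... + dep_n + varvals[dep_n]
#
# i.e. the task's own expanded value followed by each dependent variable's
# name and value, in taskdeps iteration order.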

def calc_taskhash(sigdata):
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()


def dump_sigfile(a):
    output = []

    with open(a, 'rb') as f:
        p1 = pickle.Unpickler(f)
        a_data = p1.load()

    output.append("basewhitelist: %s" % (a_data['basewhitelist']))

    output.append("taskwhitelist: %s" % (a_data['taskwhitelist']))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in a_data['gendeps']:
        output.append("List of dependencies for variable %s is %s" % (dep, a_data['gendeps'][dep]))

    for dep in a_data['varvals']:
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (a_data['runtaskdeps']))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (a_data['file_checksum_values']))

    if 'runtaskhashes' in a_data:
        for dep in a_data['runtaskhashes']:
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and the one recorded in the file is %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output
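
# Editor's note: a minimal usage sketch (path hypothetical), mirroring the
# bitbake-dumpsig tooling:
#
#   print('\n'.join(dump_sigfile("foo.do_compile.sigdata.<hash>")))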