xref: /openbmc/openbmc/poky/bitbake/lib/bb/siggen.py (revision 4ed12e16)
1#
2# SPDX-License-Identifier: GPL-2.0-only
3#
4
5import hashlib
6import logging
7import os
8import re
9import tempfile
10import pickle
11import bb.data
12import difflib
13import simplediff
14from bb.checksum import FileChecksumCache
15from bb import runqueue
16import hashserv
17
# Module-wide logger for signature generation messages.
logger = logging.getLogger('BitBake.SigGen')
# Dedicated logger (a child of the one above in the logging hierarchy) used
# by the unihash / hash equivalence code in this file.
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')
20
def init(d):
    """
    Instantiate the signature generator selected by BB_SIGNATURE_HANDLER.

    Every class found in this module's globals that is a subclass of
    SignatureGenerator is a candidate. The candidate whose ``name``
    attribute equals the configured value ("noop" when the variable is unset
    or empty) is constructed with ``d`` and returned. If no candidate
    matches, an error listing the available names is logged and a plain
    SignatureGenerator is returned instead.
    """
    siggens = [obj for obj in globals().values()
                      if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)

    # Only reached when no generator carries the requested name
    logger.error("Invalid signature generator '%s', using default 'noop'\n"
                 "Available generators: %s", desired,
                 ', '.join(obj.name for obj in siggens))
    return SignatureGenerator(d)
35
class SignatureGenerator(object):
    """
    Baseline ("noop") signature generator.

    Task hashes are computed from the task id alone and most hooks do
    nothing. The other generators in this file subclass it and override
    the hooks they need.
    """
    name = "noop"

    def __init__(self, data):
        """Start with empty per-task state; ``data`` is not consulted here."""
        self.basehash, self.taskhash, self.unihash = {}, {}, {}
        self.runtaskdeps, self.file_checksum_values = {}, {}
        self.taints, self.unitaskhashes, self.tidtopn = {}, {}, {}
        self.setscenetasks = set()

    def finalise(self, fn, d, varient):
        """No-op in this generator."""
        return None

    def postparsing_clean_cache(self):
        """No-op in this generator."""
        return None

    def get_unihash(self, tid):
        """Return the stored taskhash for ``tid`` (KeyError if none is stored)."""
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCache):
        """No-op in this generator."""
        return None

    def get_taskhash(self, tid, deps, dataCache):
        """Store and return the SHA-256 hex digest of the UTF-8 encoded tid."""
        digest = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        self.taskhash[tid] = digest
        return digest

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk (nothing to do here)"""
        return None

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        """Return "<stampbase>.<taskname>.<extrainfo>" with trailing dots removed."""
        stamp = "%s.%s.%s" % (stampbase, taskname, extrainfo)
        return stamp.rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        """Return the same string stampfile() builds from these arguments."""
        mask = "%s.%s.%s" % (stampbase, taskname, extrainfo)
        return mask.rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        """No-op in this generator."""
        return None

    def invalidate_task(self, task, d, fn):
        """Invalidate the task by deleting its stamp."""
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        """No-op in this generator."""
        return None

    def get_taskdata(self):
        """Return the generator state as a 9-tuple accepted by set_taskdata()."""
        return (
            self.runtaskdeps,
            self.taskhash,
            self.unihash,
            self.file_checksum_values,
            self.taints,
            self.basehash,
            self.unitaskhashes,
            self.tidtopn,
            self.setscenetasks,
        )

    def set_taskdata(self, data):
        """Restore state from a 9-item sequence laid out as in get_taskdata()."""
        (self.runtaskdeps,
         self.taskhash,
         self.unihash,
         self.file_checksum_values,
         self.taints,
         self.basehash,
         self.unitaskhashes,
         self.tidtopn,
         self.setscenetasks) = data

    def reset(self, data):
        """Re-run __init__ on this instance with ``data``."""
        self.__init__(data)

    def get_taskhashes(self):
        """Return (taskhash, unihash, unitaskhashes, tidtopn)."""
        return (self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn)

    def set_taskhashes(self, hashes):
        """Restore the four mappings returned by get_taskhashes()."""
        (self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn) = hashes

    def save_unitaskhashes(self):
        """No-op in this generator."""
        return None

    def set_setscene_tasks(self, setscene_tasks):
        """No-op in this generator."""
        return None
107
class SignatureGeneratorBasic(SignatureGenerator):
    """
    Signature generator ("basic") that derives task hashes from the parsed
    metadata (basehash), the hashes of the tasks depended upon, file
    checksums and taints.
    """
    name = "basic"

    def __init__(self, data):
        """Initialise empty state and read the configuration from ``data``."""
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.setscenetasks = set()
        self.basewhitelist = set((data.getVar("BB_HASHBASE_WHITELIST") or "").split())
        self.taskwhitelist = None
        self.init_rundepcheck(data)
        # The on-disk file checksum cache is only used when a cache file is configured
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        """Read BB_HASHTASK_WHITELIST and compile it as a regex (``self.twl``), or None if unset."""
        self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None
        if self.taskwhitelist:
            self.twl = re.compile(self.taskwhitelist)
        else:
            self.twl = None

    def _build_data(self, fn, d):
        """
        Generate and record dependency data and basehashes for recipe ``fn``.

        Unless BB_HASH_IGNORE_MISMATCH is '1', errors are logged when a task
        already has a stored basehash that differs from the new one. The new
        values are stored either way. Returns the task dependencies.
        """

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basewhitelist)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basewhitelist, fn)

        for task in tasklist:
            tid = fn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before run printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        """Remember the given tasks as a set in ``self.setscenetasks``."""
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):
        """
        Compute the basehashes for ``fn`` and export each one into ``d`` as
        BB_BASEHASH_task-<task>.

        When a variant or multiconfig is in effect, ``fn`` is first converted
        with bb.cache.realfn2virtual(). bb.parse.SkipRecipe propagates
        silently; any other exception is re-raised after a warning.
        """

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            # Deliberately broad: the failure is only annotated and always re-raised
            bb.warn("Error during finalise of %s" % fn)
            raise

        #Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH_task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        """Drop the parse-time dependency data."""
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCache):
        """Decide whether the dependency ``dep`` takes part in the task hash."""
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the whitelist
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        """Return the content of "<stampbase>.<task>.taint", or None if it cannot be opened/read."""
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCache):
        """
        Collect the inputs get_taskhash() needs for ``tid``: its basehash,
        the dependencies that count, file checksums and any taint.

        Calls bb.fatal() if a dependency that counts has no taskhash yet.
        """

        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCache.basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCache.pkg_fn[fn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, deptaskname, depfn) = bb.runqueue.split_tid_mcfn(dep)
            # Dependencies belonging to a different multiconfig are not recorded
            if mc != depmc:
                continue
            depname = dataCache.pkg_fn[depfn]
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCache):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCache.file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCache.file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCache.file_checksums[fn][task], recipename, self.localdirsexclude)
            for (f,cs) in checksums:
                self.file_checksum_values[tid].append((f,cs))

        taskdep = dataCache.task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        # A taint file on disk takes precedence over the implicit nostamp taint
        taint = self.read_taint(fn, task, dataCache.stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCache):
        """
        Compute, store and return the taskhash of ``tid``.

        The hash is the SHA-256 of the concatenation of: the basehash, the
        hash of every entry in ``self.runtaskdeps[tid]``, every non-empty
        file checksum and the taint (without its "nostamp:" prefix).
        prep_taskhash() must have been called for ``tid`` first.
        """

        # Collect the pieces and join once instead of re-concatenating strings
        parts = [self.basehash[tid]]
        for dep in self.runtaskdeps[tid]:
            if dep in self.unihash:
                # A None unihash means "use the taskhash" for that dependency
                if self.unihash[dep] is None:
                    parts.append(self.taskhash[dep])
                else:
                    parts.append(self.unihash[dep])
            else:
                parts.append(self.get_unihash(dep))

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                parts.append(cs)

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                # Strip the 8 character "nostamp:" prefix
                parts.append(self.taints[tid][8:])
            else:
                parts.append(self.taints[tid])

        h = hashlib.sha256("".join(parts).encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task])
        return h

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        """Persist ``self.unitaskhashes`` through the unihash cache."""
        self.unihash_cache.save(self.unitaskhashes)

    def dump_sigtask(self, fn, task, stampbase, runtime):
        """
        Pickle the signature data of ``fn``:``task`` to a file.

        The output file name depends on ``runtime``:
          * a string starting with "customfile": the file is ``stampbase``
            itself and the text after the 11 character "customfile:" prefix
            is the stamp base used to look up the taint file;
          * otherwise truthy, with a taskhash known for the task:
            "<stampbase>.<task>.sigdata.<unihash>";
          * anything else: "<stampbase>.<task>.sigbasedata.<basehash>".

        The data is written to a temporary file in the target directory and
        renamed into place; the temporary file is removed if that fails and
        the original exception is re-raised.
        """

        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basewhitelist'] = self.basewhitelist
        data['taskwhitelist'] = self.taskwhitelist
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basewhitelist:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = [(os.path.basename(f), cs) for f,cs in self.file_checksum_values[tid]]
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        # Recompute the hashes from the dumped data as a consistency check
        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with os.fdopen(fd, "wb") as stream:
                pickle.dump(data, stream, -1)
                stream.flush()
            os.chmod(tmpfile, 0o664)
            os.rename(tmpfile, sigfile)
        except Exception:
            # Whatever failed (I/O or pickling), do not leave the temporary
            # file behind, then let the original exception propagate.
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise

    def dump_sigfn(self, fn, dataCaches, options):
        """
        Dump runtime signature data for every task of ``fn`` that has a
        taskhash, reporting any mismatch with the cached basehash first.
        """
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)
388
class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    """
    "basichash" generator: stamp file names carry the task's hash, and a
    task is invalidated by writing a taint rather than deleting its stamp.
    """
    name = "basichash"

    def get_stampfile_hash(self, tid):
        """Return the taskhash of ``tid`` when known, otherwise its basehash."""
        # A tid missing from basehash as well is an error (KeyError)
        source = self.taskhash if tid in self.taskhash else self.basehash
        return source[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        """
        Return "<stampbase>.<taskname>.<hash>.<extrainfo>" with trailing dots
        stripped; with ``clean`` set the hash is replaced by "*".
        """
        suffix = "_setscene"
        # "<task>_setscene" uses the hash of "<task>"; "do_setscene" itself does not
        if taskname.endswith(suffix) and taskname != "do_setscene":
            basetask = taskname[:-len(suffix)]
        else:
            basetask = taskname
        tid = fn + ":" + basetask

        h = "*" if clean else self.get_stampfile_hash(tid)
        stamp = "%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)
        return stamp.rstrip('.')

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        """Return the stamp file name with "*" in place of the hash."""
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        """Force the task to rebuild by writing a taint for it."""
        message = "Tainting hash to force rebuild of task %s, %s" % (fn, task)
        bb.note(message)
        bb.build.write_taint(task, d, fn)
417
class SignatureGeneratorUniHashMixIn(object):
    """
    Mix-in adding unihash ("unique hash") handling backed by a hash
    equivalence server.

    It relies on ``self.server`` and ``self.method`` being set by the class
    it is combined with, and on the state attributes of the signature
    generator classes (``taskhash``, ``unihash``, ``unitaskhashes``,
    ``tidtopn``, ``setscenetasks``).
    """
    def __init__(self, data):
        # Optional per-tid suffix appended to self.method when talking to the server
        self.extramethod = {}
        super().__init__(data)

    def _method_for_tid(self, tid):
        """Return the hash method name for ``tid``: self.method plus any extramethod suffix."""
        method = self.method
        if tid in self.extramethod:
            method = method + self.extramethod[tid]
        return method

    def _unitaskhash_key(self, tid):
        """Return the ``self.unitaskhashes`` key for ``tid``: "<mc>:<pn>:<taskname>"."""
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        return mc + ":" + self.tidtopn[tid] + ":" + taskname

    def get_taskdata(self):
        """Return (server, method, extramethod) followed by the parent's task data."""
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        """Restore state produced by get_taskdata()."""
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        """Return the hash server client, creating it on first use."""
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def get_stampfile_hash(self, tid):
        """Return the cached unihash of ``tid`` if there is one, else the parent's stamp file hash."""
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures that a task won't be re-run if its taskhash changes but
            # it would still result in the same output hash
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        """Record ``unihash`` for ``tid`` both in the cache (paired with its taskhash) and in ``self.unihash``."""
        key = self._unitaskhash_key(tid)
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        """
        Look up the cached unihash of ``tid``.

        Returns None when the tid is unknown to ``self.tidtopn``, when there
        is no cache entry, or when the cached entry was recorded for a
        taskhash other than ``checkkey`` (default: the current taskhash).
        """
        if tid not in self.tidtopn:
            return None
        key = self._unitaskhash_key(tid)
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (cached_taskhash, unihash) = self.unitaskhashes[key]
        if cached_taskhash != checkkey:
            return None
        return unihash

    def get_unihash(self, tid):
        """
        Return the unihash of ``tid``, consulting the local cache first and
        the hash equivalence server second; falls back to the taskhash.
        """
        taskhash = self.taskhash[tid]

        # If its not a setscene task we can return
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) The value is easy for multiple independent builders to derive the
        #    same unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self._method_for_tid(tid)
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported it at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug(2, 'No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except hashserv.client.HashConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        # set_unihash() updates both the cache entry and self.unihash[tid]
        self.set_unihash(tid, unihash)
        return unihash

    def report_unihash(self, path, task, d):
        """
        Compute the output hash of ``task`` (given without its "do_" prefix)
        and report it to the hash equivalence server.

        The output hash is obtained by calling the configured method with
        (path, sigfile, task, d), where sigfile is "depsig.do_<task>.<pid>"
        in the directory named by T. If the server answers with a different
        unihash, a taskUniHashUpdate event is fired and BB_UNIHASH is
        updated. Connection errors only produce a warning. In all cases the
        "depsig.do_<task>" symlink is pointed at the sigfile afterwards.
        """
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just abort
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            # NOTE(review): self.method is evaluated as code below; it is
            # presumably only ever set from trusted configuration - confirm.
            if "." in self.method:
                # "package.module.function": import the module and call the function
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    # Re-read what the hash method wrote into the sigfile
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self._method_for_tid(tid)

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug(1, 'Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(tid, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug(1, 'Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except hashserv.client.HashConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                # Best effort: failing to create the symlink is ignored
                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass

    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        """
        Report to the server that ``taskhash`` is equivalent to
        ``wanted_unihash``.

        Returns True only when the server answers with ``wanted_unihash`` and
        it differs from ``current_unihash`` (the new value is then recorded);
        returns False in every other case, including connection errors.
        ``datacaches`` is not used.
        """
        try:
            extra_data = {}
            method = self._method_for_tid(tid)

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except hashserv.client.HashConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False
632
633#
634# Dummy class used for bitbake-selftest
635#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    """Dummy unihash-enabled generator used by bitbake-selftest."""
    name = "TestEquivHash"

    def init_rundepcheck(self, data):
        """Run the inherited setup, then set the hash server (BB_HASHSERVE) and output hash method."""
        super(SignatureGeneratorTestEquivHash, self).init_rundepcheck(data)
        server = data.getVar('BB_HASHSERVE')
        self.server = server
        self.method = "sstate_output_hash"
642
643
def dump_this_task(outfile, d):
    """
    Write the signature data of the task named by BB_FILENAME and
    BB_CURRENTTASK in ``d`` to ``outfile``.
    """
    import bb.parse
    recipe = d.getVar("BB_FILENAME")
    taskname = "do_" + d.getVar("BB_CURRENTTASK")
    # The "customfile:" form makes dump_sigtask write to outfile itself and
    # use the text after the prefix as the stamp base for the taint lookup
    runtime = "customfile:" + bb.build.stamp_internal(taskname, d, None, True)
    bb.parse.siggen.dump_sigtask(recipe, taskname, outfile, runtime)
650
def init_colors(enable_color):
    """
    Build the colour dict passed to compare_sigfiles() and the diff helpers.

    The four keys are always present; when ``enable_color`` is false every
    value is the empty string instead of a terminal escape sequence.
    """
    palette = (
        ('color_title',   '\033[1m'),
        ('color_default', '\033[0m'),
        ('color_add',     '\033[0;32m'),
        ('color_remove',  '\033[0;31m'),
    )
    if enable_color:
        return dict(palette)
    # Keep every key, but blank out the escape sequences
    return dict((key, '') for key, _ in palette)
664
def worddiff_str(oldstr, newstr, colors=None):
    """
    Return a quoted, word-level inline diff of two strings.

    Both strings are split on single spaces and compared with
    simplediff.diff(). Unchanged runs are emitted as-is, additions as
    {+...+} and removals as [-...-], wrapped in the colour codes from
    ``colors`` (no colour when omitted). " (whitespace changed)" is
    appended when the strings differ only in whitespace.
    """
    if not colors:
        colors = init_colors(False)

    templates = {
        '+': '{color_add}{{+{value}+}}{color_default}',
        '-': '{color_remove}[-{value}-]{color_default}',
    }
    pieces = []
    for change, words in simplediff.diff(oldstr.split(' '), newstr.split(' ')):
        text = ' '.join(words)
        if change == '=':
            pieces.append(text)
        elif change in templates:
            pieces.append(templates[change].format(value=text, **colors))

    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        note = ' (whitespace changed)'
    else:
        note = ''
    return '"%s"%s' % (' '.join(pieces), note)
684
def list_inline_diff(oldlist, newlist, colors=None):
    """
    Return a bracketed inline diff of two lists of strings.

    The lists are compared with simplediff.diff(). Each resulting run is
    joined with spaces and shown quoted when unchanged, prefixed with "+"
    when added or "-" when removed, using the colour codes from ``colors``
    (no colour when omitted).
    """
    if not colors:
        colors = init_colors(False)

    templates = {
        '+': '{color_add}+{value}{color_default}',
        '-': '{color_remove}-{value}{color_default}',
    }
    entries = []
    for change, items in simplediff.diff(oldlist, newlist):
        text = ' '.join(items)
        if change == '=':
            entries.append("'%s'" % text)
        elif change in templates:
            entries.append(templates[change].format(value=text, **colors))
    return '[%s]' % (', '.join(entries))
701
def clean_basepath(a):
    """
    Shorten a task path to "<last dir>/<last component>", keeping any
    "virtual:" and multiconfig information as ":"-separated suffixes.

    A leading "mc:<name>:" is removed and re-added at the end as
    ":mc:<name>". For a path starting with "virtual:", everything before
    its last ":" is appended after the shortened path.
    """
    mc = None
    if a.startswith("mc:"):
        _, mc, a = a.split(":", 2)

    # Keep only the last directory and the final component
    pieces = a.rsplit("/", 2)
    result = [pieces[1] + '/' + pieces[2]]

    if a.startswith("virtual:"):
        result.append(a.rsplit(":", 1)[0])
    if mc:
        result.extend(("mc", mc))
    return ":".join(result)
712
def clean_basepaths(a):
    """Return a copy of mapping ``a`` whose keys have been passed through clean_basepath()."""
    return {clean_basepath(path): a[path] for path in a}
718
def clean_basepaths_list(a):
    """Return a list with clean_basepath() applied to every item of ``a``, in order."""
    return [clean_basepath(path) for path in a]
724
725def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
726    output = []
727
728    colors = init_colors(color)
729    def color_format(formatstr, **values):
730        """
731        Return colour formatted string.
732        NOTE: call with the format string, not an already formatted string
733        containing values (otherwise you could have trouble with { and }
734        characters)
735        """
736        if not formatstr.endswith('{color_default}'):
737            formatstr += '{color_default}'
738        # In newer python 3 versions you can pass both of these directly,
739        # but we only require 3.4 at the moment
740        formatparams = {}
741        formatparams.update(colors)
742        formatparams.update(values)
743        return formatstr.format(**formatparams)
744
745    with open(a, 'rb') as f:
746        p1 = pickle.Unpickler(f)
747        a_data = p1.load()
748    with open(b, 'rb') as f:
749        p2 = pickle.Unpickler(f)
750        b_data = p2.load()
751
752    def dict_diff(a, b, whitelist=set()):
753        sa = set(a.keys())
754        sb = set(b.keys())
755        common = sa & sb
756        changed = set()
757        for i in common:
758            if a[i] != b[i] and i not in whitelist:
759                changed.add(i)
760        added = sb - sa
761        removed = sa - sb
762        return changed, added, removed
763
764    def file_checksums_diff(a, b):
765        from collections import Counter
766        # Handle old siginfo format
767        if isinstance(a, dict):
768            a = [(os.path.basename(f), cs) for f, cs in a.items()]
769        if isinstance(b, dict):
770            b = [(os.path.basename(f), cs) for f, cs in b.items()]
771        # Compare lists, ensuring we can handle duplicate filenames if they exist
772        removedcount = Counter(a)
773        removedcount.subtract(b)
774        addedcount = Counter(b)
775        addedcount.subtract(a)
776        added = []
777        for x in b:
778            if addedcount[x] > 0:
779                addedcount[x] -= 1
780                added.append(x)
781        removed = []
782        changed = []
783        for x in a:
784            if removedcount[x] > 0:
785                removedcount[x] -= 1
786                for y in added:
787                    if y[0] == x[0]:
788                        changed.append((x[0], x[1], y[1]))
789                        added.remove(y)
790                        break
791                else:
792                    removed.append(x)
793        added = [x[0] for x in added]
794        removed = [x[0] for x in removed]
795        return changed, added, removed
796
797    if 'basewhitelist' in a_data and a_data['basewhitelist'] != b_data['basewhitelist']:
798        output.append(color_format("{color_title}basewhitelist changed{color_default} from '%s' to '%s'") % (a_data['basewhitelist'], b_data['basewhitelist']))
799        if a_data['basewhitelist'] and b_data['basewhitelist']:
800            output.append("changed items: %s" % a_data['basewhitelist'].symmetric_difference(b_data['basewhitelist']))
801
802    if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']:
803        output.append(color_format("{color_title}taskwhitelist changed{color_default} from '%s' to '%s'") % (a_data['taskwhitelist'], b_data['taskwhitelist']))
804        if a_data['taskwhitelist'] and b_data['taskwhitelist']:
805            output.append("changed items: %s" % a_data['taskwhitelist'].symmetric_difference(b_data['taskwhitelist']))
806
807    if a_data['taskdeps'] != b_data['taskdeps']:
808        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))
809
810    if a_data['basehash'] != b_data['basehash'] and not collapsed:
811        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))
812
813    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basewhitelist'] & b_data['basewhitelist'])
814    if changed:
815        for dep in changed:
816            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
817            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
818                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
819    if added:
820        for dep in added:
821            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
822    if removed:
823        for dep in removed:
824            output.append(color_format("{color_title}Dependency on Variable %s was removed") % (dep))
825
826
827    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
828    if changed:
829        for dep in changed:
830            oldval = a_data['varvals'][dep]
831            newval = b_data['varvals'][dep]
832            if newval and oldval and ('\n' in oldval or '\n' in newval):
833                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
834                # Cut off the first two lines, since we aren't interested in
835                # the old/new filename (they are blank anyway in this case)
836                difflines = list(diff)[2:]
837                if color:
838                    # Add colour to diff output
839                    for i, line in enumerate(difflines):
840                        if line.startswith('+'):
841                            line = color_format('{color_add}{line}', line=line)
842                            difflines[i] = line
843                        elif line.startswith('-'):
844                            line = color_format('{color_remove}{line}', line=line)
845                            difflines[i] = line
846                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
847            elif newval and oldval and (' ' in oldval or ' ' in newval):
848                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
849            else:
850                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))
851
852    if not 'file_checksum_values' in a_data:
853         a_data['file_checksum_values'] = {}
854    if not 'file_checksum_values' in b_data:
855         b_data['file_checksum_values'] = {}
856
857    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
858    if changed:
859        for f, old, new in changed:
860            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
861    if added:
862        for f in added:
863            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
864    if removed:
865        for f in removed:
866            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))
867
868    if not 'runtaskdeps' in a_data:
869         a_data['runtaskdeps'] = {}
870    if not 'runtaskdeps' in b_data:
871         b_data['runtaskdeps'] = {}
872
873    if not collapsed:
874        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
875            changed = ["Number of task dependencies changed"]
876        else:
877            changed = []
878            for idx, task in enumerate(a_data['runtaskdeps']):
879                a = a_data['runtaskdeps'][idx]
880                b = b_data['runtaskdeps'][idx]
881                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
882                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))
883
884        if changed:
885            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
886            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
887            if clean_a != clean_b:
888                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
889            else:
890                output.append(color_format("{color_title}runtaskdeps changed:"))
891            output.append("\n".join(changed))
892
893
894    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
895        a = a_data['runtaskhashes']
896        b = b_data['runtaskhashes']
897        changed, added, removed = dict_diff(a, b)
898        if added:
899            for dep in added:
900                bdep_found = False
901                if removed:
902                    for bdep in removed:
903                        if b[dep] == a[bdep]:
904                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
905                            bdep_found = True
906                if not bdep_found:
907                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep]))
908        if removed:
909            for dep in removed:
910                adep_found = False
911                if added:
912                    for adep in added:
913                        if b[adep] == a[dep]:
914                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
915                            adep_found = True
916                if not adep_found:
917                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep]))
918        if changed:
919            for dep in changed:
920                if not collapsed:
921                    output.append(color_format("{color_title}Hash for dependent task %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep]))
922                if callable(recursecb):
923                    recout = recursecb(dep, a[dep], b[dep])
924                    if recout:
925                        if collapsed:
926                            output.extend(recout)
927                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
930                            output = [output[-1]] + recout
931
932    a_taint = a_data.get('taint', None)
933    b_taint = b_data.get('taint', None)
934    if a_taint != b_taint:
935        if a_taint and a_taint.startswith('nostamp:'):
936            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
937        if b_taint and b_taint.startswith('nostamp:'):
938            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
939        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))
940
941    return output
942
943
def calc_basehash(sigdata):
    """
    Recompute the base hash from a loaded signature data dict.

    The hash input starts with the value of the variable named by
    sigdata['task'] (an empty string if that value is None). For every entry
    of sigdata['taskdeps'], in iteration order, the dependency name is
    appended, followed by str() of its value from sigdata['varvals'] unless
    that value is None.

    Returns the SHA-256 hex digest of the resulting string encoded as UTF-8.
    """
    task = sigdata['task']
    varvals = sigdata['varvals']

    taskbody = varvals[task]
    pieces = ['' if taskbody is None else taskbody]

    for depname in sigdata['taskdeps']:
        pieces.append(depname)
        depvalue = varvals[depname]
        if depvalue is not None:
            pieces.append(str(depvalue))

    return hashlib.sha256(''.join(pieces).encode("utf-8")).hexdigest()
959
def calc_taskhash(sigdata):
    """
    Recompute a task hash from a loaded signature data dict.

    The hash input is the concatenation, in this order, of:
      - sigdata['basehash']
      - sigdata['runtaskhashes'][dep] for every dep in sigdata['runtaskdeps']
      - the checksum (second element) of every entry in
        sigdata['file_checksum_values'] that has a non-empty checksum
      - sigdata['taint'], with a leading 'nostamp:' marker stripped

    'runtaskdeps', 'file_checksum_values' and 'taint' are optional; when
    absent (or, for the taint, None/empty) they contribute nothing.

    Returns the SHA-256 hex digest of that string encoded as UTF-8.
    Raises KeyError if 'basehash' is missing or a listed runtime dependency
    has no entry in 'runtaskhashes'.
    """
    pieces = [sigdata['basehash']]

    runtaskdeps = sigdata.get('runtaskdeps', ())
    if runtaskdeps:
        runtaskhashes = sigdata['runtaskhashes']
        pieces.extend(runtaskhashes[dep] for dep in runtaskdeps)

    # NOTE(review): entries are indexed as (name, checksum) pairs; a dict in
    # the old siginfo format would be iterated by key here - confirm whether
    # such files need to be supported.
    pieces.extend(entry[1] for entry in sigdata.get('file_checksum_values', ())
                  if entry[1])

    taint = sigdata.get('taint')
    if taint:
        # Only strip the marker when it really is a prefix; a substring test
        # combined with a fixed-width slice would chop unrelated characters
        # off a taint that merely contains 'nostamp:' somewhere else.
        marker = 'nostamp:'
        if taint.startswith(marker):
            taint = taint[len(marker):]
        pieces.append(taint)

    return hashlib.sha256(''.join(pieces).encode("utf-8")).hexdigest()
977
978
def dump_sigfile(a):
    """
    Produce a human-readable dump of a pickled signature data file.

    a is the path of the file, which is opened in binary mode and unpickled.
    Returns a list of strings describing, in order: the whitelists, the
    sorted task dependencies, the stored basehash, the dependencies and value
    of each variable, the optional runtime data (task dependencies, file
    checksums, dependent task hashes, taint) and finally the base and task
    hashes recomputed with calc_basehash() and calc_taskhash().
    """
    with open(a, 'rb') as sigfile:
        sig = pickle.load(sigfile)

    report = [
        "basewhitelist: %s" % (sig['basewhitelist']),
        "taskwhitelist: %s" % (sig['taskwhitelist']),
        "Task dependencies: %s" % (sorted(sig['taskdeps'])),
        "basehash: %s" % (sig['basehash']),
    ]

    report.extend("List of dependencies for variable %s is %s" % (var, deps)
                  for var, deps in sig['gendeps'].items())
    report.extend("Variable %s value is %s" % (var, value)
                  for var, value in sig['varvals'].items())

    # The remaining keys are only reported when the file contains them
    if 'runtaskdeps' in sig:
        report.append("Tasks this task depends on: %s" % (sig['runtaskdeps']))

    if 'file_checksum_values' in sig:
        report.append("This task depends on the checksums of files: %s" % (sig['file_checksum_values']))

    if 'runtaskhashes' in sig:
        report.extend("Hash for dependent task %s is %s" % (dep, dephash)
                      for dep, dephash in sig['runtaskhashes'].items())

    if 'taint' in sig:
        taint = sig['taint']
        # Spell out what the value following a nostamp marker is
        if taint.startswith('nostamp:'):
            taint = taint.replace('nostamp:', 'nostamp(uuid4):')
        report.append("Tainted (by forced/invalidated task): %s" % taint)

    # The base hash can only be recomputed when the task name was recorded
    if 'task' in sig:
        report.append("Computed base hash is %s and from file %s" % (calc_basehash(sig), sig['basehash']))
    else:
        report.append("Unable to compute base hash")

    report.append("Computed task hash is %s" % calc_taskhash(sig))

    return report
1027