xref: /openbmc/openbmc/poky/bitbake/lib/bb/siggen.py (revision 12fc939c)
#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv
import hashserv.client

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

def init(d):
    siggens = [obj for obj in globals().values()
                      if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
    else:
        logger.error("Invalid signature generator '%s', using default 'noop'\n"
                     "Available generators: %s", desired,
                     ', '.join(obj.name for obj in siggens))
        return SignatureGenerator(d)

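# A minimal usage sketch (illustrative, not part of this module): the
# generator is chosen purely by name via BB_SIGNATURE_HANDLER, e.g. with
#
#   BB_SIGNATURE_HANDLER = "basichash"
#
# in a configuration file, init(d) instantiates the first SignatureGenerator
# subclass whose 'name' attribute matches, falling back to the no-op one.
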
class SignatureGenerator(object):
    """
    A no-op signature generator: task hashes are derived from the task id
    alone and no signature data is written to disk.
    """
    name = "noop"

    # If the derived class supports multiconfig datacaches, set this to True
    # The default is False for backward compatibility with derived signature
    # generators that do not understand multiconfig caches
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, variant):
        return

    def postparsing_clean_cache(self):
        return

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, d, fn):
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to signature
        generator functions. If the signature generator supports multiconfig
        caches, the entire dictionary of data caches is sent; otherwise a
        special proxy is sent that supports both index access to all
        multiconfigs and direct access for the default multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed in
        the future when support for multiconfig data caches is mandatory.
        """
        class DataCacheProxy(object):
            def __init__(self):
                pass

            def __getitem__(self, key):
                return dataCaches[key]

            def __getattr__(self, name):
                return getattr(dataCaches[mc], name)

        if cls.supports_multiconfig_datacaches:
            return dataCaches

        return DataCacheProxy()

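# Illustrative sketch (assumed names, not part of this module): for a
# generator without multiconfig support, get_data_caches() returns a proxy
# where both forms below resolve against the runqueue's datacaches dict:
#
#   caches = SignatureGenerator.get_data_caches(dataCaches, mc)
#   caches["mc1"].pkg_fn   # index access reaches any multiconfig's cache
#   caches.pkg_fn          # attribute access reaches the default multiconfig
#
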
class SignatureGeneratorBasic(SignatureGenerator):
    """
    A signature generator which hashes the variable and task dependency data
    gathered at parse time.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.setscenetasks = set()
        self.basewhitelist = set((data.getVar("BB_HASHBASE_WHITELIST") or "").split())
        self.taskwhitelist = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None
        if self.taskwhitelist:
            self.twl = re.compile(self.taskwhitelist)
        else:
            self.twl = None

    def _build_data(self, fn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basewhitelist)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basewhitelist, fn)

        for task in tasklist:
            tid = fn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % fn)
            raise

        # Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH_task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the whitelist
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

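    # Illustrative note (hypothetical path shown): the taint file read above
    # is written by bb.build.write_taint() (see invalidate_task below) and
    # holds a single uuid4 string, e.g.:
    #
    #   $ cat tmp/stamps/.../recipe-1.0-r0.do_compile.taint
    #   0b3c58f1-...
    #
    # Its presence folds an arbitrary value into the taskhash, forcing a re-run.
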
    def prep_taskhash(self, tid, deps, dataCaches):

        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[fn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.supports_multiconfig_datacaches and mc != depmc:
                # If the signature generator doesn't understand multiconfig
                # data caches, any dependency not in the same multiconfig must
                # be skipped for backward compatibility
                continue
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            for (f,cs) in checksums:
                self.file_checksum_values[tid].append((f,cs))

        taskdep = dataCaches[mc].task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):

        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            data = data + self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                data = data + cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data = data + self.taints[tid][8:]
            else:
                data = data + self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task])
        return h

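    # Worked sketch (comments only): the taskhash above is a sha256 over the
    # concatenation
    #
    #   basehash(tid) + unihash(dep_1) + ... + unihash(dep_n)
    #                 + file checksums + taint (if any)
    #
    # so a change in any dependency's unihash or in a tracked file's checksum
    # ripples into this task's hash and invalidates its stamp.
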
    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def dump_sigtask(self, fn, task, stampbase, runtime):

        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basewhitelist'] = self.basewhitelist
        data['taskwhitelist'] = self.taskwhitelist
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basewhitelist:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = [(os.path.basename(f), cs) for f,cs in self.file_checksum_values[tid]]
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with os.fdopen(fd, "wb") as stream:
                pickle.dump(data, stream, -1)
                stream.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

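    # Illustrative note: depending on 'runtime', the file written above is
    # named either
    #
    #   <stampbase>.<task>.sigdata.<unihash>       (runtime data available)
    #   <stampbase>.<task>.sigbasedata.<basehash>  (parse-time data only)
    #
    # and contains a pickled dict that is written to a temporary file and
    # atomically renamed into place.
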
    def dump_sigfn(self, fn, dataCaches, options):
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If the tid is not in basehash, the KeyError raised here is deliberate
        return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        if taskname != "do_setscene" and taskname.endswith("_setscene"):
            tid = fn + ":" + taskname[:-9]
        else:
            tid = fn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)

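# Illustrative example (assumed stampbase): with the basichash generator,
# stampfile("/tmp/stamps/recipe-1.0-r0", fn, "do_compile", "") produces
#
#   /tmp/stamps/recipe-1.0-r0.do_compile.<taskhash>
#
# while stampcleanmask() substitutes "*" for the hash, giving a glob that
# matches (and can be used to clean) stamps for any previous hash.
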
class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures that a task won't be re-run if its taskhash changes but
            # still results in the same output hash
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash

    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task we can return
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # the most recent entry for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) It is easy for multiple independent builders to derive the same
        #    unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash

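    # Illustrative sketch (assumed server address): the client calls used
    # above can be exercised directly against a hash equivalence server:
    #
    #   client = hashserv.create_client("unix:///tmp/hashserve.sock")
    #   unihash = client.get_unihash("sstate_output_hash", taskhash)
    #
    # get_unihash() yields nothing if the server has no equivalent recorded
    # for this (method, taskhash) pair, in which case the taskhash is used.
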
    def report_unihash(self, path, task, d):
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just abort
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass

    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
    name = "TestMulticonfigDepends"
    supports_multiconfig_datacaches = True

def dump_this_task(outfile, d):
    import bb.parse
    fn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.build.stamp_internal(task, d, None, True)
    bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title':   '\033[1m',
              'color_default': '\033[0m',
              'color_add':     '\033[0;32m',
              'color_remove':  '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)

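# Example (illustrative, colors disabled): simplediff.diff() yields
# (op, words) pairs with op one of '=', '+', '-', so
#
#   worddiff_str("a b c", "a x c")
#
# returns something like '"a [-b-] {+x+} c"'.
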
def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

def clean_basepath(basepath):
    basepath, dir, recipe_task = basepath.rsplit("/", 2)
    cleaned = dir + '/' + recipe_task

    if basepath[0] == '/':
        return cleaned

    if basepath.startswith("mc:") and basepath.count(':') >= 2:
        mc, mc_name, basepath = basepath.split(":", 2)
        mc_suffix = ':mc:' + mc_name
    else:
        mc_suffix = ''

    # mc stuff is now removed from basepath. Whatever came next, if present,
    # will be the first suffix. ':/' (the start of the recipe path) marks the
    # end of it, e.g. 'virtual:a[:b[:c]]:/path...' (b and c being optional)
    if basepath[0] != '/':
        cleaned += ':' + basepath.split(':/', 1)[0]

    return cleaned + mc_suffix

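# Examples (illustrative inputs): clean_basepath() reduces full task paths
# to a stable short form, e.g.
#
#   '/full/path/to/recipe.bb:do_compile'        -> 'to/recipe.bb:do_compile'
#   'virtual:native:/p/to/recipe.bb:do_compile' -> 'to/recipe.bb:do_compile:virtual:native'
#   'mc:mc1:/p/to/recipe.bb:do_compile'         -> 'to/recipe.bb:do_compile:mc:mc1'
#
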
def clean_basepaths(a):
    b = {}
    for x in a:
        b[clean_basepath(x)] = a[x]
    return b

def clean_basepaths_list(a):
    b = []
    for x in a:
        b.append(clean_basepath(x))
    return b

def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with open(a, 'rb') as f:
        p1 = pickle.Unpickler(f)
        a_data = p1.load()
    with open(b, 'rb') as f:
        p2 = pickle.Unpickler(f)
        b_data = p2.load()

    def dict_diff(a, b, whitelist=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in whitelist:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        from collections import Counter
        # Handle old siginfo format
        if isinstance(a, dict):
            a = [(os.path.basename(f), cs) for f, cs in a.items()]
        if isinstance(b, dict):
            b = [(os.path.basename(f), cs) for f, cs in b.items()]
        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    if 'basewhitelist' in a_data and a_data['basewhitelist'] != b_data['basewhitelist']:
        output.append(color_format("{color_title}basewhitelist changed{color_default} from '%s' to '%s'") % (a_data['basewhitelist'], b_data['basewhitelist']))
        if a_data['basewhitelist'] and b_data['basewhitelist']:
            output.append("changed items: %s" % a_data['basewhitelist'].symmetric_difference(b_data['basewhitelist']))

    if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']:
        output.append(color_format("{color_title}taskwhitelist changed{color_default} from '%s' to '%s'") % (a_data['taskwhitelist'], b_data['taskwhitelist']))
        if a_data['taskwhitelist'] and b_data['taskwhitelist']:
            output.append("changed items: %s" % a_data['taskwhitelist'].symmetric_difference(b_data['taskwhitelist']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basewhitelist'] & b_data['basewhitelist'])
    if changed:
        for dep in changed:
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in added:
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in removed:
            output.append(color_format("{color_title}Dependency on variable %s was removed") % (dep))


    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in changed:
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if 'file_checksum_values' not in a_data:
        a_data['file_checksum_values'] = {}
    if 'file_checksum_values' not in b_data:
        b_data['file_checksum_values'] = {}

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if 'runtaskdeps' not in a_data:
        a_data['runtaskdeps'] = {}
    if 'runtaskdeps' not in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = a_data['runtaskhashes']
        b = b_data['runtaskhashes']
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in added:
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep]))
        if removed:
            for dep in removed:
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep]))
        if changed:
            for dep in changed:
                if not collapsed:
                    output.append(color_format("{color_title}Hash for dependent task %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
                            output = [output[-1]] + recout
983
984    a_taint = a_data.get('taint', None)
985    b_taint = b_data.get('taint', None)
986    if a_taint != b_taint:
987        if a_taint and a_taint.startswith('nostamp:'):
988            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
989        if b_taint and b_taint.startswith('nostamp:'):
990            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
991        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))
992
993    return output
994
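# Example usage (illustrative paths): compare two dumped signature files and
# print a human-readable summary of what changed:
#
#   for line in compare_sigfiles("a.sigdata", "b.sigdata", color=True):
#       print(line)
#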

def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in alldeps:
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

def calc_taskhash(sigdata):
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()

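# Illustrative sketch (hypothetical filename): recompute and cross-check the
# hashes recorded in a dumped signature file:
#
#   with open("recipe.bb.do_compile.sigdata.deadbeef", "rb") as f:
#       sigdata = pickle.load(f)
#   assert calc_basehash(sigdata) == sigdata['basehash']
#   if 'runtaskdeps' in sigdata:
#       assert calc_taskhash(sigdata) == sigdata['taskhash']
#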

def dump_sigfile(a):
    output = []

    with open(a, 'rb') as f:
        p1 = pickle.Unpickler(f)
        a_data = p1.load()

    output.append("basewhitelist: %s" % (a_data['basewhitelist']))

    output.append("taskwhitelist: %s" % (a_data['taskwhitelist']))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in a_data['gendeps']:
        output.append("List of dependencies for variable %s is %s" % (dep, a_data['gendeps'][dep]))

    for dep in a_data['varvals']:
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (a_data['runtaskdeps']))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (a_data['file_checksum_values']))

    if 'runtaskhashes' in a_data:
        for dep in a_data['runtaskhashes']:
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output
