# xref: /openbmc/openbmc/poky/bitbake/lib/bb/siggen.py (revision d541ec52)
#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
import json
import bb.compress.zstd
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv
import hashserv.client

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

class SetEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, set):
            return dict(_set_object=list(sorted(obj)))
        return json.JSONEncoder.default(self, obj)

def SetDecoder(dct):
    if '_set_object' in dct:
        return set(dct['_set_object'])
    return dct
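
# Example (illustrative): SetEncoder/SetDecoder round-trip a set through JSON,
# sorting the elements so the encoded form is deterministic:
#
#   >>> import json
#   >>> encoded = json.dumps({"deps": {"b", "a"}}, cls=SetEncoder)
#   >>> encoded
#   '{"deps": {"_set_object": ["a", "b"]}}'
#   >>> json.loads(encoded, object_hook=SetDecoder)  # set order may vary
#   {'deps': {'a', 'b'}}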

def init(d):
    siggens = [obj for obj in globals().values()
               if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)

    logger.error("Invalid signature generator '%s', using default 'noop'\n"
                 "Available generators: %s", desired,
                 ', '.join(obj.name for obj in siggens))
    return SignatureGenerator(d)
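
# The active generator is chosen via BB_SIGNATURE_HANDLER; for example, a
# configuration (illustrative snippet) selecting the "basichash" generator
# defined below:
#
#   BB_SIGNATURE_HANDLER = "basichash"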

class SignatureGenerator(object):
    """
    Dummy signature generator ("noop"), used as the fallback when no handler
    matches BB_SIGNATURE_HANDLER. It also serves as the base class for the
    real generators below.
    """
    name = "noop"

    # If the derived class supports multiconfig datacaches, set this to True
    # The default is False for backward compatibility with derived signature
    # generators that do not understand multiconfig caches
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, variant):
        return

    def postparsing_clean_cache(self):
        return

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, d, fn):
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to signature
        generator functions. If the signature generator supports multiconfig
        caches, the entire dictionary of data caches is sent, otherwise a
        special proxy is sent that supports both index access to all
        multiconfigs, and also direct access for the default multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed in
        the future when support for multiconfig data caches is mandatory.
        """
        class DataCacheProxy(object):
            def __init__(self):
                pass

            def __getitem__(self, key):
                return dataCaches[key]

            def __getattr__(self, name):
                return getattr(dataCaches[mc], name)

        if cls.supports_multiconfig_datacaches:
            return dataCaches

        return DataCacheProxy()
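
    # Illustrative sketch of the proxy behaviour (names here are examples):
    # for a generator without multiconfig support, attribute access resolves
    # to the default multiconfig, while index access still reaches any other:
    #
    #   caches = SignatureGenerator.get_data_caches(dataCaches, mc)
    #   caches.pkg_fn[fn]         # equivalent to dataCaches[mc].pkg_fn[fn]
    #   caches["mc2"].pkg_fn[fn]  # equivalent to dataCaches["mc2"].pkg_fn[fn]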

    def exit(self):
        return

class SignatureGeneratorBasic(SignatureGenerator):
    """
    A signature generator ("basic") that derives base and task hashes from the
    variable and task dependency data BitBake gathers during parsing.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.setscenetasks = set()
        self.basehash_ignore_vars = set((data.getVar("BB_BASEHASH_IGNORE_VARS") or "").split())
        self.taskhash_ignore_tasks = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        self.taskhash_ignore_tasks = data.getVar("BB_TASKHASH_IGNORE_TASKS") or None
        if self.taskhash_ignore_tasks:
            self.twl = re.compile(self.taskhash_ignore_tasks)
        else:
            self.twl = None

    def _build_data(self, fn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basehash_ignore_vars)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basehash_ignore_vars, fn)

        for task in tasklist:
            tid = fn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % fn)
            raise

        # Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH:task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the ignore
        # list
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True
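
    # Illustrative sketch: with a hypothetical setting such as
    #
    #   BB_TASKHASH_IGNORE_TASKS = "buildtools-.*"
    #
    # self.twl is the compiled regex; a recipe whose name does not match keeps
    # its task hashes stable even when a dependency whose name does match
    # changes, because rundep_check() above drops that dependency.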

    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint
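
    # The taint file lives next to the task's stamp files, e.g. (hypothetical
    # STAMP value):
    #
    #   ${STAMP}.do_compile.taint
    #
    # and typically contains a uuid4 string written by a forced run
    # ("bitbake -f") via invalidate_task() below.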

    def prep_taskhash(self, tid, deps, dataCaches):

        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[fn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.supports_multiconfig_datacaches and mc != depmc:
                # If the signature generator doesn't understand multiconfig
                # data caches, any dependency not in the same multiconfig must
                # be skipped for backward compatibility
                continue
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            for (f, cs) in checksums:
                self.file_checksum_values[tid].append((f, cs))

        taskdep = dataCaches[mc].task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):

        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            data = data + self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                if "/./" in f:
                    data = data + "./" + f.split("/./")[1]
                data = data + cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data = data + self.taints[tid][8:]
            else:
                data = data + self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
        return h

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def dump_sigtask(self, fn, task, stampbase, runtime):

        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basehash_ignore_vars'] = self.basehash_ignore_vars
        data['taskhash_ignore_tasks'] = self.taskhash_ignore_tasks
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basehash_ignore_vars:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = []
            for f,cs in self.file_checksum_values[tid]:
                if "/./" in f:
                    data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
                else:
                    data['file_checksum_values'].append((os.path.basename(f), cs))
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
                f.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

    def dump_sigfn(self, fn, dataCaches, options):
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If the task is not in basehash either, the resulting KeyError is a
        # deliberate error
        return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        if taskname != "do_setscene" and taskname.endswith("_setscene"):
            tid = fn + ":" + taskname[:-9]
        else:
            tid = fn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')
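
    # Illustrative: this produces stamp names of the form (hypothetical
    # values)
    #
    #   <stampbase>.do_compile.<taskhash-or-basehash>.<extrainfo>
    #
    # and stampcleanmask() below substitutes "*" for the hash, giving a glob
    # that matches the task's stamps regardless of hash.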

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)

class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def reset(self, data):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().reset(data)

    def exit(self):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().exit()

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures that a task won't be re-run if the taskhash changes but
            # would still result in the same output hash
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash
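
    # Illustrative sketch of the unitaskhashes cache layout (hypothetical,
    # truncated hashes): one entry per "<mc>:<pn>:<taskname>" key, recording
    # the taskhash an entry was computed for and the reported unihash:
    #
    #   self.unitaskhashes = {
    #       ":bash:do_compile": ("3e2af3...", "88b1c2..."),
    #   }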

    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task we can return immediately
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # the most recent entry for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        #    continues to work
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) It makes it easy for multiple independent builders to derive the
        #    same unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash

    def report_unihash(self, path, task, d):
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just exit
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass

    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
    name = "TestMulticonfigDepends"
    supports_multiconfig_datacaches = True

def dump_this_task(outfile, d):
    import bb.parse
    fn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.build.stamp_internal(task, d, None, True)
    bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title':   '\033[1m',
              'color_default': '\033[0m',
              'color_add':     '\033[0;32m',
              'color_remove':  '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)
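
# Illustrative (uncolored) output, assuming simplediff reports the removal
# before the addition for a one-word substitution:
#
#   >>> worddiff_str('the quick fox', 'the slow fox')
#   '"the [-quick-] {+slow+} fox"'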

def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

def clean_basepath(basepath):
    basepath, dir, recipe_task = basepath.rsplit("/", 2)
    cleaned = dir + '/' + recipe_task

    if basepath[0] == '/':
        return cleaned

    if basepath.startswith("mc:") and basepath.count(':') >= 2:
        mc, mc_name, basepath = basepath.split(":", 2)
        mc_suffix = ':mc:' + mc_name
    else:
        mc_suffix = ''

    # mc stuff now removed from basepath. Whatever came next, if present, will
    # be the first suffix. ':/', the start of the recipe path, marks the end of
    # this. Something like 'virtual:a[:b[:c]]:/path...' (b and c being optional)
    if basepath[0] != '/':
        cleaned += ':' + basepath.split(':/', 1)[0]

    return cleaned + mc_suffix
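
# Illustrative examples (hypothetical paths):
#
#   >>> clean_basepath('/work/meta/recipes-core/busybox/busybox_1.35.0.bb:do_fetch')
#   'busybox/busybox_1.35.0.bb:do_fetch'
#   >>> clean_basepath('virtual:native:/work/meta/recipes-core/busybox/busybox_1.35.0.bb:do_fetch')
#   'busybox/busybox_1.35.0.bb:do_fetch:virtual:native'
#   >>> clean_basepath('mc:mc1:/work/meta/recipes-core/busybox/busybox_1.35.0.bb:do_fetch')
#   'busybox/busybox_1.35.0.bb:do_fetch:mc:mc1'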

def clean_basepaths(a):
    b = {}
    for x in a:
        b[clean_basepath(x)] = a[x]
    return b

def clean_basepaths_list(a):
    b = []
    for x in a:
        b.append(clean_basepath(x))
    return b

# Handle renamed fields
def handle_renames(data):
    if 'basewhitelist' in data:
        data['basehash_ignore_vars'] = data['basewhitelist']
        del data['basewhitelist']
    if 'taskwhitelist' in data:
        data['taskhash_ignore_tasks'] = data['taskwhitelist']
        del data['taskwhitelist']


def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)
    with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
        b_data = json.load(f, object_hook=SetDecoder)

    for data in [a_data, b_data]:
        handle_renames(data)

    def dict_diff(a, b, ignored_vars=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in ignored_vars:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        from collections import Counter

        # Convert lists back to tuples
        a = [(f[0], f[1]) for f in a]
        b = [(f[0], f[1]) for f in b]

        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    if 'basehash_ignore_vars' in a_data and a_data['basehash_ignore_vars'] != b_data['basehash_ignore_vars']:
        output.append(color_format("{color_title}basehash_ignore_vars changed{color_default} from '%s' to '%s'") % (a_data['basehash_ignore_vars'], b_data['basehash_ignore_vars']))
        if a_data['basehash_ignore_vars'] and b_data['basehash_ignore_vars']:
            output.append("changed items: %s" % a_data['basehash_ignore_vars'].symmetric_difference(b_data['basehash_ignore_vars']))

    if 'taskhash_ignore_tasks' in a_data and a_data['taskhash_ignore_tasks'] != b_data['taskhash_ignore_tasks']:
        output.append(color_format("{color_title}taskhash_ignore_tasks changed{color_default} from '%s' to '%s'") % (a_data['taskhash_ignore_tasks'], b_data['taskhash_ignore_tasks']))
        if a_data['taskhash_ignore_tasks'] and b_data['taskhash_ignore_tasks']:
            output.append("changed items: %s" % a_data['taskhash_ignore_tasks'].symmetric_difference(b_data['taskhash_ignore_tasks']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basehash_ignore_vars'] & b_data['basehash_ignore_vars'])
    if changed:
        for dep in sorted(changed):
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in sorted(added):
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in sorted(removed):
            output.append(color_format("{color_title}Dependency on variable %s was removed") % (dep))

    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in sorted(changed):
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if 'file_checksum_values' not in a_data:
        a_data['file_checksum_values'] = []
    if 'file_checksum_values' not in b_data:
        b_data['file_checksum_values'] = []

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if 'runtaskdeps' not in a_data:
        a_data['runtaskdeps'] = {}
    if 'runtaskdeps' not in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))

    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = a_data['runtaskhashes']
        b = b_data['runtaskhashes']
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in sorted(added):
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep]))
        if removed:
            for dep in sorted(removed):
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep]))
        if changed:
            for dep in sorted(changed):
                if not collapsed:
                    output.append(color_format("{color_title}Hash for task dependency %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
                            output = [output[-1]] + recout

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output


def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in alldeps:
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()
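
# Illustrative sketch (hypothetical sigdata): the base hash covers the task's
# own value, then each dependency name followed by its value, concatenated
# and hashed:
#
#   >>> sigdata = {'task': 'do_compile',
#   ...            'taskdeps': ['CC', 'CFLAGS'],
#   ...            'varvals': {'do_compile': 'oe_runmake', 'CC': 'gcc',
#   ...                        'CFLAGS': '-O2'}}
#   >>> calc_basehash(sigdata)  # sha256 of 'oe_runmakeCCgccCFLAGS-O2'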

def calc_taskhash(sigdata):
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            if "./" in c[0]:
                data = data + c[0]
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()


def dump_sigfile(a):
    output = []

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)

    handle_renames(a_data)

    output.append("basehash_ignore_vars: %s" % (sorted(a_data['basehash_ignore_vars'])))

    output.append("taskhash_ignore_tasks: %s" % (sorted(a_data['taskhash_ignore_tasks'] or [])))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in sorted(a_data['gendeps']):
        output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))

    for dep in sorted(a_data['varvals']):
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))

    if 'runtaskhashes' in a_data:
        for dep in sorted(a_data['runtaskhashes']):
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and the one from the file is %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output