# xref: /openbmc/openbmc/poky/bitbake/lib/bb/siggen.py (revision d583833a)
#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
import json
import bb.compress.zstd
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv
import hashserv.client

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

class SetEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, set):
            return dict(_set_object=list(sorted(obj)))
        return json.JSONEncoder.default(self, obj)

def SetDecoder(dct):
    if '_set_object' in dct:
        return set(dct['_set_object'])
    return dct
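
# Illustrative sketch (not used verbatim elsewhere in this file) of how the
# encoder/decoder pair round-trips a set through JSON:
#
#   s = json.dumps({"deps": {"b", "a"}}, cls=SetEncoder)
#   # -> '{"deps": {"_set_object": ["a", "b"]}}'
#   json.loads(s, object_hook=SetDecoder)
#   # -> {'deps': {'a', 'b'}}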

def init(d):
    siggens = [obj for obj in globals().values()
                      if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
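    # Note: this is Python's for/else; the else branch runs only when the
    # loop finishes without returning, i.e. no generator matched the name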
    else:
        logger.error("Invalid signature generator '%s', using default 'noop'\n"
                     "Available generators: %s", desired,
                     ', '.join(obj.name for obj in siggens))
        return SignatureGenerator(d)

class SignatureGenerator(object):
    """
    A 'noop' signature generator, used as the default fallback. It implements
    the expected signature generator interface but derives task hashes from
    the task identifier alone.
    """
    name = "noop"

    # If the derived class supports multiconfig datacaches, set this to True.
    # The default is False for backward compatibility with derived signature
    # generators that do not understand multiconfig caches.
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, variant):
        return

    def postparsing_clean_cache(self):
        return

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

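    # Stamp names are plain dotted strings; e.g. (illustrative values)
    # stampfile("/stamps/foo", None, "do_compile", "") returns
    # "/stamps/foo.do_compile" (the trailing '.' left by the empty extrainfo
    # is stripped).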
    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, d, fn):
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to signature
        generator functions. If the signature generator supports multiconfig
        caches, the entire dictionary of data caches is sent; otherwise a
        special proxy is sent that supports both indexed access to all
        multiconfigs and direct attribute access for the default multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed in
        the future when support for multiconfig data caches is mandatory.
        """
        class DataCacheProxy(object):
            def __init__(self):
                pass

            def __getitem__(self, key):
                return dataCaches[key]

            def __getattr__(self, name):
                return getattr(dataCaches[mc], name)

        if cls.supports_multiconfig_datacaches:
            return dataCaches

        return DataCacheProxy()
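        # The proxy makes both styles below equivalent for the default mc
        # (pkg_fn is an attribute of the real data cache):
        #   proxy[mc].pkg_fn   # explicit multiconfig indexing
        #   proxy.pkg_fn       # direct access, as legacy generators expect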

    def exit(self):
        return

class SignatureGeneratorBasic(SignatureGenerator):
    """
    The 'basic' signature generator: computes sha256 task signatures from each
    task's variable dependencies and tracked file checksums.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.setscenetasks = set()
        self.basehash_ignore_vars = set((data.getVar("BB_BASEHASH_IGNORE_VARS") or "").split())
        self.taskhash_ignore_tasks = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        self.taskhash_ignore_tasks = data.getVar("BB_TASKHASH_IGNORE_TASKS") or None
        if self.taskhash_ignore_tasks:
            self.twl = re.compile(self.taskhash_ignore_tasks)
        else:
            self.twl = None

    def _build_data(self, fn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basehash_ignore_vars)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basehash_ignore_vars, fn)

        for task in tasklist:
            tid = fn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure the sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % fn)
            raise

        # Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH:task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it.
        # We only manipulate the dependencies of recipes which are not
        # themselves on the ignore list.
        if self.twl and not self.twl.search(recipename):
            # The recipe itself is not ignored, so drop any dependency that is
            if self.twl.search(depname):
                return False
        return True
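        # Illustrative sketch (assumed variable value): with
        # BB_TASKHASH_IGNORE_TASKS = "image-|meta-", a recipe such as
        # "busybox" (which does not match the pattern) drops its hash
        # dependency on e.g. "meta-environment", so rundep_check(...,
        # recipename="busybox", ..., depname="meta-environment", ...)
        # returns False.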

    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):

        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[fn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.supports_multiconfig_datacaches and mc != depmc:
                # If the signature generator doesn't understand multiconfig
                # data caches, any dependency not in the same multiconfig must
                # be skipped for backward compatibility
                continue
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            for (f,cs) in checksums:
                self.file_checksum_values[tid].append((f,cs))

        taskdep = dataCaches[mc].task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):

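        # Conceptually, the task hash is
        #   sha256(basehash + unihash(dep_1) + ... + unihash(dep_n)
        #          + file checksum data + taint)
        # so it changes whenever the recipe's own inputs, a runtime
        # dependency's output hash, a tracked file, or a taint changes.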
        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            data = data + self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                if "/./" in f:
                    data = data + "./" + f.split("/./")[1]
                data = data + cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data = data + self.taints[tid][8:]
            else:
                data = data + self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
        return h

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def dump_sigtask(self, fn, task, stampbase, runtime):

        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basehash_ignore_vars'] = self.basehash_ignore_vars
        data['taskhash_ignore_tasks'] = self.taskhash_ignore_tasks
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basehash_ignore_vars:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = []
            for f,cs in self.file_checksum_values[tid]:
                if "/./" in f:
                    data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
                else:
                    data['file_checksum_values'].append((os.path.basename(f), cs))
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
                f.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

    def dump_sigfn(self, fn, dataCaches, options):
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # Fall back to the basehash; if the tid has no basehash either, the
        # KeyError here is deliberate
        return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        if taskname != "do_setscene" and taskname.endswith("_setscene"):
            tid = fn + ":" + taskname[:-9]
        else:
            tid = fn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')
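
    # Illustrative values: stampfile("/stamps/busybox", fn, "do_compile", "")
    # returns "/stamps/busybox.do_compile.<taskhash>", and stampcleanmask()
    # below returns "/stamps/busybox.do_compile.*" to glob-match any hash.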

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)

class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def reset(self, data):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().reset(data)

    def exit(self):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().exit()

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures that a task won't be re-run when its taskhash changes
            # but it would still produce the same output hash
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash

    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task, we can return the taskhash directly
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # the most recent unihash for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same still
        #    works
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) Multiple independent builders can easily derive the same unique
        #    hash from the same input. This means that if the independent
        #    builders find the same taskhash, but it isn't reported to the
        #    server, there is a better chance that they will agree on the
        #    unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that is
                # much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash

    def report_unihash(self, path, task, d):
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just exit
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass

    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
    name = "TestMulticonfigDepends"
    supports_multiconfig_datacaches = True

def dump_this_task(outfile, d):
    import bb.parse
    fn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.build.stamp_internal(task, d, None, True)
    bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title':   '\033[1m',
              'color_default': '\033[0m',
              'color_add':     '\033[0;32m',
              'color_remove':  '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)
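
# Illustrative example, with colours disabled:
#   worddiff_str("a b c", "a d c") -> '"a [-b-] {+d+} c"'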

def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

def clean_basepath(basepath):
    basepath, dir, recipe_task = basepath.rsplit("/", 2)
    cleaned = dir + '/' + recipe_task

    if basepath[0] == '/':
        return cleaned

    if basepath.startswith("mc:") and basepath.count(':') >= 2:
        mc, mc_name, basepath = basepath.split(":", 2)
        mc_suffix = ':mc:' + mc_name
    else:
        mc_suffix = ''

    # The mc prefix has now been removed from basepath. Anything that remains
    # is a 'virtual:' style prefix of the form 'virtual:a[:b[:c]]:/path...'
    # (b and c being optional); ':/' marks the start of the recipe path and
    # hence the end of the prefix.
    if basepath[0] != '/':
        cleaned += ':' + basepath.split(':/', 1)[0]

    return cleaned + mc_suffix
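
# Illustrative inputs and outputs (hypothetical paths):
#   clean_basepath("/abs/meta/recipes-core/busybox/busybox_1.35.bb:do_fetch")
#       -> "busybox/busybox_1.35.bb:do_fetch"
#   clean_basepath("virtual:native:/abs/meta/recipes-core/busybox/busybox_1.35.bb:do_fetch")
#       -> "busybox/busybox_1.35.bb:do_fetch:virtual:native"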

def clean_basepaths(a):
    b = {}
    for x in a:
        b[clean_basepath(x)] = a[x]
    return b

def clean_basepaths_list(a):
    b = []
    for x in a:
        b.append(clean_basepath(x))
    return b

# Handle renamed fields
def handle_renames(data):
    if 'basewhitelist' in data:
        data['basehash_ignore_vars'] = data['basewhitelist']
        del data['basewhitelist']
    if 'taskwhitelist' in data:
        data['taskhash_ignore_tasks'] = data['taskwhitelist']
        del data['taskwhitelist']


def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)
    with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
        b_data = json.load(f, object_hook=SetDecoder)

    for data in [a_data, b_data]:
        handle_renames(data)

    def dict_diff(a, b, ignored_vars=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in ignored_vars:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed
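
    # For example:
    #   dict_diff({'A': 1, 'B': 2}, {'B': 3, 'C': 4})
    #       -> (changed={'B'}, added={'C'}, removed={'A'})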

    def file_checksums_diff(a, b):
        from collections import Counter

        # Convert lists back to tuples
        a = [(f[0], f[1]) for f in a]
        b = [(f[0], f[1]) for f in b]

        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed
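
    # For example:
    #   file_checksums_diff([("f", "c1")], [("f", "c2"), ("g", "c3")])
    #       -> (changed=[("f", "c1", "c2")], added=["g"], removed=[])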

    if 'basehash_ignore_vars' in a_data and a_data['basehash_ignore_vars'] != b_data['basehash_ignore_vars']:
        output.append(color_format("{color_title}basehash_ignore_vars changed{color_default} from '%s' to '%s'") % (a_data['basehash_ignore_vars'], b_data['basehash_ignore_vars']))
        if a_data['basehash_ignore_vars'] and b_data['basehash_ignore_vars']:
            output.append("changed items: %s" % a_data['basehash_ignore_vars'].symmetric_difference(b_data['basehash_ignore_vars']))

    if 'taskhash_ignore_tasks' in a_data and a_data['taskhash_ignore_tasks'] != b_data['taskhash_ignore_tasks']:
        output.append(color_format("{color_title}taskhash_ignore_tasks changed{color_default} from '%s' to '%s'") % (a_data['taskhash_ignore_tasks'], b_data['taskhash_ignore_tasks']))
        if a_data['taskhash_ignore_tasks'] and b_data['taskhash_ignore_tasks']:
            output.append("changed items: %s" % a_data['taskhash_ignore_tasks'].symmetric_difference(b_data['taskhash_ignore_tasks']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basehash_ignore_vars'] & b_data['basehash_ignore_vars'])
    if changed:
        for dep in sorted(changed):
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in sorted(added):
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in sorted(removed):
            output.append(color_format("{color_title}Dependency on variable %s was removed") % (dep))


    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in sorted(changed):
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if 'file_checksum_values' not in a_data:
        a_data['file_checksum_values'] = []
    if 'file_checksum_values' not in b_data:
        b_data['file_checksum_values'] = []

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if 'runtaskdeps' not in a_data:
        a_data['runtaskdeps'] = {}
    if 'runtaskdeps' not in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = clean_basepaths(a_data['runtaskhashes'])
        b = clean_basepaths(b_data['runtaskhashes'])
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in sorted(added):
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (dep, b[dep]))
        if removed:
            for dep in sorted(removed):
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (dep, a[dep]))
        if changed:
            for dep in sorted(changed):
                if not collapsed:
                    output.append(color_format("{color_title}Hash for task dependency %s changed{color_default} from %s to %s") % (dep, a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
                            output = [output[-1]] + recout
                            break

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output


def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in alldeps:
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

def calc_taskhash(sigdata):
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            if "./" in c[0]:
                data = data + c[0]
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()
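
# calc_basehash() and calc_taskhash() mirror the hash construction performed
# by SignatureGeneratorBasic; dump_sigtask() and dump_sigfile() use them to
# cross-check that a dumped sigdata file reproduces the recorded hashes.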


def dump_sigfile(a):
    output = []

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)

    handle_renames(a_data)

    output.append("basehash_ignore_vars: %s" % (sorted(a_data['basehash_ignore_vars'])))

    output.append("taskhash_ignore_tasks: %s" % (sorted(a_data['taskhash_ignore_tasks'] or [])))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in sorted(a_data['gendeps']):
        output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))

    for dep in sorted(a_data['varvals']):
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))

    if 'runtaskhashes' in a_data:
        for dep in sorted(a_data['runtaskhashes']):
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and the one recorded in the file is %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output