xref: /openbmc/openbmc/poky/bitbake/lib/bb/siggen.py (revision d25ed324)
#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

def init(d):
    siggens = [obj for obj in globals().values()
                      if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
    else:
        logger.error("Invalid signature generator '%s', using default 'noop'\n"
                     "Available generators: %s", desired,
                     ', '.join(obj.name for obj in siggens))
        return SignatureGenerator(d)

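# Example (illustrative, not part of this module): the generator is selected
# via BB_SIGNATURE_HANDLER, which must match a subclass's 'name' attribute,
# e.g. in a conf file:
#
#   BB_SIGNATURE_HANDLER = "basichash"
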
class SignatureGenerator(object):
    """
    The "noop" signature generator: it defines the interface every signature
    generator implements while computing only trivial placeholder hashes.
    """
    name = "noop"
    # If the derived class supports multiconfig datacaches, set this to True
    # The default is False for backward compatibility with derived signature
    # generators that do not understand multiconfig caches
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, variant):
        return

    def postparsing_clean_cache(self):
        return

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, d, fn):
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to signature
        generator functions. If the signature generator supports multiconfig
        caches, the entire dictionary of data caches is sent; otherwise a
        special proxy is sent that supports both index access to all
        multiconfigs and direct access to the default multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig-aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed
        in the future when support for multiconfig data caches is mandatory.
        """
        class DataCacheProxy(object):
            def __init__(self):
                pass

            def __getitem__(self, key):
                return dataCaches[key]

            def __getattr__(self, name):
                return getattr(dataCaches[mc], name)

        if cls.supports_multiconfig_datacaches:
            return dataCaches

        return DataCacheProxy()

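# Illustrative sketch (hypothetical cache objects, not part of bitbake): the
# proxy returned above lets multiconfig-unaware generators treat the default
# multiconfig's cache as the whole cache, while index access still reaches
# every multiconfig:
#
#   caches = {"": default_cache, "mc1": mc1_cache}
#   proxy = SignatureGenerator.get_data_caches(caches, "")
#   proxy.pkg_fn          # attribute access -> default_cache.pkg_fn
#   proxy["mc1"].pkg_fn   # index access -> mc1_cache.pkg_fn
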
class SignatureGeneratorBasic(SignatureGenerator):
    """
    A signature generator that hashes the parsed metadata into base hashes
    and combines those with runtime dependency data to form task hashes.
    """
    name = "basic"
    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.setscenetasks = set()
        self.basewhitelist = set((data.getVar("BB_HASHBASE_WHITELIST") or "").split())
        self.taskwhitelist = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None
        if self.taskwhitelist:
            self.twl = re.compile(self.taskwhitelist)
        else:
            self.twl = None

    def _build_data(self, fn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basewhitelist)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basewhitelist, fn)

        for task in tasklist:
            tid = fn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % fn)
            raise

        # Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH_task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the whitelist
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

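    # Illustrative example (hypothetical paths): a forced run of do_compile
    # with stampbase "/build/stamps/foo-1.0-r0" is expected to leave a uuid4
    # string in "/build/stamps/foo-1.0-r0.do_compile.taint", which
    # read_taint() returns (or None if no taint file exists).
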
    def prep_taskhash(self, tid, deps, dataCaches):

        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[fn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.supports_multiconfig_datacaches and mc != depmc:
                # If the signature generator doesn't understand multiconfig
                # data caches, any dependency not in the same multiconfig must
                # be skipped for backward compatibility
                continue
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            for (f,cs) in checksums:
                self.file_checksum_values[tid].append((f,cs))

        taskdep = dataCaches[mc].task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):

        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            if dep in self.unihash:
                if self.unihash[dep] is None:
                    data = data + self.taskhash[dep]
                else:
                    data = data + self.unihash[dep]
            else:
                data = data + self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                data = data + cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data = data + self.taints[tid][8:]
            else:
                data = data + self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task])
        return h

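    # Illustrative summary of the composition above: the task hash is sha256
    # over basehash + each runtime dependency's unihash (in runtaskdeps
    # order) + each non-empty file checksum + any taint (with a "nostamp:"
    # prefix stripped). calc_taskhash() at the bottom of this file recomputes
    # the same composition from a dumped siginfo file.
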
    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def dump_sigtask(self, fn, task, stampbase, runtime):

        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basewhitelist'] = self.basewhitelist
        data['taskwhitelist'] = self.taskwhitelist
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basewhitelist:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = [(os.path.basename(f), cs) for f,cs in self.file_checksum_values[tid]]
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with os.fdopen(fd, "wb") as stream:
                pickle.dump(data, stream, -1)
                stream.flush()
            os.chmod(tmpfile, 0o664)
            os.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

    def dump_sigfn(self, fn, dataCaches, options):
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If the task is not in basehash either, the resulting KeyError is
        # the desired error
        return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        if taskname != "do_setscene" and taskname.endswith("_setscene"):
            tid = fn + ":" + taskname[:-9]
        else:
            tid = fn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

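    # Illustrative example (hypothetical values): with stampbase
    # "/build/stamps/foo-1.0-r0", taskname "do_compile", hash "deadbeef" and
    # empty extrainfo, stampfile() yields
    # "/build/stamps/foo-1.0-r0.do_compile.deadbeef" (the trailing '.' from
    # the empty extrainfo is stripped), and stampcleanmask() yields the same
    # pattern with '*' in place of the hash.
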
    def invalidate_task(self, task, d, fn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)

class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures that a task won't be re-run if its taskhash changes but
            # it would still produce the same output hash
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash

    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task we can return the taskhash directly
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # the most recent unihash for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # If we can't discover a unique hash from the server, make it
        # equivalent to the taskhash. The unique "hash" only really needs to
        # be a unique string (not even necessarily a hash), but making it
        # match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes the hashes can be the same
        #    continues to work
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) Multiple independent builders can easily derive the same unique
        #    hash from the same input. This means that if independent builders
        #    find the same taskhash but it isn't reported to the server, there
        #    is a better chance that they will agree on the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that is
                # much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug(2, 'No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except hashserv.client.HashConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash

    def report_unihash(self, path, task, d):
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just abort
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug(1, 'Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug(1, 'Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except hashserv.client.HashConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass

    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except hashserv.client.HashConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
    name = "TestMulticonfigDepends"
    supports_multiconfig_datacaches = True

def dump_this_task(outfile, d):
    import bb.parse
    fn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.build.stamp_internal(task, d, None, True)
    bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title':   '\033[1m',
              'color_default': '\033[0m',
              'color_add':     '\033[0;32m',
              'color_remove':  '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)

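# Illustrative example: with colours disabled,
#   worddiff_str("a b c", "a d c")  ->  '"a [-b-] {+d+} c"'
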
def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

def clean_basepath(a):
    mc = None
    if a.startswith("mc:"):
        _, mc, a = a.split(":", 2)
    b = a.rsplit("/", 2)[1] + '/' + a.rsplit("/", 2)[2]
    if a.startswith("virtual:"):
        b = b + ":" + a.rsplit(":", 1)[0]
    if mc:
        b = b + ":mc:" + mc
    return b

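# Illustrative example (hypothetical path): clean_basepath() keeps only the
# last directory, the recipe file and the task suffix, e.g.
#   "/full/path/to/recipe.bb:do_task"  ->  "to/recipe.bb:do_task"
# with any "virtual:" or "mc:<name>:" decoration re-appended as a suffix.
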
def clean_basepaths(a):
    b = {}
    for x in a:
        b[clean_basepath(x)] = a[x]
    return b

def clean_basepaths_list(a):
    b = []
    for x in a:
        b.append(clean_basepath(x))
    return b

def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with open(a, 'rb') as f:
        p1 = pickle.Unpickler(f)
        a_data = p1.load()
    with open(b, 'rb') as f:
        p2 = pickle.Unpickler(f)
        b_data = p2.load()

    def dict_diff(a, b, whitelist=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in whitelist:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        from collections import Counter
        # Handle old siginfo format
        if isinstance(a, dict):
            a = [(os.path.basename(f), cs) for f, cs in a.items()]
        if isinstance(b, dict):
            b = [(os.path.basename(f), cs) for f, cs in b.items()]
        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    if 'basewhitelist' in a_data and a_data['basewhitelist'] != b_data['basewhitelist']:
        output.append(color_format("{color_title}basewhitelist changed{color_default} from '%s' to '%s'") % (a_data['basewhitelist'], b_data['basewhitelist']))
        if a_data['basewhitelist'] and b_data['basewhitelist']:
            output.append("changed items: %s" % a_data['basewhitelist'].symmetric_difference(b_data['basewhitelist']))

    if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']:
        output.append(color_format("{color_title}taskwhitelist changed{color_default} from '%s' to '%s'") % (a_data['taskwhitelist'], b_data['taskwhitelist']))
        if a_data['taskwhitelist'] and b_data['taskwhitelist']:
            output.append("changed items: %s" % a_data['taskwhitelist'].symmetric_difference(b_data['taskwhitelist']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

858
859    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basewhitelist'] & b_data['basewhitelist'])
860    if changed:
861        for dep in changed:
862            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
863            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
864                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
865    if added:
866        for dep in added:
867            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
868    if removed:
869        for dep in removed:
870            output.append(color_format("{color_title}Dependency on Variable %s was removed") % (dep))
871
872
    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in changed:
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if 'file_checksum_values' not in a_data:
        a_data['file_checksum_values'] = {}
    if 'file_checksum_values' not in b_data:
        b_data['file_checksum_values'] = {}

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if 'runtaskdeps' not in a_data:
        a_data['runtaskdeps'] = {}
    if 'runtaskdeps' not in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = a_data['runtaskhashes']
        b = b_data['runtaskhashes']
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in added:
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep]))
        if removed:
            for dep in removed:
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep]))
        if changed:
            for dep in changed:
                if not collapsed:
                    output.append(color_format("{color_title}Hash for dependent task %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, we might as well print the line above and
                            # then defer to the changes in that hash since in all likelihood they're
                            # the same changes this task also saw.
                            output = [output[-1]] + recout

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output


def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in alldeps:
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

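# Illustrative example (hypothetical data): for
#   sigdata = {'task': 'do_x', 'taskdeps': ['FOO'],
#              'varvals': {'do_x': 'echo hi', 'FOO': 'bar'}}
# calc_basehash() returns hashlib.sha256(b"echo hiFOObar").hexdigest().
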
def calc_taskhash(sigdata):
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()

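# Note: this mirrors SignatureGeneratorBasic.get_taskhash() above, so a dumped
# siginfo file can be checked for internal consistency: basehash, then each
# dependency's hash, then non-empty file checksums, then any taint.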

def dump_sigfile(a):
    output = []

    with open(a, 'rb') as f:
        p1 = pickle.Unpickler(f)
        a_data = p1.load()

    output.append("basewhitelist: %s" % (a_data['basewhitelist']))

    output.append("taskwhitelist: %s" % (a_data['taskwhitelist']))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in a_data['gendeps']:
        output.append("List of dependencies for variable %s is %s" % (dep, a_data['gendeps'][dep]))

    for dep in a_data['varvals']:
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (a_data['runtaskdeps']))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (a_data['file_checksum_values']))

    if 'runtaskhashes' in a_data:
        for dep in a_data['runtaskhashes']:
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output