#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv
import hashserv.client

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

def init(d):
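    """
    Instantiate and return the signature generator selected by
    BB_SIGNATURE_HANDLER, falling back to the base 'noop' generator when the
    requested name does not match any available generator.
    """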
    siggens = [obj for obj in globals().values()
                      if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)

    logger.error("Invalid signature generator '%s', using default 'noop'\n"
                 "Available generators: %s", desired,
                 ', '.join(obj.name for obj in siggens))
    return SignatureGenerator(d)

class SignatureGenerator(object):
    """
    A dummy 'noop' signature generator which produces placeholder hashes.
    It also defines the interface that real signature generators implement.
    """
    name = "noop"

    # If the derived class supports multiconfig datacaches, set this to True
    # The default is False for backward compatibility with derived signature
    # generators that do not understand multiconfig caches
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, variant):
        return

    def postparsing_clean_cache(self):
        return

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, d, fn):
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to
        signature generator functions. If the signature generator supports
        multiconfig caches, the entire dictionary of data caches is sent;
        otherwise a special proxy is sent that supports both index access to
        all multiconfigs and direct attribute access for the default
        multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed
        in the future when support for multiconfig data caches is mandatory.
        """
        class DataCacheProxy(object):
            def __init__(self):
                pass

            def __getitem__(self, key):
                return dataCaches[key]

            def __getattr__(self, name):
                return getattr(dataCaches[mc], name)

        if cls.supports_multiconfig_datacaches:
            return dataCaches

        return DataCacheProxy()

class SignatureGeneratorBasic(SignatureGenerator):
    """
    A signature generator which computes base hashes from variable
    dependencies and task hashes from runtime dependencies, file checksums
    and taints.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.setscenetasks = set()
        self.basewhitelist = set((data.getVar("BB_HASHBASE_WHITELIST") or "").split())
        self.taskwhitelist = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None
        if self.taskwhitelist:
            self.twl = re.compile(self.taskwhitelist)
        else:
            self.twl = None

    def _build_data(self, fn, d):
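        """
        Generate the variable dependency data for each task in the recipe
        and compute the per-task base hashes, flagging an error if a base
        hash changes on reparse (a sign of non-deterministic metadata).
        """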

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basewhitelist)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basewhitelist, fn)

        for task in tasklist:
            tid = fn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % fn)
            raise

        # Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH_task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the whitelist
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):
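        """
        Gather everything that feeds into a task's hash: its base hash, the
        runtime dependencies to include, file checksums and any taints. The
        hash itself is computed later by get_taskhash().
        """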

        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[fn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.supports_multiconfig_datacaches and mc != depmc:
                # If the signature generator doesn't understand multiconfig
                # data caches, any dependency not in the same multiconfig must
                # be skipped for backward compatibility
                continue
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            for (f, cs) in checksums:
                self.file_checksum_values[tid].append((f, cs))

        taskdep = dataCaches[mc].task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):
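        """
        Combine the base hash with the unihashes of all runtime
        dependencies, the collected file checksums and any taint to produce
        the final task hash.
        """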

        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            if dep in self.unihash:
                if self.unihash[dep] is None:
                    data = data + self.taskhash[dep]
                else:
                    data = data + self.unihash[dep]
            else:
                data = data + self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                data = data + cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data = data + self.taints[tid][8:]
            else:
                data = data + self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task])
        return h

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def dump_sigtask(self, fn, task, stampbase, runtime):

        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basewhitelist'] = self.basewhitelist
        data['taskwhitelist'] = self.taskwhitelist
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basewhitelist:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = [(os.path.basename(f), cs) for f, cs in self.file_checksum_values[tid]]
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with os.fdopen(fd, "wb") as stream:
                pickle.dump(data, stream, -1)
                stream.flush()
            os.chmod(tmpfile, 0o664)
            os.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

    def dump_sigfn(self, fn, dataCaches, options):
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If the task is not in basehash either, the resulting KeyError is
        # the desired error
        return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        if taskname != "do_setscene" and taskname.endswith("_setscene"):
            tid = fn + ":" + taskname[:-9]
        else:
            tid = fn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)

class SignatureGeneratorUniHashMixIn(object):
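    """
    Mixin adding hash equivalence support: task hashes are translated into
    'unihashes' via a hash equivalence server, allowing tasks with differing
    taskhashes but identical output to share sstate.
    """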
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash.
            # This ensures that a task won't be re-run if its taskhash
            # changes in a way that still results in the same output hash.
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash

    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task we can return the taskhash directly
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # the most recent entry for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        #    continues to work
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) Multiple independent builders can easily derive the same unique
        #    hash from the same input. This means that if the independent
        #    builders find the same taskhash, but it isn't reported to the
        #    server, there is a better chance that they will agree on the
        #    unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that is
                # much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug(2, 'No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except hashserv.client.HashConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash

    def report_unihash(self, path, task, d):
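        """
        Compute the output hash of a completed task and report the
        taskhash/outhash/unihash combination to the hash equivalence server,
        adopting whatever unihash the server returns.
        """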
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just abort
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug(1, 'Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug(1, 'Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except hashserv.client.HashConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass

    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except hashserv.client.HashConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
    name = "TestMulticonfigDepends"
    supports_multiconfig_datacaches = True

def dump_this_task(outfile, d):
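    """Dump the signature data of the currently executing task to outfile."""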
    import bb.parse
    fn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.build.stamp_internal(task, d, None, True)
    bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title':   '\033[1m',
              'color_default': '\033[0m',
              'color_add':     '\033[0;32m',
              'color_remove':  '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
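    """
    Return a word-level diff of two strings, marking additions as {+word+}
    and removals as [-word-]; e.g. worddiff_str("a b c", "a d c") gives
    '"a [-b-] {+d+} c"'.
    """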
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)

def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

def clean_basepath(a):
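    """
    Shorten a recipe path or task identifier for display, keeping only the
    last two path components together with any multiconfig or virtual class
    information.
    """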
    mc = None
    if a.startswith("mc:"):
        _, mc, a = a.split(":", 2)
    b = a.rsplit("/", 2)[1] + '/' + a.rsplit("/", 2)[2]
    if a.startswith("virtual:"):
        b = b + ":" + a.rsplit(":", 1)[0]
    if mc:
        b = b + ":mc:" + mc
    return b

def clean_basepaths(a):
    b = {}
    for x in a:
        b[clean_basepath(x)] = a[x]
    return b

def clean_basepaths_list(a):
    b = []
    for x in a:
        b.append(clean_basepath(x))
    return b

def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
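    """
    Compare two siginfo/sigdata files and return a list of human-readable
    lines describing the differences (whitelists, variable values, file
    checksums, dependent task hashes and taints).
    """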
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with open(a, 'rb') as f:
        p1 = pickle.Unpickler(f)
        a_data = p1.load()
    with open(b, 'rb') as f:
        p2 = pickle.Unpickler(f)
        b_data = p2.load()

    def dict_diff(a, b, whitelist=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in whitelist:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        from collections import Counter
        # Handle old siginfo format
        if isinstance(a, dict):
            a = [(os.path.basename(f), cs) for f, cs in a.items()]
        if isinstance(b, dict):
            b = [(os.path.basename(f), cs) for f, cs in b.items()]
        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    if 'basewhitelist' in a_data and a_data['basewhitelist'] != b_data['basewhitelist']:
        output.append(color_format("{color_title}basewhitelist changed{color_default} from '%s' to '%s'") % (a_data['basewhitelist'], b_data['basewhitelist']))
        if a_data['basewhitelist'] and b_data['basewhitelist']:
            output.append("changed items: %s" % a_data['basewhitelist'].symmetric_difference(b_data['basewhitelist']))

    if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']:
        output.append(color_format("{color_title}taskwhitelist changed{color_default} from '%s' to '%s'") % (a_data['taskwhitelist'], b_data['taskwhitelist']))
        if a_data['taskwhitelist'] and b_data['taskwhitelist']:
            output.append("changed items: %s" % a_data['taskwhitelist'].symmetric_difference(b_data['taskwhitelist']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basewhitelist'] & b_data['basewhitelist'])
    if changed:
        for dep in changed:
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in added:
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in removed:
            output.append(color_format("{color_title}Dependency on variable %s was removed") % (dep))

    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in changed:
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if 'file_checksum_values' not in a_data:
        a_data['file_checksum_values'] = {}
    if 'file_checksum_values' not in b_data:
        b_data['file_checksum_values'] = {}

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if 'runtaskdeps' not in a_data:
        a_data['runtaskdeps'] = {}
    if 'runtaskdeps' not in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))

    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = a_data['runtaskhashes']
        b = b_data['runtaskhashes']
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in added:
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep]))
        if removed:
            for dep in removed:
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep]))
        if changed:
            for dep in changed:
                if not collapsed:
                    output.append(color_format("{color_title}Hash for dependent task %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
                            output = [output[-1]] + recout

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output

def calc_basehash(sigdata):
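    """Recalculate a task's base hash from dumped signature data."""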
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in alldeps:
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

def calc_taskhash(sigdata):
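    """
    Recalculate a task's hash from dumped signature data, mirroring
    SignatureGeneratorBasic.get_taskhash().
    """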
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()

def dump_sigfile(a):
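    """
    Load a siginfo/sigdata file and return its contents as a list of
    human-readable lines, including recomputed base and task hashes.
    """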
    output = []

    with open(a, 'rb') as f:
        p1 = pickle.Unpickler(f)
        a_data = p1.load()

    output.append("basewhitelist: %s" % (a_data['basewhitelist']))

    output.append("taskwhitelist: %s" % (a_data['taskwhitelist']))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in a_data['gendeps']:
        output.append("List of dependencies for variable %s is %s" % (dep, a_data['gendeps'][dep]))

    for dep in a_data['varvals']:
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (a_data['runtaskdeps']))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (a_data['file_checksum_values']))

    if 'runtaskhashes' in a_data:
        for dep in a_data['runtaskhashes']:
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and the hash from the file is %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    # sigbasedata files carry no runtime information, so the task hash can
    # only be recomputed for full sigdata/siginfo files
    if 'runtaskdeps' in a_data:
        computed_taskhash = calc_taskhash(a_data)
        output.append("Computed task hash is %s" % computed_taskhash)

    return output