xref: /openbmc/openbmc/poky/bitbake/lib/bb/siggen.py (revision 517393d9)
#
# Copyright BitBake Contributors
#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
import json
import types
import bb.compress.zstd
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv
import hashserv.client

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

class SetEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, set) or isinstance(obj, frozenset):
            return dict(_set_object=list(sorted(obj)))
        return json.JSONEncoder.default(self, obj)

def SetDecoder(dct):
    if '_set_object' in dct:
        return frozenset(dct['_set_object'])
    return dct

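# Illustrative sketch of how the two hooks above round-trip set values
# through JSON (the 'deps' key is hypothetical):
#
#   json.loads(json.dumps({"deps": {"b", "a"}}, cls=SetEncoder),
#              object_hook=SetDecoder)
#   -> {'deps': frozenset({'a', 'b'})}
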
def init(d):
    siggens = [obj for obj in globals().values()
                      if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
    else:
        logger.error("Invalid signature generator '%s', using default 'noop'\n"
                     "Available generators: %s", desired,
                     ', '.join(obj.name for obj in siggens))
        return SignatureGenerator(d)

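# For example, setting BB_SIGNATURE_HANDLER = "basichash" in a configuration
# file selects SignatureGeneratorBasicHash below; an unrecognised value falls
# back to the "noop" generator after logging the error above.
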
class SignatureGenerator(object):
    """
    Dummy signature generator ("noop"), used as the default/fallback; it
    also defines the interface the other generators implement.
    """
    name = "noop"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, variant):
        return

    def postparsing_clean_cache(self):
        return

    def setup_datacache(self, datacaches):
        self.datacaches = datacaches

    def setup_datacache_from_datastore(self, mcfn, d):
        # In task context we have no cache, so set up internal data structures
        # from the fully parsed data store provided

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        tasks = d.getVar('__BBTASKS', False)

        self.datacaches = {}
        self.datacaches[mc] = types.SimpleNamespace()
        setattr(self.datacaches[mc], "stamp", {})
        self.datacaches[mc].stamp[mcfn] = d.getVar('STAMP')
        setattr(self.datacaches[mc], "stamp_extrainfo", {})
        self.datacaches[mc].stamp_extrainfo[mcfn] = {}
        for t in tasks:
            flag = d.getVarFlag(t, "stamp-extra-info")
            if flag:
                self.datacaches[mc].stamp_extrainfo[mcfn][t] = flag

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile_base(self, mcfn):
        mc = bb.runqueue.mc_from_tid(mcfn)
        return self.datacaches[mc].stamp[mcfn]

    def stampfile_mcfn(self, taskname, mcfn, extrainfo=True):
        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]
        if not stamp:
            return

        stamp_extrainfo = ""
        if extrainfo:
            taskflagname = taskname
            if taskname.endswith("_setscene"):
                taskflagname = taskname.replace("_setscene", "")
            stamp_extrainfo = self.datacaches[mc].stamp_extrainfo[mcfn].get(taskflagname) or ""

        return self.stampfile(stamp, mcfn, taskname, stamp_extrainfo)

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

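    # Illustrative result (path is hypothetical): with extrainfo == "",
    #   stampfile("/stamps/foo/1.0-r0/stamp", mcfn, "do_compile", "")
    # returns "/stamps/foo/1.0-r0/stamp.do_compile"; the rstrip('.') drops
    # the trailing dot left by the empty extrainfo field.
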
    def stampcleanmask_mcfn(self, taskname, mcfn):
        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]
        if not stamp:
            return []

        taskflagname = taskname
        if taskname.endswith("_setscene"):
            taskflagname = taskname.replace("_setscene", "")
        stamp_extrainfo = self.datacaches[mc].stamp_extrainfo[mcfn].get(taskflagname) or ""

        return self.stampcleanmask(stamp, mcfn, taskname, stamp_extrainfo)

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, mcfn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, mcfn):
        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]
        bb.utils.remove(stamp)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def copy_unitaskhashes(self, targetdir):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    def exit(self):
        return

class SignatureGeneratorBasic(SignatureGenerator):
    """
    The "basic" signature generator: computes base hashes from variable
    dependencies and task hashes from those plus runtime task dependencies.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.setscenetasks = set()
        self.basehash_ignore_vars = set((data.getVar("BB_BASEHASH_IGNORE_VARS") or "").split())
        self.taskhash_ignore_tasks = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        self.taskhash_ignore_tasks = data.getVar("BB_TASKHASH_IGNORE_TASKS") or None
        if self.taskhash_ignore_tasks:
            self.twl = re.compile(self.taskhash_ignore_tasks)
        else:
            self.twl = None

    def _build_data(self, mcfn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basehash_ignore_vars)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basehash_ignore_vars, mcfn)

        for task in tasklist:
            tid = mcfn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        return taskdeps, gendeps, lookupcache

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        mcfn = fn
        if variant or mc:
            mcfn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps, gendeps, lookupcache = self._build_data(mcfn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % mcfn)
            raise

        # Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[mcfn]:
        #    self.dump_sigtask(mcfn, task, d.getVar("STAMP"), False)

        basehashes = {}
        for task in taskdeps:
            basehashes[task] = self.basehash[mcfn + ":" + task]

        d.setVar("__siggen_basehashes", basehashes)
        d.setVar("__siggen_gendeps", gendeps)
        d.setVar("__siggen_varvals", lookupcache)
        d.setVar("__siggen_taskdeps", taskdeps)

    def setup_datacache_from_datastore(self, mcfn, d):
        super().setup_datacache_from_datastore(mcfn, d)

        mc = bb.runqueue.mc_from_tid(mcfn)
        for attr in ["siggen_varvals", "siggen_taskdeps", "siggen_gendeps"]:
            if not hasattr(self.datacaches[mc], attr):
                setattr(self.datacaches[mc], attr, {})
        self.datacaches[mc].siggen_varvals[mcfn] = d.getVar("__siggen_varvals")
        self.datacaches[mc].siggen_taskdeps[mcfn] = d.getVar("__siggen_taskdeps")
        self.datacaches[mc].siggen_gendeps[mcfn] = d.getVar("__siggen_gendeps")

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the ignore
        # list
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

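    # Sketch of the taint mechanism: a forced run (e.g. "bitbake -f", or a
    # call to invalidate_task()) writes a random UUID to
    # "<stamp>.<task>.taint"; read_taint() picks it up and prep_taskhash()
    # below folds it into the task hash so the task and its dependents re-run.
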
    def prep_taskhash(self, tid, deps, dataCaches):

        (mc, _, task, mcfn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[mcfn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.rundep_check(mcfn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[mcfn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[mcfn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[mcfn][task], recipename, self.localdirsexclude)
            for (f,cs) in checksums:
                self.file_checksum_values[tid].append((f,cs))

        taskdep = dataCaches[mc].task_deps[mcfn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(mcfn, task, dataCaches[mc].stamp[mcfn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):

        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            data = data + self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                if "/./" in f:
                    data = data + "./" + f.split("/./")[1]
                data = data + cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data = data + self.taints[tid][8:]
            else:
                data = data + self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
        return h

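    # In pseudo-notation, the hash computed above is roughly:
    #   taskhash = sha256(basehash + unihash(dep_1) + ... + unihash(dep_n)
    #                     + file checksums + taint)
    # so a change to any dependency's unihash, any watched file, or a taint
    # yields a new taskhash.
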
    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def copy_unitaskhashes(self, targetdir):
        self.unihash_cache.copyfile(targetdir)

    def dump_sigtask(self, mcfn, task, stampbase, runtime):
        tid = mcfn + ":" + task
        mc = bb.runqueue.mc_from_tid(mcfn)
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basehash_ignore_vars'] = self.basehash_ignore_vars
        data['taskhash_ignore_tasks'] = self.taskhash_ignore_tasks
        data['taskdeps'] = self.datacaches[mc].siggen_taskdeps[mcfn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.datacaches[mc].siggen_varvals[mcfn][task]
        for dep in self.datacaches[mc].siggen_taskdeps[mcfn][task]:
            if dep in self.basehash_ignore_vars:
                continue
            data['gendeps'][dep] = self.datacaches[mc].siggen_gendeps[mcfn][dep]
            data['varvals'][dep] = self.datacaches[mc].siggen_varvals[mcfn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = []
            for f,cs in self.file_checksum_values[tid]:
                if "/./" in f:
                    data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
                else:
                    data['file_checksum_values'].append((os.path.basename(f), cs))
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(mcfn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = bb.utils.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
                f.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If task is not in basehash, then error
        return self.basehash[tid]

    def stampfile(self, stampbase, mcfn, taskname, extrainfo, clean=False):
        if taskname.endswith("_setscene"):
            tid = mcfn + ":" + taskname[:-9]
        else:
            tid = mcfn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

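    # Illustrative stamp name: with a task hash of "abc123...", stampfile()
    # above yields "<stampbase>.do_compile.abc123..."; with clean=True it
    # substitutes "*" for the hash, so stampcleanmask() below returns a
    # glob matching stamps from any previous hash.
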
    def stampcleanmask(self, stampbase, mcfn, taskname, extrainfo):
        return self.stampfile(stampbase, mcfn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, mcfn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (mcfn, task))

        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]

        taintfn = stamp + '.' + task + '.taint'

        import uuid
        bb.utils.mkdirhier(os.path.dirname(taintfn))
        # The specific content of the taint file is not really important,
        # we just need it to be random, so a random UUID is used
        with open(taintfn, 'w') as taintf:
            taintf.write(str(uuid.uuid4()))

class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def reset(self, data):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().reset(data)

    def exit(self):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().exit()

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash.
            # This ensures that a task won't be re-run if the taskhash
            # changes but still results in the same output hash.
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash

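    # The cache above maps "mc:pn:taskname" -> (taskhash, unihash); the
    # stored unihash is only trusted while the stored taskhash still
    # matches the hash being queried.
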
    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task we can return
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to
        # keep the entry for the current taskhash of each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        #    continues to work
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) It is easy for multiple independent builders to derive the same
        #    unique hash from the same input. This means that if independent
        #    builders find the same taskhash, but it isn't reported to the
        #    server, there is a better chance that they will agree on the
        #    unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash

    def report_unihash(self, path, task, d):
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        mcfn = d.getVar('BB_FILENAME')
        tid = mcfn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just exit
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(mcfn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass

    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

def dump_this_task(outfile, d):
    import bb.parse
    mcfn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.parse.siggen.stampfile_base(mcfn)
    bb.parse.siggen.dump_sigtask(mcfn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title':   '\033[1m',
              'color_default': '\033[0m',
              'color_add':     '\033[0;32m',
              'color_remove':  '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)

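# Illustrative output with colours disabled (the exact ordering of the
# +/- chunks comes from simplediff):
#   worddiff_str("a b c", "a x c")  ->  '"a [-b-] {+x+} c"'
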
def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

def clean_basepath(basepath):
    basepath, dir, recipe_task = basepath.rsplit("/", 2)
    cleaned = dir + '/' + recipe_task

    if basepath[0] == '/':
        return cleaned

    if basepath.startswith("mc:") and basepath.count(':') >= 2:
        mc, mc_name, basepath = basepath.split(":", 2)
        mc_suffix = ':mc:' + mc_name
    else:
        mc_suffix = ''

    # The mc prefix has now been removed from basepath. Whatever came next,
    # if present, will be the first suffix; ':/', the start of the recipe
    # path, marks its end. Something like 'virtual:a[:b[:c]]:/path...'
    # (b and c being optional)
    if basepath[0] != '/':
        cleaned += ':' + basepath.split(':/', 1)[0]

    return cleaned + mc_suffix

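# Illustrative inputs and outputs (paths are hypothetical):
#   clean_basepath("/full/path/recipes/foo/foo_1.0.bb:do_fetch")
#     -> "foo/foo_1.0.bb:do_fetch"
#   clean_basepath("virtual:native:/full/path/recipes/foo/foo_1.0.bb:do_fetch")
#     -> "foo/foo_1.0.bb:do_fetch:virtual:native"
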
def clean_basepaths(a):
    b = {}
    for x in a:
        b[clean_basepath(x)] = a[x]
    return b

def clean_basepaths_list(a):
    b = []
    for x in a:
        b.append(clean_basepath(x))
    return b

# Handle renamed fields
def handle_renames(data):
    if 'basewhitelist' in data:
        data['basehash_ignore_vars'] = data['basewhitelist']
        del data['basewhitelist']
    if 'taskwhitelist' in data:
        data['taskhash_ignore_tasks'] = data['taskwhitelist']
        del data['taskwhitelist']


def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)
    with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
        b_data = json.load(f, object_hook=SetDecoder)

    for data in [a_data, b_data]:
        handle_renames(data)

    def dict_diff(a, b, ignored_vars=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in ignored_vars:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        from collections import Counter

        # Convert lists back to tuples
        a = [(f[0], f[1]) for f in a]
        b = [(f[0], f[1]) for f in b]

        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    if 'basehash_ignore_vars' in a_data and a_data['basehash_ignore_vars'] != b_data['basehash_ignore_vars']:
        output.append(color_format("{color_title}basehash_ignore_vars changed{color_default} from '%s' to '%s'") % (a_data['basehash_ignore_vars'], b_data['basehash_ignore_vars']))
        if a_data['basehash_ignore_vars'] and b_data['basehash_ignore_vars']:
            output.append("changed items: %s" % a_data['basehash_ignore_vars'].symmetric_difference(b_data['basehash_ignore_vars']))

    if 'taskhash_ignore_tasks' in a_data and a_data['taskhash_ignore_tasks'] != b_data['taskhash_ignore_tasks']:
        output.append(color_format("{color_title}taskhash_ignore_tasks changed{color_default} from '%s' to '%s'") % (a_data['taskhash_ignore_tasks'], b_data['taskhash_ignore_tasks']))
        if a_data['taskhash_ignore_tasks'] and b_data['taskhash_ignore_tasks']:
            output.append("changed items: %s" % a_data['taskhash_ignore_tasks'].symmetric_difference(b_data['taskhash_ignore_tasks']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basehash_ignore_vars'] & b_data['basehash_ignore_vars'])
    if changed:
        for dep in sorted(changed):
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in sorted(added):
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in sorted(removed):
            output.append(color_format("{color_title}Dependency on variable %s was removed") % (dep))


    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in sorted(changed):
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if 'file_checksum_values' not in a_data:
        a_data['file_checksum_values'] = []
    if 'file_checksum_values' not in b_data:
        b_data['file_checksum_values'] = []

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if 'runtaskdeps' not in a_data:
        a_data['runtaskdeps'] = {}
    if 'runtaskdeps' not in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = clean_basepaths(a_data['runtaskhashes'])
        b = clean_basepaths(b_data['runtaskhashes'])
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in sorted(added):
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (dep, b[dep]))
        if removed:
            for dep in sorted(removed):
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (dep, a[dep]))
        if changed:
            for dep in sorted(changed):
                if not collapsed:
                    output.append(color_format("{color_title}Hash for task dependency %s changed{color_default} from %s to %s") % (dep, a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
                            output = [output[-1]] + recout
                            break

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output


def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in sorted(alldeps):
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

def calc_taskhash(sigdata):
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            if "./" in c[0]:
                data = data + c[0]
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()

def dump_sigfile(a):
    output = []

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)

    handle_renames(a_data)

    output.append("basehash_ignore_vars: %s" % (sorted(a_data['basehash_ignore_vars'])))

    output.append("taskhash_ignore_tasks: %s" % (sorted(a_data['taskhash_ignore_tasks'] or [])))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in sorted(a_data['gendeps']):
        output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))

    for dep in sorted(a_data['varvals']):
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))

    if 'runtaskhashes' in a_data:
        for dep in sorted(a_data['runtaskhashes']):
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and the one from the file is %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output