1# 2# Copyright BitBake Contributors 3# 4# SPDX-License-Identifier: GPL-2.0-only 5# 6 7import hashlib 8import logging 9import os 10import re 11import tempfile 12import pickle 13import bb.data 14import difflib 15import simplediff 16import json 17import bb.compress.zstd 18from bb.checksum import FileChecksumCache 19from bb import runqueue 20import hashserv 21import hashserv.client 22 23logger = logging.getLogger('BitBake.SigGen') 24hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv') 25 26class SetEncoder(json.JSONEncoder): 27 def default(self, obj): 28 if isinstance(obj, set): 29 return dict(_set_object=list(sorted(obj))) 30 return json.JSONEncoder.default(self, obj) 31 32def SetDecoder(dct): 33 if '_set_object' in dct: 34 return set(dct['_set_object']) 35 return dct 36 37def init(d): 38 siggens = [obj for obj in globals().values() 39 if type(obj) is type and issubclass(obj, SignatureGenerator)] 40 41 desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop" 42 for sg in siggens: 43 if desired == sg.name: 44 return sg(d) 45 else: 46 logger.error("Invalid signature generator '%s', using default 'noop'\n" 47 "Available generators: %s", desired, 48 ', '.join(obj.name for obj in siggens)) 49 return SignatureGenerator(d) 50 51class SignatureGenerator(object): 52 """ 53 """ 54 name = "noop" 55 56 # If the derived class supports multiconfig datacaches, set this to True 57 # The default is False for backward compatibility with derived signature 58 # generators that do not understand multiconfig caches 59 supports_multiconfig_datacaches = False 60 61 def __init__(self, data): 62 self.basehash = {} 63 self.taskhash = {} 64 self.unihash = {} 65 self.runtaskdeps = {} 66 self.file_checksum_values = {} 67 self.taints = {} 68 self.unitaskhashes = {} 69 self.tidtopn = {} 70 self.setscenetasks = set() 71 72 def finalise(self, fn, d, varient): 73 return 74 75 def postparsing_clean_cache(self): 76 return 77 78 def get_unihash(self, tid): 79 return self.taskhash[tid] 80 81 def prep_taskhash(self, tid, deps, dataCaches): 82 return 83 84 def get_taskhash(self, tid, deps, dataCaches): 85 self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest() 86 return self.taskhash[tid] 87 88 def writeout_file_checksum_cache(self): 89 """Write/update the file checksum cache onto disk""" 90 return 91 92 def stampfile(self, stampbase, file_name, taskname, extrainfo): 93 return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.') 94 95 def stampcleanmask(self, stampbase, file_name, taskname, extrainfo): 96 return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.') 97 98 def dump_sigtask(self, fn, task, stampbase, runtime): 99 return 100 101 def invalidate_task(self, task, d, fn): 102 bb.build.del_stamp(task, d, fn) 103 104 def dump_sigs(self, dataCache, options): 105 return 106 107 def get_taskdata(self): 108 return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks) 109 110 def set_taskdata(self, data): 111 self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data 112 113 def reset(self, data): 114 self.__init__(data) 115 116 def get_taskhashes(self): 117 return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn 118 119 def set_taskhashes(self, hashes): 120 self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes 121 122 def save_unitaskhashes(self): 123 return 124 125 def copy_unitaskhashes(self, targetdir): 126 return 127 128 def set_setscene_tasks(self, setscene_tasks): 129 return 130 131 @classmethod 132 def get_data_caches(cls, dataCaches, mc): 133 """ 134 This function returns the datacaches that should be passed to signature 135 generator functions. If the signature generator supports multiconfig 136 caches, the entire dictionary of data caches is sent, otherwise a 137 special proxy is sent that support both index access to all 138 multiconfigs, and also direct access for the default multiconfig. 139 140 The proxy class allows code in this class itself to always use 141 multiconfig aware code (to ease maintenance), but derived classes that 142 are unaware of multiconfig data caches can still access the default 143 multiconfig as expected. 144 145 Do not override this function in derived classes; it will be removed in 146 the future when support for multiconfig data caches is mandatory 147 """ 148 class DataCacheProxy(object): 149 def __init__(self): 150 pass 151 152 def __getitem__(self, key): 153 return dataCaches[key] 154 155 def __getattr__(self, name): 156 return getattr(dataCaches[mc], name) 157 158 if cls.supports_multiconfig_datacaches: 159 return dataCaches 160 161 return DataCacheProxy() 162 163 def exit(self): 164 return 165 166class SignatureGeneratorBasic(SignatureGenerator): 167 """ 168 """ 169 name = "basic" 170 171 def __init__(self, data): 172 self.basehash = {} 173 self.taskhash = {} 174 self.unihash = {} 175 self.taskdeps = {} 176 self.runtaskdeps = {} 177 self.file_checksum_values = {} 178 self.taints = {} 179 self.gendeps = {} 180 self.lookupcache = {} 181 self.setscenetasks = set() 182 self.basehash_ignore_vars = set((data.getVar("BB_BASEHASH_IGNORE_VARS") or "").split()) 183 self.taskhash_ignore_tasks = None 184 self.init_rundepcheck(data) 185 checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE") 186 if checksum_cache_file: 187 self.checksum_cache = FileChecksumCache() 188 self.checksum_cache.init_cache(data, checksum_cache_file) 189 else: 190 self.checksum_cache = None 191 192 self.unihash_cache = bb.cache.SimpleCache("3") 193 self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {}) 194 self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split() 195 self.tidtopn = {} 196 197 def init_rundepcheck(self, data): 198 self.taskhash_ignore_tasks = data.getVar("BB_TASKHASH_IGNORE_TASKS") or None 199 if self.taskhash_ignore_tasks: 200 self.twl = re.compile(self.taskhash_ignore_tasks) 201 else: 202 self.twl = None 203 204 def _build_data(self, fn, d): 205 206 ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1') 207 tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basehash_ignore_vars) 208 209 taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basehash_ignore_vars, fn) 210 211 for task in tasklist: 212 tid = fn + ":" + task 213 if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]: 214 bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid])) 215 bb.error("The following commands may help:") 216 cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task) 217 # Make sure sigdata is dumped before run printdiff 218 bb.error("%s -Snone" % cmd) 219 bb.error("Then:") 220 bb.error("%s -Sprintdiff\n" % cmd) 221 self.basehash[tid] = basehash[tid] 222 223 self.taskdeps[fn] = taskdeps 224 self.gendeps[fn] = gendeps 225 self.lookupcache[fn] = lookupcache 226 227 return taskdeps 228 229 def set_setscene_tasks(self, setscene_tasks): 230 self.setscenetasks = set(setscene_tasks) 231 232 def finalise(self, fn, d, variant): 233 234 mc = d.getVar("__BBMULTICONFIG", False) or "" 235 if variant or mc: 236 fn = bb.cache.realfn2virtual(fn, variant, mc) 237 238 try: 239 taskdeps = self._build_data(fn, d) 240 except bb.parse.SkipRecipe: 241 raise 242 except: 243 bb.warn("Error during finalise of %s" % fn) 244 raise 245 246 #Slow but can be useful for debugging mismatched basehashes 247 #for task in self.taskdeps[fn]: 248 # self.dump_sigtask(fn, task, d.getVar("STAMP"), False) 249 250 for task in taskdeps: 251 d.setVar("BB_BASEHASH:task-%s" % task, self.basehash[fn + ":" + task]) 252 253 def postparsing_clean_cache(self): 254 # 255 # After parsing we can remove some things from memory to reduce our memory footprint 256 # 257 self.gendeps = {} 258 self.lookupcache = {} 259 self.taskdeps = {} 260 261 def rundep_check(self, fn, recipename, task, dep, depname, dataCaches): 262 # Return True if we should keep the dependency, False to drop it 263 # We only manipulate the dependencies for packages not in the ignore 264 # list 265 if self.twl and not self.twl.search(recipename): 266 # then process the actual dependencies 267 if self.twl.search(depname): 268 return False 269 return True 270 271 def read_taint(self, fn, task, stampbase): 272 taint = None 273 try: 274 with open(stampbase + '.' + task + '.taint', 'r') as taintf: 275 taint = taintf.read() 276 except IOError: 277 pass 278 return taint 279 280 def prep_taskhash(self, tid, deps, dataCaches): 281 282 (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid) 283 284 self.basehash[tid] = dataCaches[mc].basetaskhash[tid] 285 self.runtaskdeps[tid] = [] 286 self.file_checksum_values[tid] = [] 287 recipename = dataCaches[mc].pkg_fn[fn] 288 289 self.tidtopn[tid] = recipename 290 291 for dep in sorted(deps, key=clean_basepath): 292 (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep) 293 depname = dataCaches[depmc].pkg_fn[depmcfn] 294 if not self.supports_multiconfig_datacaches and mc != depmc: 295 # If the signature generator doesn't understand multiconfig 296 # data caches, any dependency not in the same multiconfig must 297 # be skipped for backward compatibility 298 continue 299 if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches): 300 continue 301 if dep not in self.taskhash: 302 bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep) 303 self.runtaskdeps[tid].append(dep) 304 305 if task in dataCaches[mc].file_checksums[fn]: 306 if self.checksum_cache: 307 checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude) 308 else: 309 checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude) 310 for (f,cs) in checksums: 311 self.file_checksum_values[tid].append((f,cs)) 312 313 taskdep = dataCaches[mc].task_deps[fn] 314 if 'nostamp' in taskdep and task in taskdep['nostamp']: 315 # Nostamp tasks need an implicit taint so that they force any dependent tasks to run 316 if tid in self.taints and self.taints[tid].startswith("nostamp:"): 317 # Don't reset taint value upon every call 318 pass 319 else: 320 import uuid 321 taint = str(uuid.uuid4()) 322 self.taints[tid] = "nostamp:" + taint 323 324 taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn]) 325 if taint: 326 self.taints[tid] = taint 327 logger.warning("%s is tainted from a forced run" % tid) 328 329 return 330 331 def get_taskhash(self, tid, deps, dataCaches): 332 333 data = self.basehash[tid] 334 for dep in self.runtaskdeps[tid]: 335 data = data + self.get_unihash(dep) 336 337 for (f, cs) in self.file_checksum_values[tid]: 338 if cs: 339 if "/./" in f: 340 data = data + "./" + f.split("/./")[1] 341 data = data + cs 342 343 if tid in self.taints: 344 if self.taints[tid].startswith("nostamp:"): 345 data = data + self.taints[tid][8:] 346 else: 347 data = data + self.taints[tid] 348 349 h = hashlib.sha256(data.encode("utf-8")).hexdigest() 350 self.taskhash[tid] = h 351 #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task]) 352 return h 353 354 def writeout_file_checksum_cache(self): 355 """Write/update the file checksum cache onto disk""" 356 if self.checksum_cache: 357 self.checksum_cache.save_extras() 358 self.checksum_cache.save_merge() 359 else: 360 bb.fetch2.fetcher_parse_save() 361 bb.fetch2.fetcher_parse_done() 362 363 def save_unitaskhashes(self): 364 self.unihash_cache.save(self.unitaskhashes) 365 366 def copy_unitaskhashes(self, targetdir): 367 self.unihash_cache.copyfile(targetdir) 368 369 def dump_sigtask(self, fn, task, stampbase, runtime): 370 371 tid = fn + ":" + task 372 referencestamp = stampbase 373 if isinstance(runtime, str) and runtime.startswith("customfile"): 374 sigfile = stampbase 375 referencestamp = runtime[11:] 376 elif runtime and tid in self.taskhash: 377 sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid) 378 else: 379 sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid] 380 381 with bb.utils.umask(0o002): 382 bb.utils.mkdirhier(os.path.dirname(sigfile)) 383 384 data = {} 385 data['task'] = task 386 data['basehash_ignore_vars'] = self.basehash_ignore_vars 387 data['taskhash_ignore_tasks'] = self.taskhash_ignore_tasks 388 data['taskdeps'] = self.taskdeps[fn][task] 389 data['basehash'] = self.basehash[tid] 390 data['gendeps'] = {} 391 data['varvals'] = {} 392 data['varvals'][task] = self.lookupcache[fn][task] 393 for dep in self.taskdeps[fn][task]: 394 if dep in self.basehash_ignore_vars: 395 continue 396 data['gendeps'][dep] = self.gendeps[fn][dep] 397 data['varvals'][dep] = self.lookupcache[fn][dep] 398 399 if runtime and tid in self.taskhash: 400 data['runtaskdeps'] = self.runtaskdeps[tid] 401 data['file_checksum_values'] = [] 402 for f,cs in self.file_checksum_values[tid]: 403 if "/./" in f: 404 data['file_checksum_values'].append(("./" + f.split("/./")[1], cs)) 405 else: 406 data['file_checksum_values'].append((os.path.basename(f), cs)) 407 data['runtaskhashes'] = {} 408 for dep in data['runtaskdeps']: 409 data['runtaskhashes'][dep] = self.get_unihash(dep) 410 data['taskhash'] = self.taskhash[tid] 411 data['unihash'] = self.get_unihash(tid) 412 413 taint = self.read_taint(fn, task, referencestamp) 414 if taint: 415 data['taint'] = taint 416 417 if runtime and tid in self.taints: 418 if 'nostamp:' in self.taints[tid]: 419 data['taint'] = self.taints[tid] 420 421 computed_basehash = calc_basehash(data) 422 if computed_basehash != self.basehash[tid]: 423 bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid)) 424 if runtime and tid in self.taskhash: 425 computed_taskhash = calc_taskhash(data) 426 if computed_taskhash != self.taskhash[tid]: 427 bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid)) 428 sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash) 429 430 fd, tmpfile = bb.utils.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.") 431 try: 432 with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f: 433 json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder) 434 f.flush() 435 os.chmod(tmpfile, 0o664) 436 bb.utils.rename(tmpfile, sigfile) 437 except (OSError, IOError) as err: 438 try: 439 os.unlink(tmpfile) 440 except OSError: 441 pass 442 raise err 443 444 def dump_sigfn(self, fn, dataCaches, options): 445 if fn in self.taskdeps: 446 for task in self.taskdeps[fn]: 447 tid = fn + ":" + task 448 mc = bb.runqueue.mc_from_tid(tid) 449 if tid not in self.taskhash: 450 continue 451 if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]: 452 bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid) 453 bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid])) 454 self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True) 455 456class SignatureGeneratorBasicHash(SignatureGeneratorBasic): 457 name = "basichash" 458 459 def get_stampfile_hash(self, tid): 460 if tid in self.taskhash: 461 return self.taskhash[tid] 462 463 # If task is not in basehash, then error 464 return self.basehash[tid] 465 466 def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False): 467 if taskname != "do_setscene" and taskname.endswith("_setscene"): 468 tid = fn + ":" + taskname[:-9] 469 else: 470 tid = fn + ":" + taskname 471 if clean: 472 h = "*" 473 else: 474 h = self.get_stampfile_hash(tid) 475 476 return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.') 477 478 def stampcleanmask(self, stampbase, fn, taskname, extrainfo): 479 return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True) 480 481 def invalidate_task(self, task, d, fn): 482 bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task)) 483 bb.build.write_taint(task, d, fn) 484 485class SignatureGeneratorUniHashMixIn(object): 486 def __init__(self, data): 487 self.extramethod = {} 488 super().__init__(data) 489 490 def get_taskdata(self): 491 return (self.server, self.method, self.extramethod) + super().get_taskdata() 492 493 def set_taskdata(self, data): 494 self.server, self.method, self.extramethod = data[:3] 495 super().set_taskdata(data[3:]) 496 497 def client(self): 498 if getattr(self, '_client', None) is None: 499 self._client = hashserv.create_client(self.server) 500 return self._client 501 502 def reset(self, data): 503 if getattr(self, '_client', None) is not None: 504 self._client.close() 505 self._client = None 506 return super().reset(data) 507 508 def exit(self): 509 if getattr(self, '_client', None) is not None: 510 self._client.close() 511 self._client = None 512 return super().exit() 513 514 def get_stampfile_hash(self, tid): 515 if tid in self.taskhash: 516 # If a unique hash is reported, use it as the stampfile hash. This 517 # ensures that if a task won't be re-run if the taskhash changes, 518 # but it would result in the same output hash 519 unihash = self._get_unihash(tid) 520 if unihash is not None: 521 return unihash 522 523 return super().get_stampfile_hash(tid) 524 525 def set_unihash(self, tid, unihash): 526 (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid) 527 key = mc + ":" + self.tidtopn[tid] + ":" + taskname 528 self.unitaskhashes[key] = (self.taskhash[tid], unihash) 529 self.unihash[tid] = unihash 530 531 def _get_unihash(self, tid, checkkey=None): 532 if tid not in self.tidtopn: 533 return None 534 (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid) 535 key = mc + ":" + self.tidtopn[tid] + ":" + taskname 536 if key not in self.unitaskhashes: 537 return None 538 if not checkkey: 539 checkkey = self.taskhash[tid] 540 (key, unihash) = self.unitaskhashes[key] 541 if key != checkkey: 542 return None 543 return unihash 544 545 def get_unihash(self, tid): 546 taskhash = self.taskhash[tid] 547 548 # If its not a setscene task we can return 549 if self.setscenetasks and tid not in self.setscenetasks: 550 self.unihash[tid] = None 551 return taskhash 552 553 # TODO: This cache can grow unbounded. It probably only needs to keep 554 # for each task 555 unihash = self._get_unihash(tid) 556 if unihash is not None: 557 self.unihash[tid] = unihash 558 return unihash 559 560 # In the absence of being able to discover a unique hash from the 561 # server, make it be equivalent to the taskhash. The unique "hash" only 562 # really needs to be a unique string (not even necessarily a hash), but 563 # making it match the taskhash has a few advantages: 564 # 565 # 1) All of the sstate code that assumes hashes can be the same 566 # 2) It provides maximal compatibility with builders that don't use 567 # an equivalency server 568 # 3) The value is easy for multiple independent builders to derive the 569 # same unique hash from the same input. This means that if the 570 # independent builders find the same taskhash, but it isn't reported 571 # to the server, there is a better chance that they will agree on 572 # the unique hash. 573 unihash = taskhash 574 575 try: 576 method = self.method 577 if tid in self.extramethod: 578 method = method + self.extramethod[tid] 579 data = self.client().get_unihash(method, self.taskhash[tid]) 580 if data: 581 unihash = data 582 # A unique hash equal to the taskhash is not very interesting, 583 # so it is reported it at debug level 2. If they differ, that 584 # is much more interesting, so it is reported at debug level 1 585 hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server)) 586 else: 587 hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server)) 588 except ConnectionError as e: 589 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) 590 591 self.set_unihash(tid, unihash) 592 self.unihash[tid] = unihash 593 return unihash 594 595 def report_unihash(self, path, task, d): 596 import importlib 597 598 taskhash = d.getVar('BB_TASKHASH') 599 unihash = d.getVar('BB_UNIHASH') 600 report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1' 601 tempdir = d.getVar('T') 602 fn = d.getVar('BB_FILENAME') 603 tid = fn + ':do_' + task 604 key = tid + ':' + taskhash 605 606 if self.setscenetasks and tid not in self.setscenetasks: 607 return 608 609 # This can happen if locked sigs are in action. Detect and just exit 610 if taskhash != self.taskhash[tid]: 611 return 612 613 # Sanity checks 614 cache_unihash = self._get_unihash(tid, checkkey=taskhash) 615 if cache_unihash is None: 616 bb.fatal('%s not in unihash cache. Please report this error' % key) 617 618 if cache_unihash != unihash: 619 bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash)) 620 621 sigfile = None 622 sigfile_name = "depsig.do_%s.%d" % (task, os.getpid()) 623 sigfile_link = "depsig.do_%s" % task 624 625 try: 626 sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b') 627 628 locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d} 629 630 if "." in self.method: 631 (module, method) = self.method.rsplit('.', 1) 632 locs['method'] = getattr(importlib.import_module(module), method) 633 outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs) 634 else: 635 outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs) 636 637 try: 638 extra_data = {} 639 640 owner = d.getVar('SSTATE_HASHEQUIV_OWNER') 641 if owner: 642 extra_data['owner'] = owner 643 644 if report_taskdata: 645 sigfile.seek(0) 646 647 extra_data['PN'] = d.getVar('PN') 648 extra_data['PV'] = d.getVar('PV') 649 extra_data['PR'] = d.getVar('PR') 650 extra_data['task'] = task 651 extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8') 652 653 method = self.method 654 if tid in self.extramethod: 655 method = method + self.extramethod[tid] 656 657 data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data) 658 new_unihash = data['unihash'] 659 660 if new_unihash != unihash: 661 hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server)) 662 bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d) 663 self.set_unihash(tid, new_unihash) 664 d.setVar('BB_UNIHASH', new_unihash) 665 else: 666 hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server)) 667 except ConnectionError as e: 668 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) 669 finally: 670 if sigfile: 671 sigfile.close() 672 673 sigfile_link_path = os.path.join(tempdir, sigfile_link) 674 bb.utils.remove(sigfile_link_path) 675 676 try: 677 os.symlink(sigfile_name, sigfile_link_path) 678 except OSError: 679 pass 680 681 def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches): 682 try: 683 extra_data = {} 684 method = self.method 685 if tid in self.extramethod: 686 method = method + self.extramethod[tid] 687 688 data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data) 689 hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data))) 690 691 if data is None: 692 bb.warn("Server unable to handle unihash report") 693 return False 694 695 finalunihash = data['unihash'] 696 697 if finalunihash == current_unihash: 698 hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash)) 699 elif finalunihash == wanted_unihash: 700 hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash)) 701 self.set_unihash(tid, finalunihash) 702 return True 703 else: 704 # TODO: What to do here? 705 hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash)) 706 707 except ConnectionError as e: 708 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) 709 710 return False 711 712# 713# Dummy class used for bitbake-selftest 714# 715class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash): 716 name = "TestEquivHash" 717 def init_rundepcheck(self, data): 718 super().init_rundepcheck(data) 719 self.server = data.getVar('BB_HASHSERVE') 720 self.method = "sstate_output_hash" 721 722# 723# Dummy class used for bitbake-selftest 724# 725class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash): 726 name = "TestMulticonfigDepends" 727 supports_multiconfig_datacaches = True 728 729def dump_this_task(outfile, d): 730 import bb.parse 731 fn = d.getVar("BB_FILENAME") 732 task = "do_" + d.getVar("BB_CURRENTTASK") 733 referencestamp = bb.build.stamp_internal(task, d, None, True) 734 bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp) 735 736def init_colors(enable_color): 737 """Initialise colour dict for passing to compare_sigfiles()""" 738 # First set up the colours 739 colors = {'color_title': '\033[1m', 740 'color_default': '\033[0m', 741 'color_add': '\033[0;32m', 742 'color_remove': '\033[0;31m', 743 } 744 # Leave all keys present but clear the values 745 if not enable_color: 746 for k in colors.keys(): 747 colors[k] = '' 748 return colors 749 750def worddiff_str(oldstr, newstr, colors=None): 751 if not colors: 752 colors = init_colors(False) 753 diff = simplediff.diff(oldstr.split(' '), newstr.split(' ')) 754 ret = [] 755 for change, value in diff: 756 value = ' '.join(value) 757 if change == '=': 758 ret.append(value) 759 elif change == '+': 760 item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors) 761 ret.append(item) 762 elif change == '-': 763 item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors) 764 ret.append(item) 765 whitespace_note = '' 766 if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()): 767 whitespace_note = ' (whitespace changed)' 768 return '"%s"%s' % (' '.join(ret), whitespace_note) 769 770def list_inline_diff(oldlist, newlist, colors=None): 771 if not colors: 772 colors = init_colors(False) 773 diff = simplediff.diff(oldlist, newlist) 774 ret = [] 775 for change, value in diff: 776 value = ' '.join(value) 777 if change == '=': 778 ret.append("'%s'" % value) 779 elif change == '+': 780 item = '{color_add}+{value}{color_default}'.format(value=value, **colors) 781 ret.append(item) 782 elif change == '-': 783 item = '{color_remove}-{value}{color_default}'.format(value=value, **colors) 784 ret.append(item) 785 return '[%s]' % (', '.join(ret)) 786 787def clean_basepath(basepath): 788 basepath, dir, recipe_task = basepath.rsplit("/", 2) 789 cleaned = dir + '/' + recipe_task 790 791 if basepath[0] == '/': 792 return cleaned 793 794 if basepath.startswith("mc:") and basepath.count(':') >= 2: 795 mc, mc_name, basepath = basepath.split(":", 2) 796 mc_suffix = ':mc:' + mc_name 797 else: 798 mc_suffix = '' 799 800 # mc stuff now removed from basepath. Whatever was next, if present will be the first 801 # suffix. ':/', recipe path start, marks the end of this. Something like 802 # 'virtual:a[:b[:c]]:/path...' (b and c being optional) 803 if basepath[0] != '/': 804 cleaned += ':' + basepath.split(':/', 1)[0] 805 806 return cleaned + mc_suffix 807 808def clean_basepaths(a): 809 b = {} 810 for x in a: 811 b[clean_basepath(x)] = a[x] 812 return b 813 814def clean_basepaths_list(a): 815 b = [] 816 for x in a: 817 b.append(clean_basepath(x)) 818 return b 819 820# Handled renamed fields 821def handle_renames(data): 822 if 'basewhitelist' in data: 823 data['basehash_ignore_vars'] = data['basewhitelist'] 824 del data['basewhitelist'] 825 if 'taskwhitelist' in data: 826 data['taskhash_ignore_tasks'] = data['taskwhitelist'] 827 del data['taskwhitelist'] 828 829 830def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False): 831 output = [] 832 833 colors = init_colors(color) 834 def color_format(formatstr, **values): 835 """ 836 Return colour formatted string. 837 NOTE: call with the format string, not an already formatted string 838 containing values (otherwise you could have trouble with { and } 839 characters) 840 """ 841 if not formatstr.endswith('{color_default}'): 842 formatstr += '{color_default}' 843 # In newer python 3 versions you can pass both of these directly, 844 # but we only require 3.4 at the moment 845 formatparams = {} 846 formatparams.update(colors) 847 formatparams.update(values) 848 return formatstr.format(**formatparams) 849 850 with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f: 851 a_data = json.load(f, object_hook=SetDecoder) 852 with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f: 853 b_data = json.load(f, object_hook=SetDecoder) 854 855 for data in [a_data, b_data]: 856 handle_renames(data) 857 858 def dict_diff(a, b, ignored_vars=set()): 859 sa = set(a.keys()) 860 sb = set(b.keys()) 861 common = sa & sb 862 changed = set() 863 for i in common: 864 if a[i] != b[i] and i not in ignored_vars: 865 changed.add(i) 866 added = sb - sa 867 removed = sa - sb 868 return changed, added, removed 869 870 def file_checksums_diff(a, b): 871 from collections import Counter 872 873 # Convert lists back to tuples 874 a = [(f[0], f[1]) for f in a] 875 b = [(f[0], f[1]) for f in b] 876 877 # Compare lists, ensuring we can handle duplicate filenames if they exist 878 removedcount = Counter(a) 879 removedcount.subtract(b) 880 addedcount = Counter(b) 881 addedcount.subtract(a) 882 added = [] 883 for x in b: 884 if addedcount[x] > 0: 885 addedcount[x] -= 1 886 added.append(x) 887 removed = [] 888 changed = [] 889 for x in a: 890 if removedcount[x] > 0: 891 removedcount[x] -= 1 892 for y in added: 893 if y[0] == x[0]: 894 changed.append((x[0], x[1], y[1])) 895 added.remove(y) 896 break 897 else: 898 removed.append(x) 899 added = [x[0] for x in added] 900 removed = [x[0] for x in removed] 901 return changed, added, removed 902 903 if 'basehash_ignore_vars' in a_data and a_data['basehash_ignore_vars'] != b_data['basehash_ignore_vars']: 904 output.append(color_format("{color_title}basehash_ignore_vars changed{color_default} from '%s' to '%s'") % (a_data['basehash_ignore_vars'], b_data['basehash_ignore_vars'])) 905 if a_data['basehash_ignore_vars'] and b_data['basehash_ignore_vars']: 906 output.append("changed items: %s" % a_data['basehash_ignore_vars'].symmetric_difference(b_data['basehash_ignore_vars'])) 907 908 if 'taskhash_ignore_tasks' in a_data and a_data['taskhash_ignore_tasks'] != b_data['taskhash_ignore_tasks']: 909 output.append(color_format("{color_title}taskhash_ignore_tasks changed{color_default} from '%s' to '%s'") % (a_data['taskhash_ignore_tasks'], b_data['taskhash_ignore_tasks'])) 910 if a_data['taskhash_ignore_tasks'] and b_data['taskhash_ignore_tasks']: 911 output.append("changed items: %s" % a_data['taskhash_ignore_tasks'].symmetric_difference(b_data['taskhash_ignore_tasks'])) 912 913 if a_data['taskdeps'] != b_data['taskdeps']: 914 output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps']))) 915 916 if a_data['basehash'] != b_data['basehash'] and not collapsed: 917 output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash'])) 918 919 changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basehash_ignore_vars'] & b_data['basehash_ignore_vars']) 920 if changed: 921 for dep in sorted(changed): 922 output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep])) 923 if a_data['gendeps'][dep] and b_data['gendeps'][dep]: 924 output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep])) 925 if added: 926 for dep in sorted(added): 927 output.append(color_format("{color_title}Dependency on variable %s was added") % (dep)) 928 if removed: 929 for dep in sorted(removed): 930 output.append(color_format("{color_title}Dependency on Variable %s was removed") % (dep)) 931 932 933 changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals']) 934 if changed: 935 for dep in sorted(changed): 936 oldval = a_data['varvals'][dep] 937 newval = b_data['varvals'][dep] 938 if newval and oldval and ('\n' in oldval or '\n' in newval): 939 diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='') 940 # Cut off the first two lines, since we aren't interested in 941 # the old/new filename (they are blank anyway in this case) 942 difflines = list(diff)[2:] 943 if color: 944 # Add colour to diff output 945 for i, line in enumerate(difflines): 946 if line.startswith('+'): 947 line = color_format('{color_add}{line}', line=line) 948 difflines[i] = line 949 elif line.startswith('-'): 950 line = color_format('{color_remove}{line}', line=line) 951 difflines[i] = line 952 output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines))) 953 elif newval and oldval and (' ' in oldval or ' ' in newval): 954 output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors))) 955 else: 956 output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval)) 957 958 if not 'file_checksum_values' in a_data: 959 a_data['file_checksum_values'] = [] 960 if not 'file_checksum_values' in b_data: 961 b_data['file_checksum_values'] = [] 962 963 changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values']) 964 if changed: 965 for f, old, new in changed: 966 output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new)) 967 if added: 968 for f in added: 969 output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f)) 970 if removed: 971 for f in removed: 972 output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f)) 973 974 if not 'runtaskdeps' in a_data: 975 a_data['runtaskdeps'] = {} 976 if not 'runtaskdeps' in b_data: 977 b_data['runtaskdeps'] = {} 978 979 if not collapsed: 980 if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']): 981 changed = ["Number of task dependencies changed"] 982 else: 983 changed = [] 984 for idx, task in enumerate(a_data['runtaskdeps']): 985 a = a_data['runtaskdeps'][idx] 986 b = b_data['runtaskdeps'][idx] 987 if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed: 988 changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b])) 989 990 if changed: 991 clean_a = clean_basepaths_list(a_data['runtaskdeps']) 992 clean_b = clean_basepaths_list(b_data['runtaskdeps']) 993 if clean_a != clean_b: 994 output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors)) 995 else: 996 output.append(color_format("{color_title}runtaskdeps changed:")) 997 output.append("\n".join(changed)) 998 999 1000 if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data: 1001 a = clean_basepaths(a_data['runtaskhashes']) 1002 b = clean_basepaths(b_data['runtaskhashes']) 1003 changed, added, removed = dict_diff(a, b) 1004 if added: 1005 for dep in sorted(added): 1006 bdep_found = False 1007 if removed: 1008 for bdep in removed: 1009 if b[dep] == a[bdep]: 1010 #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep)) 1011 bdep_found = True 1012 if not bdep_found: 1013 output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (dep, b[dep])) 1014 if removed: 1015 for dep in sorted(removed): 1016 adep_found = False 1017 if added: 1018 for adep in added: 1019 if b[adep] == a[dep]: 1020 #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep)) 1021 adep_found = True 1022 if not adep_found: 1023 output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (dep, a[dep])) 1024 if changed: 1025 for dep in sorted(changed): 1026 if not collapsed: 1027 output.append(color_format("{color_title}Hash for task dependency %s changed{color_default} from %s to %s") % (dep, a[dep], b[dep])) 1028 if callable(recursecb): 1029 recout = recursecb(dep, a[dep], b[dep]) 1030 if recout: 1031 if collapsed: 1032 output.extend(recout) 1033 else: 1034 # If a dependent hash changed, might as well print the line above and then defer to the changes in 1035 # that hash since in all likelyhood, they're the same changes this task also saw. 1036 output = [output[-1]] + recout 1037 break 1038 1039 a_taint = a_data.get('taint', None) 1040 b_taint = b_data.get('taint', None) 1041 if a_taint != b_taint: 1042 if a_taint and a_taint.startswith('nostamp:'): 1043 a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):') 1044 if b_taint and b_taint.startswith('nostamp:'): 1045 b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):') 1046 output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint)) 1047 1048 return output 1049 1050 1051def calc_basehash(sigdata): 1052 task = sigdata['task'] 1053 basedata = sigdata['varvals'][task] 1054 1055 if basedata is None: 1056 basedata = '' 1057 1058 alldeps = sigdata['taskdeps'] 1059 for dep in alldeps: 1060 basedata = basedata + dep 1061 val = sigdata['varvals'][dep] 1062 if val is not None: 1063 basedata = basedata + str(val) 1064 1065 return hashlib.sha256(basedata.encode("utf-8")).hexdigest() 1066 1067def calc_taskhash(sigdata): 1068 data = sigdata['basehash'] 1069 1070 for dep in sigdata['runtaskdeps']: 1071 data = data + sigdata['runtaskhashes'][dep] 1072 1073 for c in sigdata['file_checksum_values']: 1074 if c[1]: 1075 if "./" in c[0]: 1076 data = data + c[0] 1077 data = data + c[1] 1078 1079 if 'taint' in sigdata: 1080 if 'nostamp:' in sigdata['taint']: 1081 data = data + sigdata['taint'][8:] 1082 else: 1083 data = data + sigdata['taint'] 1084 1085 return hashlib.sha256(data.encode("utf-8")).hexdigest() 1086 1087 1088def dump_sigfile(a): 1089 output = [] 1090 1091 with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f: 1092 a_data = json.load(f, object_hook=SetDecoder) 1093 1094 handle_renames(a_data) 1095 1096 output.append("basehash_ignore_vars: %s" % (sorted(a_data['basehash_ignore_vars']))) 1097 1098 output.append("taskhash_ignore_tasks: %s" % (sorted(a_data['taskhash_ignore_tasks'] or []))) 1099 1100 output.append("Task dependencies: %s" % (sorted(a_data['taskdeps']))) 1101 1102 output.append("basehash: %s" % (a_data['basehash'])) 1103 1104 for dep in sorted(a_data['gendeps']): 1105 output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep]))) 1106 1107 for dep in sorted(a_data['varvals']): 1108 output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep])) 1109 1110 if 'runtaskdeps' in a_data: 1111 output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps']))) 1112 1113 if 'file_checksum_values' in a_data: 1114 output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values']))) 1115 1116 if 'runtaskhashes' in a_data: 1117 for dep in sorted(a_data['runtaskhashes']): 1118 output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep])) 1119 1120 if 'taint' in a_data: 1121 if a_data['taint'].startswith('nostamp:'): 1122 msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):') 1123 else: 1124 msg = a_data['taint'] 1125 output.append("Tainted (by forced/invalidated task): %s" % msg) 1126 1127 if 'task' in a_data: 1128 computed_basehash = calc_basehash(a_data) 1129 output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash'])) 1130 else: 1131 output.append("Unable to compute base hash") 1132 1133 computed_taskhash = calc_taskhash(a_data) 1134 output.append("Computed task hash is %s" % computed_taskhash) 1135 1136 return output 1137