1# 2# SPDX-License-Identifier: GPL-2.0-only 3# 4 5import hashlib 6import logging 7import os 8import re 9import tempfile 10import pickle 11import bb.data 12import difflib 13import simplediff 14import json 15import bb.compress.zstd 16from bb.checksum import FileChecksumCache 17from bb import runqueue 18import hashserv 19import hashserv.client 20 21logger = logging.getLogger('BitBake.SigGen') 22hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv') 23 24class SetEncoder(json.JSONEncoder): 25 def default(self, obj): 26 if isinstance(obj, set): 27 return dict(_set_object=list(sorted(obj))) 28 return json.JSONEncoder.default(self, obj) 29 30def SetDecoder(dct): 31 if '_set_object' in dct: 32 return set(dct['_set_object']) 33 return dct 34 35def init(d): 36 siggens = [obj for obj in globals().values() 37 if type(obj) is type and issubclass(obj, SignatureGenerator)] 38 39 desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop" 40 for sg in siggens: 41 if desired == sg.name: 42 return sg(d) 43 break 44 else: 45 logger.error("Invalid signature generator '%s', using default 'noop'\n" 46 "Available generators: %s", desired, 47 ', '.join(obj.name for obj in siggens)) 48 return SignatureGenerator(d) 49 50class SignatureGenerator(object): 51 """ 52 """ 53 name = "noop" 54 55 # If the derived class supports multiconfig datacaches, set this to True 56 # The default is False for backward compatibility with derived signature 57 # generators that do not understand multiconfig caches 58 supports_multiconfig_datacaches = False 59 60 def __init__(self, data): 61 self.basehash = {} 62 self.taskhash = {} 63 self.unihash = {} 64 self.runtaskdeps = {} 65 self.file_checksum_values = {} 66 self.taints = {} 67 self.unitaskhashes = {} 68 self.tidtopn = {} 69 self.setscenetasks = set() 70 71 def finalise(self, fn, d, varient): 72 return 73 74 def postparsing_clean_cache(self): 75 return 76 77 def get_unihash(self, tid): 78 return self.taskhash[tid] 79 80 def prep_taskhash(self, tid, deps, dataCaches): 81 return 82 83 def get_taskhash(self, tid, deps, dataCaches): 84 self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest() 85 return self.taskhash[tid] 86 87 def writeout_file_checksum_cache(self): 88 """Write/update the file checksum cache onto disk""" 89 return 90 91 def stampfile(self, stampbase, file_name, taskname, extrainfo): 92 return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.') 93 94 def stampcleanmask(self, stampbase, file_name, taskname, extrainfo): 95 return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.') 96 97 def dump_sigtask(self, fn, task, stampbase, runtime): 98 return 99 100 def invalidate_task(self, task, d, fn): 101 bb.build.del_stamp(task, d, fn) 102 103 def dump_sigs(self, dataCache, options): 104 return 105 106 def get_taskdata(self): 107 return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks) 108 109 def set_taskdata(self, data): 110 self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data 111 112 def reset(self, data): 113 self.__init__(data) 114 115 def get_taskhashes(self): 116 return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn 117 118 def set_taskhashes(self, hashes): 119 self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes 120 121 def save_unitaskhashes(self): 122 return 123 124 def set_setscene_tasks(self, setscene_tasks): 125 return 126 127 @classmethod 128 def get_data_caches(cls, dataCaches, mc): 129 """ 130 This function returns the datacaches that should be passed to signature 131 generator functions. If the signature generator supports multiconfig 132 caches, the entire dictionary of data caches is sent, otherwise a 133 special proxy is sent that support both index access to all 134 multiconfigs, and also direct access for the default multiconfig. 135 136 The proxy class allows code in this class itself to always use 137 multiconfig aware code (to ease maintenance), but derived classes that 138 are unaware of multiconfig data caches can still access the default 139 multiconfig as expected. 140 141 Do not override this function in derived classes; it will be removed in 142 the future when support for multiconfig data caches is mandatory 143 """ 144 class DataCacheProxy(object): 145 def __init__(self): 146 pass 147 148 def __getitem__(self, key): 149 return dataCaches[key] 150 151 def __getattr__(self, name): 152 return getattr(dataCaches[mc], name) 153 154 if cls.supports_multiconfig_datacaches: 155 return dataCaches 156 157 return DataCacheProxy() 158 159 def exit(self): 160 return 161 162class SignatureGeneratorBasic(SignatureGenerator): 163 """ 164 """ 165 name = "basic" 166 167 def __init__(self, data): 168 self.basehash = {} 169 self.taskhash = {} 170 self.unihash = {} 171 self.taskdeps = {} 172 self.runtaskdeps = {} 173 self.file_checksum_values = {} 174 self.taints = {} 175 self.gendeps = {} 176 self.lookupcache = {} 177 self.setscenetasks = set() 178 self.basehash_ignore_vars = set((data.getVar("BB_BASEHASH_IGNORE_VARS") or "").split()) 179 self.taskhash_ignore_tasks = None 180 self.init_rundepcheck(data) 181 checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE") 182 if checksum_cache_file: 183 self.checksum_cache = FileChecksumCache() 184 self.checksum_cache.init_cache(data, checksum_cache_file) 185 else: 186 self.checksum_cache = None 187 188 self.unihash_cache = bb.cache.SimpleCache("3") 189 self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {}) 190 self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split() 191 self.tidtopn = {} 192 193 def init_rundepcheck(self, data): 194 self.taskhash_ignore_tasks = data.getVar("BB_TASKHASH_IGNORE_TASKS") or None 195 if self.taskhash_ignore_tasks: 196 self.twl = re.compile(self.taskhash_ignore_tasks) 197 else: 198 self.twl = None 199 200 def _build_data(self, fn, d): 201 202 ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1') 203 tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basehash_ignore_vars) 204 205 taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basehash_ignore_vars, fn) 206 207 for task in tasklist: 208 tid = fn + ":" + task 209 if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]: 210 bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid])) 211 bb.error("The following commands may help:") 212 cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task) 213 # Make sure sigdata is dumped before run printdiff 214 bb.error("%s -Snone" % cmd) 215 bb.error("Then:") 216 bb.error("%s -Sprintdiff\n" % cmd) 217 self.basehash[tid] = basehash[tid] 218 219 self.taskdeps[fn] = taskdeps 220 self.gendeps[fn] = gendeps 221 self.lookupcache[fn] = lookupcache 222 223 return taskdeps 224 225 def set_setscene_tasks(self, setscene_tasks): 226 self.setscenetasks = set(setscene_tasks) 227 228 def finalise(self, fn, d, variant): 229 230 mc = d.getVar("__BBMULTICONFIG", False) or "" 231 if variant or mc: 232 fn = bb.cache.realfn2virtual(fn, variant, mc) 233 234 try: 235 taskdeps = self._build_data(fn, d) 236 except bb.parse.SkipRecipe: 237 raise 238 except: 239 bb.warn("Error during finalise of %s" % fn) 240 raise 241 242 #Slow but can be useful for debugging mismatched basehashes 243 #for task in self.taskdeps[fn]: 244 # self.dump_sigtask(fn, task, d.getVar("STAMP"), False) 245 246 for task in taskdeps: 247 d.setVar("BB_BASEHASH:task-%s" % task, self.basehash[fn + ":" + task]) 248 249 def postparsing_clean_cache(self): 250 # 251 # After parsing we can remove some things from memory to reduce our memory footprint 252 # 253 self.gendeps = {} 254 self.lookupcache = {} 255 self.taskdeps = {} 256 257 def rundep_check(self, fn, recipename, task, dep, depname, dataCaches): 258 # Return True if we should keep the dependency, False to drop it 259 # We only manipulate the dependencies for packages not in the ignore 260 # list 261 if self.twl and not self.twl.search(recipename): 262 # then process the actual dependencies 263 if self.twl.search(depname): 264 return False 265 return True 266 267 def read_taint(self, fn, task, stampbase): 268 taint = None 269 try: 270 with open(stampbase + '.' + task + '.taint', 'r') as taintf: 271 taint = taintf.read() 272 except IOError: 273 pass 274 return taint 275 276 def prep_taskhash(self, tid, deps, dataCaches): 277 278 (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid) 279 280 self.basehash[tid] = dataCaches[mc].basetaskhash[tid] 281 self.runtaskdeps[tid] = [] 282 self.file_checksum_values[tid] = [] 283 recipename = dataCaches[mc].pkg_fn[fn] 284 285 self.tidtopn[tid] = recipename 286 287 for dep in sorted(deps, key=clean_basepath): 288 (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep) 289 depname = dataCaches[depmc].pkg_fn[depmcfn] 290 if not self.supports_multiconfig_datacaches and mc != depmc: 291 # If the signature generator doesn't understand multiconfig 292 # data caches, any dependency not in the same multiconfig must 293 # be skipped for backward compatibility 294 continue 295 if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches): 296 continue 297 if dep not in self.taskhash: 298 bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep) 299 self.runtaskdeps[tid].append(dep) 300 301 if task in dataCaches[mc].file_checksums[fn]: 302 if self.checksum_cache: 303 checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude) 304 else: 305 checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude) 306 for (f,cs) in checksums: 307 self.file_checksum_values[tid].append((f,cs)) 308 309 taskdep = dataCaches[mc].task_deps[fn] 310 if 'nostamp' in taskdep and task in taskdep['nostamp']: 311 # Nostamp tasks need an implicit taint so that they force any dependent tasks to run 312 if tid in self.taints and self.taints[tid].startswith("nostamp:"): 313 # Don't reset taint value upon every call 314 pass 315 else: 316 import uuid 317 taint = str(uuid.uuid4()) 318 self.taints[tid] = "nostamp:" + taint 319 320 taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn]) 321 if taint: 322 self.taints[tid] = taint 323 logger.warning("%s is tainted from a forced run" % tid) 324 325 return 326 327 def get_taskhash(self, tid, deps, dataCaches): 328 329 data = self.basehash[tid] 330 for dep in self.runtaskdeps[tid]: 331 data = data + self.get_unihash(dep) 332 333 for (f, cs) in self.file_checksum_values[tid]: 334 if cs: 335 if "/./" in f: 336 data = data + "./" + f.split("/./")[1] 337 data = data + cs 338 339 if tid in self.taints: 340 if self.taints[tid].startswith("nostamp:"): 341 data = data + self.taints[tid][8:] 342 else: 343 data = data + self.taints[tid] 344 345 h = hashlib.sha256(data.encode("utf-8")).hexdigest() 346 self.taskhash[tid] = h 347 #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task]) 348 return h 349 350 def writeout_file_checksum_cache(self): 351 """Write/update the file checksum cache onto disk""" 352 if self.checksum_cache: 353 self.checksum_cache.save_extras() 354 self.checksum_cache.save_merge() 355 else: 356 bb.fetch2.fetcher_parse_save() 357 bb.fetch2.fetcher_parse_done() 358 359 def save_unitaskhashes(self): 360 self.unihash_cache.save(self.unitaskhashes) 361 362 def dump_sigtask(self, fn, task, stampbase, runtime): 363 364 tid = fn + ":" + task 365 referencestamp = stampbase 366 if isinstance(runtime, str) and runtime.startswith("customfile"): 367 sigfile = stampbase 368 referencestamp = runtime[11:] 369 elif runtime and tid in self.taskhash: 370 sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid) 371 else: 372 sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid] 373 374 with bb.utils.umask(0o002): 375 bb.utils.mkdirhier(os.path.dirname(sigfile)) 376 377 data = {} 378 data['task'] = task 379 data['basehash_ignore_vars'] = self.basehash_ignore_vars 380 data['taskhash_ignore_tasks'] = self.taskhash_ignore_tasks 381 data['taskdeps'] = self.taskdeps[fn][task] 382 data['basehash'] = self.basehash[tid] 383 data['gendeps'] = {} 384 data['varvals'] = {} 385 data['varvals'][task] = self.lookupcache[fn][task] 386 for dep in self.taskdeps[fn][task]: 387 if dep in self.basehash_ignore_vars: 388 continue 389 data['gendeps'][dep] = self.gendeps[fn][dep] 390 data['varvals'][dep] = self.lookupcache[fn][dep] 391 392 if runtime and tid in self.taskhash: 393 data['runtaskdeps'] = self.runtaskdeps[tid] 394 data['file_checksum_values'] = [] 395 for f,cs in self.file_checksum_values[tid]: 396 if "/./" in f: 397 data['file_checksum_values'].append(("./" + f.split("/./")[1], cs)) 398 else: 399 data['file_checksum_values'].append((os.path.basename(f), cs)) 400 data['runtaskhashes'] = {} 401 for dep in data['runtaskdeps']: 402 data['runtaskhashes'][dep] = self.get_unihash(dep) 403 data['taskhash'] = self.taskhash[tid] 404 data['unihash'] = self.get_unihash(tid) 405 406 taint = self.read_taint(fn, task, referencestamp) 407 if taint: 408 data['taint'] = taint 409 410 if runtime and tid in self.taints: 411 if 'nostamp:' in self.taints[tid]: 412 data['taint'] = self.taints[tid] 413 414 computed_basehash = calc_basehash(data) 415 if computed_basehash != self.basehash[tid]: 416 bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid)) 417 if runtime and tid in self.taskhash: 418 computed_taskhash = calc_taskhash(data) 419 if computed_taskhash != self.taskhash[tid]: 420 bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid)) 421 sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash) 422 423 fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.") 424 try: 425 with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f: 426 json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder) 427 f.flush() 428 os.chmod(tmpfile, 0o664) 429 bb.utils.rename(tmpfile, sigfile) 430 except (OSError, IOError) as err: 431 try: 432 os.unlink(tmpfile) 433 except OSError: 434 pass 435 raise err 436 437 def dump_sigfn(self, fn, dataCaches, options): 438 if fn in self.taskdeps: 439 for task in self.taskdeps[fn]: 440 tid = fn + ":" + task 441 mc = bb.runqueue.mc_from_tid(tid) 442 if tid not in self.taskhash: 443 continue 444 if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]: 445 bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid) 446 bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid])) 447 self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True) 448 449class SignatureGeneratorBasicHash(SignatureGeneratorBasic): 450 name = "basichash" 451 452 def get_stampfile_hash(self, tid): 453 if tid in self.taskhash: 454 return self.taskhash[tid] 455 456 # If task is not in basehash, then error 457 return self.basehash[tid] 458 459 def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False): 460 if taskname != "do_setscene" and taskname.endswith("_setscene"): 461 tid = fn + ":" + taskname[:-9] 462 else: 463 tid = fn + ":" + taskname 464 if clean: 465 h = "*" 466 else: 467 h = self.get_stampfile_hash(tid) 468 469 return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.') 470 471 def stampcleanmask(self, stampbase, fn, taskname, extrainfo): 472 return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True) 473 474 def invalidate_task(self, task, d, fn): 475 bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task)) 476 bb.build.write_taint(task, d, fn) 477 478class SignatureGeneratorUniHashMixIn(object): 479 def __init__(self, data): 480 self.extramethod = {} 481 super().__init__(data) 482 483 def get_taskdata(self): 484 return (self.server, self.method, self.extramethod) + super().get_taskdata() 485 486 def set_taskdata(self, data): 487 self.server, self.method, self.extramethod = data[:3] 488 super().set_taskdata(data[3:]) 489 490 def client(self): 491 if getattr(self, '_client', None) is None: 492 self._client = hashserv.create_client(self.server) 493 return self._client 494 495 def reset(self, data): 496 if getattr(self, '_client', None) is not None: 497 self._client.close() 498 self._client = None 499 return super().reset(data) 500 501 def exit(self): 502 if getattr(self, '_client', None) is not None: 503 self._client.close() 504 self._client = None 505 return super().exit() 506 507 def get_stampfile_hash(self, tid): 508 if tid in self.taskhash: 509 # If a unique hash is reported, use it as the stampfile hash. This 510 # ensures that if a task won't be re-run if the taskhash changes, 511 # but it would result in the same output hash 512 unihash = self._get_unihash(tid) 513 if unihash is not None: 514 return unihash 515 516 return super().get_stampfile_hash(tid) 517 518 def set_unihash(self, tid, unihash): 519 (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid) 520 key = mc + ":" + self.tidtopn[tid] + ":" + taskname 521 self.unitaskhashes[key] = (self.taskhash[tid], unihash) 522 self.unihash[tid] = unihash 523 524 def _get_unihash(self, tid, checkkey=None): 525 if tid not in self.tidtopn: 526 return None 527 (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid) 528 key = mc + ":" + self.tidtopn[tid] + ":" + taskname 529 if key not in self.unitaskhashes: 530 return None 531 if not checkkey: 532 checkkey = self.taskhash[tid] 533 (key, unihash) = self.unitaskhashes[key] 534 if key != checkkey: 535 return None 536 return unihash 537 538 def get_unihash(self, tid): 539 taskhash = self.taskhash[tid] 540 541 # If its not a setscene task we can return 542 if self.setscenetasks and tid not in self.setscenetasks: 543 self.unihash[tid] = None 544 return taskhash 545 546 # TODO: This cache can grow unbounded. It probably only needs to keep 547 # for each task 548 unihash = self._get_unihash(tid) 549 if unihash is not None: 550 self.unihash[tid] = unihash 551 return unihash 552 553 # In the absence of being able to discover a unique hash from the 554 # server, make it be equivalent to the taskhash. The unique "hash" only 555 # really needs to be a unique string (not even necessarily a hash), but 556 # making it match the taskhash has a few advantages: 557 # 558 # 1) All of the sstate code that assumes hashes can be the same 559 # 2) It provides maximal compatibility with builders that don't use 560 # an equivalency server 561 # 3) The value is easy for multiple independent builders to derive the 562 # same unique hash from the same input. This means that if the 563 # independent builders find the same taskhash, but it isn't reported 564 # to the server, there is a better chance that they will agree on 565 # the unique hash. 566 unihash = taskhash 567 568 try: 569 method = self.method 570 if tid in self.extramethod: 571 method = method + self.extramethod[tid] 572 data = self.client().get_unihash(method, self.taskhash[tid]) 573 if data: 574 unihash = data 575 # A unique hash equal to the taskhash is not very interesting, 576 # so it is reported it at debug level 2. If they differ, that 577 # is much more interesting, so it is reported at debug level 1 578 hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server)) 579 else: 580 hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server)) 581 except ConnectionError as e: 582 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) 583 584 self.set_unihash(tid, unihash) 585 self.unihash[tid] = unihash 586 return unihash 587 588 def report_unihash(self, path, task, d): 589 import importlib 590 591 taskhash = d.getVar('BB_TASKHASH') 592 unihash = d.getVar('BB_UNIHASH') 593 report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1' 594 tempdir = d.getVar('T') 595 fn = d.getVar('BB_FILENAME') 596 tid = fn + ':do_' + task 597 key = tid + ':' + taskhash 598 599 if self.setscenetasks and tid not in self.setscenetasks: 600 return 601 602 # This can happen if locked sigs are in action. Detect and just exit 603 if taskhash != self.taskhash[tid]: 604 return 605 606 # Sanity checks 607 cache_unihash = self._get_unihash(tid, checkkey=taskhash) 608 if cache_unihash is None: 609 bb.fatal('%s not in unihash cache. Please report this error' % key) 610 611 if cache_unihash != unihash: 612 bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash)) 613 614 sigfile = None 615 sigfile_name = "depsig.do_%s.%d" % (task, os.getpid()) 616 sigfile_link = "depsig.do_%s" % task 617 618 try: 619 sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b') 620 621 locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d} 622 623 if "." in self.method: 624 (module, method) = self.method.rsplit('.', 1) 625 locs['method'] = getattr(importlib.import_module(module), method) 626 outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs) 627 else: 628 outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs) 629 630 try: 631 extra_data = {} 632 633 owner = d.getVar('SSTATE_HASHEQUIV_OWNER') 634 if owner: 635 extra_data['owner'] = owner 636 637 if report_taskdata: 638 sigfile.seek(0) 639 640 extra_data['PN'] = d.getVar('PN') 641 extra_data['PV'] = d.getVar('PV') 642 extra_data['PR'] = d.getVar('PR') 643 extra_data['task'] = task 644 extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8') 645 646 method = self.method 647 if tid in self.extramethod: 648 method = method + self.extramethod[tid] 649 650 data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data) 651 new_unihash = data['unihash'] 652 653 if new_unihash != unihash: 654 hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server)) 655 bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d) 656 self.set_unihash(tid, new_unihash) 657 d.setVar('BB_UNIHASH', new_unihash) 658 else: 659 hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server)) 660 except ConnectionError as e: 661 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) 662 finally: 663 if sigfile: 664 sigfile.close() 665 666 sigfile_link_path = os.path.join(tempdir, sigfile_link) 667 bb.utils.remove(sigfile_link_path) 668 669 try: 670 os.symlink(sigfile_name, sigfile_link_path) 671 except OSError: 672 pass 673 674 def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches): 675 try: 676 extra_data = {} 677 method = self.method 678 if tid in self.extramethod: 679 method = method + self.extramethod[tid] 680 681 data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data) 682 hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data))) 683 684 if data is None: 685 bb.warn("Server unable to handle unihash report") 686 return False 687 688 finalunihash = data['unihash'] 689 690 if finalunihash == current_unihash: 691 hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash)) 692 elif finalunihash == wanted_unihash: 693 hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash)) 694 self.set_unihash(tid, finalunihash) 695 return True 696 else: 697 # TODO: What to do here? 698 hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash)) 699 700 except ConnectionError as e: 701 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) 702 703 return False 704 705# 706# Dummy class used for bitbake-selftest 707# 708class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash): 709 name = "TestEquivHash" 710 def init_rundepcheck(self, data): 711 super().init_rundepcheck(data) 712 self.server = data.getVar('BB_HASHSERVE') 713 self.method = "sstate_output_hash" 714 715# 716# Dummy class used for bitbake-selftest 717# 718class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash): 719 name = "TestMulticonfigDepends" 720 supports_multiconfig_datacaches = True 721 722def dump_this_task(outfile, d): 723 import bb.parse 724 fn = d.getVar("BB_FILENAME") 725 task = "do_" + d.getVar("BB_CURRENTTASK") 726 referencestamp = bb.build.stamp_internal(task, d, None, True) 727 bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp) 728 729def init_colors(enable_color): 730 """Initialise colour dict for passing to compare_sigfiles()""" 731 # First set up the colours 732 colors = {'color_title': '\033[1m', 733 'color_default': '\033[0m', 734 'color_add': '\033[0;32m', 735 'color_remove': '\033[0;31m', 736 } 737 # Leave all keys present but clear the values 738 if not enable_color: 739 for k in colors.keys(): 740 colors[k] = '' 741 return colors 742 743def worddiff_str(oldstr, newstr, colors=None): 744 if not colors: 745 colors = init_colors(False) 746 diff = simplediff.diff(oldstr.split(' '), newstr.split(' ')) 747 ret = [] 748 for change, value in diff: 749 value = ' '.join(value) 750 if change == '=': 751 ret.append(value) 752 elif change == '+': 753 item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors) 754 ret.append(item) 755 elif change == '-': 756 item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors) 757 ret.append(item) 758 whitespace_note = '' 759 if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()): 760 whitespace_note = ' (whitespace changed)' 761 return '"%s"%s' % (' '.join(ret), whitespace_note) 762 763def list_inline_diff(oldlist, newlist, colors=None): 764 if not colors: 765 colors = init_colors(False) 766 diff = simplediff.diff(oldlist, newlist) 767 ret = [] 768 for change, value in diff: 769 value = ' '.join(value) 770 if change == '=': 771 ret.append("'%s'" % value) 772 elif change == '+': 773 item = '{color_add}+{value}{color_default}'.format(value=value, **colors) 774 ret.append(item) 775 elif change == '-': 776 item = '{color_remove}-{value}{color_default}'.format(value=value, **colors) 777 ret.append(item) 778 return '[%s]' % (', '.join(ret)) 779 780def clean_basepath(basepath): 781 basepath, dir, recipe_task = basepath.rsplit("/", 2) 782 cleaned = dir + '/' + recipe_task 783 784 if basepath[0] == '/': 785 return cleaned 786 787 if basepath.startswith("mc:") and basepath.count(':') >= 2: 788 mc, mc_name, basepath = basepath.split(":", 2) 789 mc_suffix = ':mc:' + mc_name 790 else: 791 mc_suffix = '' 792 793 # mc stuff now removed from basepath. Whatever was next, if present will be the first 794 # suffix. ':/', recipe path start, marks the end of this. Something like 795 # 'virtual:a[:b[:c]]:/path...' (b and c being optional) 796 if basepath[0] != '/': 797 cleaned += ':' + basepath.split(':/', 1)[0] 798 799 return cleaned + mc_suffix 800 801def clean_basepaths(a): 802 b = {} 803 for x in a: 804 b[clean_basepath(x)] = a[x] 805 return b 806 807def clean_basepaths_list(a): 808 b = [] 809 for x in a: 810 b.append(clean_basepath(x)) 811 return b 812 813# Handled renamed fields 814def handle_renames(data): 815 if 'basewhitelist' in data: 816 data['basehash_ignore_vars'] = data['basewhitelist'] 817 del data['basewhitelist'] 818 if 'taskwhitelist' in data: 819 data['taskhash_ignore_tasks'] = data['taskwhitelist'] 820 del data['taskwhitelist'] 821 822 823def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False): 824 output = [] 825 826 colors = init_colors(color) 827 def color_format(formatstr, **values): 828 """ 829 Return colour formatted string. 830 NOTE: call with the format string, not an already formatted string 831 containing values (otherwise you could have trouble with { and } 832 characters) 833 """ 834 if not formatstr.endswith('{color_default}'): 835 formatstr += '{color_default}' 836 # In newer python 3 versions you can pass both of these directly, 837 # but we only require 3.4 at the moment 838 formatparams = {} 839 formatparams.update(colors) 840 formatparams.update(values) 841 return formatstr.format(**formatparams) 842 843 with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f: 844 a_data = json.load(f, object_hook=SetDecoder) 845 with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f: 846 b_data = json.load(f, object_hook=SetDecoder) 847 848 for data in [a_data, b_data]: 849 handle_renames(data) 850 851 def dict_diff(a, b, ignored_vars=set()): 852 sa = set(a.keys()) 853 sb = set(b.keys()) 854 common = sa & sb 855 changed = set() 856 for i in common: 857 if a[i] != b[i] and i not in ignored_vars: 858 changed.add(i) 859 added = sb - sa 860 removed = sa - sb 861 return changed, added, removed 862 863 def file_checksums_diff(a, b): 864 from collections import Counter 865 866 # Convert lists back to tuples 867 a = [(f[0], f[1]) for f in a] 868 b = [(f[0], f[1]) for f in b] 869 870 # Compare lists, ensuring we can handle duplicate filenames if they exist 871 removedcount = Counter(a) 872 removedcount.subtract(b) 873 addedcount = Counter(b) 874 addedcount.subtract(a) 875 added = [] 876 for x in b: 877 if addedcount[x] > 0: 878 addedcount[x] -= 1 879 added.append(x) 880 removed = [] 881 changed = [] 882 for x in a: 883 if removedcount[x] > 0: 884 removedcount[x] -= 1 885 for y in added: 886 if y[0] == x[0]: 887 changed.append((x[0], x[1], y[1])) 888 added.remove(y) 889 break 890 else: 891 removed.append(x) 892 added = [x[0] for x in added] 893 removed = [x[0] for x in removed] 894 return changed, added, removed 895 896 if 'basehash_ignore_vars' in a_data and a_data['basehash_ignore_vars'] != b_data['basehash_ignore_vars']: 897 output.append(color_format("{color_title}basehash_ignore_vars changed{color_default} from '%s' to '%s'") % (a_data['basehash_ignore_vars'], b_data['basehash_ignore_vars'])) 898 if a_data['basehash_ignore_vars'] and b_data['basehash_ignore_vars']: 899 output.append("changed items: %s" % a_data['basehash_ignore_vars'].symmetric_difference(b_data['basehash_ignore_vars'])) 900 901 if 'taskhash_ignore_tasks' in a_data and a_data['taskhash_ignore_tasks'] != b_data['taskhash_ignore_tasks']: 902 output.append(color_format("{color_title}taskhash_ignore_tasks changed{color_default} from '%s' to '%s'") % (a_data['taskhash_ignore_tasks'], b_data['taskhash_ignore_tasks'])) 903 if a_data['taskhash_ignore_tasks'] and b_data['taskhash_ignore_tasks']: 904 output.append("changed items: %s" % a_data['taskhash_ignore_tasks'].symmetric_difference(b_data['taskhash_ignore_tasks'])) 905 906 if a_data['taskdeps'] != b_data['taskdeps']: 907 output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps']))) 908 909 if a_data['basehash'] != b_data['basehash'] and not collapsed: 910 output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash'])) 911 912 changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basehash_ignore_vars'] & b_data['basehash_ignore_vars']) 913 if changed: 914 for dep in sorted(changed): 915 output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep])) 916 if a_data['gendeps'][dep] and b_data['gendeps'][dep]: 917 output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep])) 918 if added: 919 for dep in sorted(added): 920 output.append(color_format("{color_title}Dependency on variable %s was added") % (dep)) 921 if removed: 922 for dep in sorted(removed): 923 output.append(color_format("{color_title}Dependency on Variable %s was removed") % (dep)) 924 925 926 changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals']) 927 if changed: 928 for dep in sorted(changed): 929 oldval = a_data['varvals'][dep] 930 newval = b_data['varvals'][dep] 931 if newval and oldval and ('\n' in oldval or '\n' in newval): 932 diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='') 933 # Cut off the first two lines, since we aren't interested in 934 # the old/new filename (they are blank anyway in this case) 935 difflines = list(diff)[2:] 936 if color: 937 # Add colour to diff output 938 for i, line in enumerate(difflines): 939 if line.startswith('+'): 940 line = color_format('{color_add}{line}', line=line) 941 difflines[i] = line 942 elif line.startswith('-'): 943 line = color_format('{color_remove}{line}', line=line) 944 difflines[i] = line 945 output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines))) 946 elif newval and oldval and (' ' in oldval or ' ' in newval): 947 output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors))) 948 else: 949 output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval)) 950 951 if not 'file_checksum_values' in a_data: 952 a_data['file_checksum_values'] = [] 953 if not 'file_checksum_values' in b_data: 954 b_data['file_checksum_values'] = [] 955 956 changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values']) 957 if changed: 958 for f, old, new in changed: 959 output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new)) 960 if added: 961 for f in added: 962 output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f)) 963 if removed: 964 for f in removed: 965 output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f)) 966 967 if not 'runtaskdeps' in a_data: 968 a_data['runtaskdeps'] = {} 969 if not 'runtaskdeps' in b_data: 970 b_data['runtaskdeps'] = {} 971 972 if not collapsed: 973 if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']): 974 changed = ["Number of task dependencies changed"] 975 else: 976 changed = [] 977 for idx, task in enumerate(a_data['runtaskdeps']): 978 a = a_data['runtaskdeps'][idx] 979 b = b_data['runtaskdeps'][idx] 980 if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed: 981 changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b])) 982 983 if changed: 984 clean_a = clean_basepaths_list(a_data['runtaskdeps']) 985 clean_b = clean_basepaths_list(b_data['runtaskdeps']) 986 if clean_a != clean_b: 987 output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors)) 988 else: 989 output.append(color_format("{color_title}runtaskdeps changed:")) 990 output.append("\n".join(changed)) 991 992 993 if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data: 994 a = a_data['runtaskhashes'] 995 b = b_data['runtaskhashes'] 996 changed, added, removed = dict_diff(a, b) 997 if added: 998 for dep in sorted(added): 999 bdep_found = False 1000 if removed: 1001 for bdep in removed: 1002 if b[dep] == a[bdep]: 1003 #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep)) 1004 bdep_found = True 1005 if not bdep_found: 1006 output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep])) 1007 if removed: 1008 for dep in sorted(removed): 1009 adep_found = False 1010 if added: 1011 for adep in added: 1012 if b[adep] == a[dep]: 1013 #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep)) 1014 adep_found = True 1015 if not adep_found: 1016 output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep])) 1017 if changed: 1018 for dep in sorted(changed): 1019 if not collapsed: 1020 output.append(color_format("{color_title}Hash for task dependency %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep])) 1021 if callable(recursecb): 1022 recout = recursecb(dep, a[dep], b[dep]) 1023 if recout: 1024 if collapsed: 1025 output.extend(recout) 1026 else: 1027 # If a dependent hash changed, might as well print the line above and then defer to the changes in 1028 # that hash since in all likelyhood, they're the same changes this task also saw. 1029 output = [output[-1]] + recout 1030 1031 a_taint = a_data.get('taint', None) 1032 b_taint = b_data.get('taint', None) 1033 if a_taint != b_taint: 1034 if a_taint and a_taint.startswith('nostamp:'): 1035 a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):') 1036 if b_taint and b_taint.startswith('nostamp:'): 1037 b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):') 1038 output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint)) 1039 1040 return output 1041 1042 1043def calc_basehash(sigdata): 1044 task = sigdata['task'] 1045 basedata = sigdata['varvals'][task] 1046 1047 if basedata is None: 1048 basedata = '' 1049 1050 alldeps = sigdata['taskdeps'] 1051 for dep in alldeps: 1052 basedata = basedata + dep 1053 val = sigdata['varvals'][dep] 1054 if val is not None: 1055 basedata = basedata + str(val) 1056 1057 return hashlib.sha256(basedata.encode("utf-8")).hexdigest() 1058 1059def calc_taskhash(sigdata): 1060 data = sigdata['basehash'] 1061 1062 for dep in sigdata['runtaskdeps']: 1063 data = data + sigdata['runtaskhashes'][dep] 1064 1065 for c in sigdata['file_checksum_values']: 1066 if c[1]: 1067 if "./" in c[0]: 1068 data = data + c[0] 1069 data = data + c[1] 1070 1071 if 'taint' in sigdata: 1072 if 'nostamp:' in sigdata['taint']: 1073 data = data + sigdata['taint'][8:] 1074 else: 1075 data = data + sigdata['taint'] 1076 1077 return hashlib.sha256(data.encode("utf-8")).hexdigest() 1078 1079 1080def dump_sigfile(a): 1081 output = [] 1082 1083 with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f: 1084 a_data = json.load(f, object_hook=SetDecoder) 1085 1086 handle_renames(a_data) 1087 1088 output.append("basehash_ignore_vars: %s" % (sorted(a_data['basehash_ignore_vars']))) 1089 1090 output.append("taskhash_ignore_tasks: %s" % (sorted(a_data['taskhash_ignore_tasks'] or []))) 1091 1092 output.append("Task dependencies: %s" % (sorted(a_data['taskdeps']))) 1093 1094 output.append("basehash: %s" % (a_data['basehash'])) 1095 1096 for dep in sorted(a_data['gendeps']): 1097 output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep]))) 1098 1099 for dep in sorted(a_data['varvals']): 1100 output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep])) 1101 1102 if 'runtaskdeps' in a_data: 1103 output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps']))) 1104 1105 if 'file_checksum_values' in a_data: 1106 output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values']))) 1107 1108 if 'runtaskhashes' in a_data: 1109 for dep in sorted(a_data['runtaskhashes']): 1110 output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep])) 1111 1112 if 'taint' in a_data: 1113 if a_data['taint'].startswith('nostamp:'): 1114 msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):') 1115 else: 1116 msg = a_data['taint'] 1117 output.append("Tainted (by forced/invalidated task): %s" % msg) 1118 1119 if 'task' in a_data: 1120 computed_basehash = calc_basehash(a_data) 1121 output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash'])) 1122 else: 1123 output.append("Unable to compute base hash") 1124 1125 computed_taskhash = calc_taskhash(a_data) 1126 output.append("Computed task hash is %s" % computed_taskhash) 1127 1128 return output 1129