#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
import json
import bb.compress.zstd
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv
import hashserv.client

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

class SetEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, set):
            return dict(_set_object=list(sorted(obj)))
        return json.JSONEncoder.default(self, obj)

def SetDecoder(dct):
    if '_set_object' in dct:
        return set(dct['_set_object'])
    return dct
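# Illustrative example (assumption, not part of the original file): sets
# survive a JSON round trip through the two helpers above, e.g.
#   json.loads(json.dumps({"deps": {"b", "a"}}, cls=SetEncoder), object_hook=SetDecoder)
# returns {"deps": {"a", "b"}} with the set type restored.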
def init(d):
    siggens = [obj for obj in globals().values()
               if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
    else:
        logger.error("Invalid signature generator '%s', using default 'noop'\n"
                     "Available generators: %s", desired,
                     ', '.join(obj.name for obj in siggens))
    return SignatureGenerator(d)

class SignatureGenerator(object):
    """
    The "noop" signature generator. It defines the interface the rest of
    BitBake uses; task hashes are simply hashes of the task identifier.
    """
    name = "noop"

    # If the derived class supports multiconfig datacaches, set this to True
    # The default is False for backward compatibility with derived signature
    # generators that do not understand multiconfig caches
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, variant):
        return

    def postparsing_clean_cache(self):
        return

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, d, fn):
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to signature
        generator functions. If the signature generator supports multiconfig
        caches, the entire dictionary of data caches is sent, otherwise a
        special proxy is sent that supports both index access to all
        multiconfigs, and also direct access for the default multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed in
        the future when support for multiconfig data caches is mandatory
        """
        class DataCacheProxy(object):
            def __init__(self):
                pass

            def __getitem__(self, key):
                return dataCaches[key]

            def __getattr__(self, name):
                return getattr(dataCaches[mc], name)

        if cls.supports_multiconfig_datacaches:
            return dataCaches

        return DataCacheProxy()
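    # Illustrative sketch (assumption, not part of the original file): for a
    # generator with supports_multiconfig_datacaches = False, the proxy
    # returned above behaves roughly like
    #   caches = SignatureGenerator.get_data_caches(dataCaches, "")
    #   caches["mc1"].pkg_fn   # index access reaches any multiconfig cache
    #   caches.pkg_fn          # attribute access falls through to dataCaches[""]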
    def exit(self):
        return

class SignatureGeneratorBasic(SignatureGenerator):
    """
    The "basic" signature generator. Task hashes cover the task's variable
    dependencies, task dependencies, file checksums and taints.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.setscenetasks = set()
        self.basehash_ignore_vars = set((data.getVar("BB_BASEHASH_IGNORE_VARS") or "").split())
        self.taskhash_ignore_tasks = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        self.taskhash_ignore_tasks = data.getVar("BB_TASKHASH_IGNORE_TASKS") or None
        if self.taskhash_ignore_tasks:
            self.twl = re.compile(self.taskhash_ignore_tasks)
        else:
            self.twl = None

    def _build_data(self, fn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basehash_ignore_vars)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basehash_ignore_vars, fn)

        for task in tasklist:
            tid = fn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % fn)
            raise

        # Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH:task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the ignore
        # list
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True
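    # Illustrative example (assumption, hypothetical value): with
    #   BB_TASKHASH_IGNORE_TASKS = "quilt-native"
    # rundep_check() above drops any dependency whose recipe name matches the
    # expression from the task hashes of recipes that do not themselves match it.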
    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):

        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[fn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.supports_multiconfig_datacaches and mc != depmc:
                # If the signature generator doesn't understand multiconfig
                # data caches, any dependency not in the same multiconfig must
                # be skipped for backward compatibility
                continue
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            for (f, cs) in checksums:
                self.file_checksum_values[tid].append((f, cs))

        taskdep = dataCaches[mc].task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):

        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            data = data + self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                if "/./" in f:
                    data = data + "./" + f.split("/./")[1]
                data = data + cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data = data + self.taints[tid][8:]
            else:
                data = data + self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
        return h
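    # Note (assumption, not part of the original file): get_taskhash() above
    # effectively computes
    #   sha256(basehash + unihash(dep_1) + ... + unihash(dep_n) + file checksum data + taint data)
    # so a change in any dependency's unihash, in a tracked file, or in the
    # taint value changes the task hash.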
    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def dump_sigtask(self, fn, task, stampbase, runtime):

        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basehash_ignore_vars'] = self.basehash_ignore_vars
        data['taskhash_ignore_tasks'] = self.taskhash_ignore_tasks
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basehash_ignore_vars:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = []
            for f, cs in self.file_checksum_values[tid]:
                if "/./" in f:
                    data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
                else:
                    data['file_checksum_values'].append((os.path.basename(f), cs))
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
                f.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

    def dump_sigfn(self, fn, dataCaches, options):
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)
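# Note (assumption, not part of the original file): dump_sigtask() above writes
# zstd-compressed JSON next to the stamp files, named e.g.
#   <stampbase>.do_compile.sigdata.<unihash>       (when runtime data is available)
#   <stampbase>.do_compile.sigbasedata.<basehash>  (otherwise)
# These are the files that dump_sigfile() and compare_sigfiles() below consume.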
class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If task is not in basehash, then error
        return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        if taskname != "do_setscene" and taskname.endswith("_setscene"):
            tid = fn + ":" + taskname[:-9]
        else:
            tid = fn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)

class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def reset(self, data):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().reset(data)

    def exit(self):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().exit()

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures that a task won't be re-run if its taskhash changes but
            # it would still result in the same output hash
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash
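    # Note (assumption, not part of the original file): the persistent
    # unitaskhashes cache is keyed as
    #   "<mc>:<recipe name>:<task name>" -> (taskhash, unihash)
    # so a cached unihash is only reused while the recorded taskhash still
    # matches (see the checkkey handling above).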
    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task we can return
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # one entry for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same still works
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) The value is easy for multiple independent builders to derive the
        #    same unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash
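    # Illustrative sketch (assumption, not part of the original file): the
    # server lookup above boils down to
    #   unihash = self.client().get_unihash(self.method, taskhash) or taskhash
    # i.e. an equivalent hash reported by the server replaces the taskhash,
    # otherwise the taskhash serves as its own unihash.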
    def report_unihash(self, path, task, d):
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just exit
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass
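    # Note (assumption, not part of the original file): report_unihash() above
    # runs after a task has produced output; self.method names the output
    # hashing function (for example "sstate_output_hash" in the selftest class
    # below), and the resulting outhash is reported so that tasks with
    # identical output can share a unihash even when their taskhashes differ.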
    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
    name = "TestMulticonfigDepends"
    supports_multiconfig_datacaches = True

def dump_this_task(outfile, d):
    import bb.parse
    fn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.build.stamp_internal(task, d, None, True)
    bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title':   '\033[1m',
              'color_default': '\033[0m',
              'color_add':     '\033[0;32m',
              'color_remove':  '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)

def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

def clean_basepath(basepath):
    basepath, dir, recipe_task = basepath.rsplit("/", 2)
    cleaned = dir + '/' + recipe_task

    if basepath[0] == '/':
        return cleaned

    if basepath.startswith("mc:") and basepath.count(':') >= 2:
        mc, mc_name, basepath = basepath.split(":", 2)
        mc_suffix = ':mc:' + mc_name
    else:
        mc_suffix = ''

    # mc stuff now removed from basepath. Whatever was next, if present will be the first
    # suffix. ':/', recipe path start, marks the end of this. Something like
    # 'virtual:a[:b[:c]]:/path...' (b and c being optional)
    if basepath[0] != '/':
        cleaned += ':' + basepath.split(':/', 1)[0]

    return cleaned + mc_suffix
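# Illustrative example (assumption, hypothetical path): clean_basepath() turns
#   "virtual:native:/path/to/meta/recipes-core/foo/foo_1.0.bb:do_compile"
# into
#   "foo/foo_1.0.bb:do_compile:virtual:native"
# i.e. it keeps the recipe directory, recipe file, task and any virtual/mc
# prefixes while dropping the absolute path.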
def clean_basepaths(a):
    b = {}
    for x in a:
        b[clean_basepath(x)] = a[x]
    return b

def clean_basepaths_list(a):
    b = []
    for x in a:
        b.append(clean_basepath(x))
    return b

# Handle renamed fields
def handle_renames(data):
    if 'basewhitelist' in data:
        data['basehash_ignore_vars'] = data['basewhitelist']
        del data['basewhitelist']
    if 'taskwhitelist' in data:
        data['taskhash_ignore_tasks'] = data['taskwhitelist']
        del data['taskwhitelist']
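# Note (assumption, not part of the original file): compare_sigfiles() and
# dump_sigfile() below operate on the sigdata/sigbasedata files written by
# dump_sigtask(), e.g.
#   for line in compare_sigfiles(sig_a, sig_b, color=True):
#       print(line)
# which is roughly what the bitbake-diffsigs and bitbake-dumpsig tools do.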
def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)
    with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
        b_data = json.load(f, object_hook=SetDecoder)

    for data in [a_data, b_data]:
        handle_renames(data)

    def dict_diff(a, b, ignored_vars=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in ignored_vars:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        from collections import Counter

        # Convert lists back to tuples
        a = [(f[0], f[1]) for f in a]
        b = [(f[0], f[1]) for f in b]

        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    if 'basehash_ignore_vars' in a_data and a_data['basehash_ignore_vars'] != b_data['basehash_ignore_vars']:
        output.append(color_format("{color_title}basehash_ignore_vars changed{color_default} from '%s' to '%s'") % (a_data['basehash_ignore_vars'], b_data['basehash_ignore_vars']))
        if a_data['basehash_ignore_vars'] and b_data['basehash_ignore_vars']:
            output.append("changed items: %s" % a_data['basehash_ignore_vars'].symmetric_difference(b_data['basehash_ignore_vars']))

    if 'taskhash_ignore_tasks' in a_data and a_data['taskhash_ignore_tasks'] != b_data['taskhash_ignore_tasks']:
        output.append(color_format("{color_title}taskhash_ignore_tasks changed{color_default} from '%s' to '%s'") % (a_data['taskhash_ignore_tasks'], b_data['taskhash_ignore_tasks']))
        if a_data['taskhash_ignore_tasks'] and b_data['taskhash_ignore_tasks']:
            output.append("changed items: %s" % a_data['taskhash_ignore_tasks'].symmetric_difference(b_data['taskhash_ignore_tasks']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basehash_ignore_vars'] & b_data['basehash_ignore_vars'])
    if changed:
        for dep in sorted(changed):
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in sorted(added):
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in sorted(removed):
            output.append(color_format("{color_title}Dependency on variable %s was removed") % (dep))


    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in sorted(changed):
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if not 'file_checksum_values' in a_data:
        a_data['file_checksum_values'] = []
    if not 'file_checksum_values' in b_data:
        b_data['file_checksum_values'] = []

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
added") % (f)) 962 if removed: 963 for f in removed: 964 output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f)) 965 966 if not 'runtaskdeps' in a_data: 967 a_data['runtaskdeps'] = {} 968 if not 'runtaskdeps' in b_data: 969 b_data['runtaskdeps'] = {} 970 971 if not collapsed: 972 if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']): 973 changed = ["Number of task dependencies changed"] 974 else: 975 changed = [] 976 for idx, task in enumerate(a_data['runtaskdeps']): 977 a = a_data['runtaskdeps'][idx] 978 b = b_data['runtaskdeps'][idx] 979 if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed: 980 changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b])) 981 982 if changed: 983 clean_a = clean_basepaths_list(a_data['runtaskdeps']) 984 clean_b = clean_basepaths_list(b_data['runtaskdeps']) 985 if clean_a != clean_b: 986 output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors)) 987 else: 988 output.append(color_format("{color_title}runtaskdeps changed:")) 989 output.append("\n".join(changed)) 990 991 992 if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data: 993 a = clean_basepaths(a_data['runtaskhashes']) 994 b = clean_basepaths(b_data['runtaskhashes']) 995 changed, added, removed = dict_diff(a, b) 996 if added: 997 for dep in sorted(added): 998 bdep_found = False 999 if removed: 1000 for bdep in removed: 1001 if b[dep] == a[bdep]: 1002 #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep)) 1003 bdep_found = True 1004 if not bdep_found: 1005 output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (dep, b[dep])) 1006 if removed: 1007 for dep in sorted(removed): 1008 adep_found = False 1009 if added: 1010 for adep in added: 1011 if b[adep] == a[dep]: 1012 #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep)) 1013 adep_found = True 1014 if not adep_found: 1015 output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (dep, a[dep])) 1016 if changed: 1017 for dep in sorted(changed): 1018 if not collapsed: 1019 output.append(color_format("{color_title}Hash for task dependency %s changed{color_default} from %s to %s") % (dep, a[dep], b[dep])) 1020 if callable(recursecb): 1021 recout = recursecb(dep, a[dep], b[dep]) 1022 if recout: 1023 if collapsed: 1024 output.extend(recout) 1025 else: 1026 # If a dependent hash changed, might as well print the line above and then defer to the changes in 1027 # that hash since in all likelyhood, they're the same changes this task also saw. 
                            output = [output[-1]] + recout
                            break

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output


def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in alldeps:
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

def calc_taskhash(sigdata):
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            if "./" in c[0]:
                data = data + c[0]
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()


def dump_sigfile(a):
    output = []

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)

    handle_renames(a_data)

    output.append("basehash_ignore_vars: %s" % (sorted(a_data['basehash_ignore_vars'])))

    output.append("taskhash_ignore_tasks: %s" % (sorted(a_data['taskhash_ignore_tasks'] or [])))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in sorted(a_data['gendeps']):
        output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))

    for dep in sorted(a_data['varvals']):
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))

    if 'runtaskhashes' in a_data:
        for dep in sorted(a_data['runtaskhashes']):
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output