#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
import json
import bb.compress.zstd
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv
import hashserv.client

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

class SetEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, set):
            return dict(_set_object=list(sorted(obj)))
        return json.JSONEncoder.default(self, obj)

def SetDecoder(dct):
    if '_set_object' in dct:
        return set(dct['_set_object'])
    return dct

def init(d):
    siggens = [obj for obj in globals().values()
               if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
    else:
        logger.error("Invalid signature generator '%s', using default 'noop'\n"
                     "Available generators: %s", desired,
                     ', '.join(obj.name for obj in siggens))
        return SignatureGenerator(d)

class SignatureGenerator(object):
    """
    Dummy "noop" signature generator, used when no real handler is configured.
    """
    name = "noop"

    # If the derived class supports multiconfig datacaches, set this to True
    # The default is False for backward compatibility with derived signature
    # generators that do not understand multiconfig caches
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, variant):
        return

    def postparsing_clean_cache(self):
        return

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, d, fn):
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to signature
        generator functions. If the signature generator supports multiconfig
        caches, the entire dictionary of data caches is sent, otherwise a
        special proxy is sent that supports both index access to all
        multiconfigs, and also direct access for the default multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed in
        the future when support for multiconfig data caches is mandatory
        """
        class DataCacheProxy(object):
            def __init__(self):
                pass

            def __getitem__(self, key):
                return dataCaches[key]

            def __getattr__(self, name):
                return getattr(dataCaches[mc], name)

        if cls.supports_multiconfig_datacaches:
            return dataCaches

        return DataCacheProxy()
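
# Illustrative behaviour of the proxy returned by get_data_caches() above for a
# generator that does not support multiconfig datacaches ("some_generator" and the
# multiconfig names are placeholders, not part of this module):
#
#   caches = some_generator.get_data_caches(dataCaches, "")
#   caches.pkg_fn          # attribute access -> dataCaches[""].pkg_fn
#   caches["mc1"].pkg_fn   # index access     -> dataCaches["mc1"].pkg_fn
#
# Multiconfig-unaware generators keep working against the default configuration,
# while multiconfig-aware code in this file can still index a specific one.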

class SignatureGeneratorBasic(SignatureGenerator):
    """
    Basic signature generator which hashes a task's variable dependencies,
    runtime task dependencies and tracked file checksums.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.setscenetasks = set()
        self.basewhitelist = set((data.getVar("BB_HASHBASE_WHITELIST") or "").split())
        self.taskwhitelist = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None
        if self.taskwhitelist:
            self.twl = re.compile(self.taskwhitelist)
        else:
            self.twl = None

    def _build_data(self, fn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basewhitelist)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basewhitelist, fn)

        for task in tasklist:
            tid = fn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % fn)
            raise

        # Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH:task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the whitelist
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):

        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[fn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.supports_multiconfig_datacaches and mc != depmc:
                # If the signature generator doesn't understand multiconfig
                # data caches, any dependency not in the same multiconfig must
                # be skipped for backward compatibility
                continue
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            for (f, cs) in checksums:
                self.file_checksum_values[tid].append((f, cs))

        taskdep = dataCaches[mc].task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):

        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            data = data + self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                if "/./" in f:
                    data = data + "./" + f.split("/./")[1]
                data = data + cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data = data + self.taints[tid][8:]
            else:
                data = data + self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
        return h
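
    # Descriptive summary only (the exact concatenation is the code above): the task
    # hash computed by get_taskhash() is conceptually
    #   sha256(basehash + unihash(dep_1) + ... + unihash(dep_n)
    #          + tracked file checksums + any taint value)
    # so it changes whenever a runtime dependency's unihash, a tracked file checksum
    # or a taint changes.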

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def dump_sigtask(self, fn, task, stampbase, runtime):

        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basewhitelist'] = self.basewhitelist
        data['taskwhitelist'] = self.taskwhitelist
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basewhitelist:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = []
            for f, cs in self.file_checksum_values[tid]:
                if "/./" in f:
                    data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
                else:
                    data['file_checksum_values'].append((os.path.basename(f), cs))
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
                f.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

    def dump_sigfn(self, fn, dataCaches, options):
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If task is not in basehash, then error
        return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        if taskname != "do_setscene" and taskname.endswith("_setscene"):
            tid = fn + ":" + taskname[:-9]
        else:
            tid = fn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)

class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures that the task will not be re-run if its taskhash changes
            # but it would still result in the same output hash
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash
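
    # The unitaskhashes cache used above maps "<mc>:<PN>:<taskname>" keys to
    # (taskhash, unihash) tuples, e.g. (hash values invented for illustration):
    #   self.unitaskhashes[":bash:do_compile"] = ("ab12...", "cd34...")
    # set_unihash() stores entries in this form and _get_unihash() only returns the
    # cached unihash when the stored taskhash still matches the one being checked.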

    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task we can return
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # the most recent entry for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same keeps working
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) It is easy for multiple independent builders to derive the
        #    same unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash

    def report_unihash(self, path, task, d):
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just abort
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass

    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
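
# For reference, the hash equivalence server interactions used by the mix-in above
# boil down to three client calls (return shapes as consumed by the code above):
#   client().get_unihash(method, taskhash)
#       -> the unihash string, or a false value if the server has none
#   client().report_unihash(taskhash, method, outhash, unihash, extra_data)
#       -> a dict containing the (possibly updated) 'unihash'
#   client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
#       -> a dict containing the final 'unihash', or None if the report failed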
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
    name = "TestMulticonfigDepends"
    supports_multiconfig_datacaches = True

def dump_this_task(outfile, d):
    import bb.parse
    fn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.build.stamp_internal(task, d, None, True)
    bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title':   '\033[1m',
              'color_default': '\033[0m',
              'color_add':     '\033[0;32m',
              'color_remove':  '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)

def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

def clean_basepath(basepath):
    basepath, dir, recipe_task = basepath.rsplit("/", 2)
    cleaned = dir + '/' + recipe_task

    if basepath[0] == '/':
        return cleaned

    if basepath.startswith("mc:") and basepath.count(':') >= 2:
        mc, mc_name, basepath = basepath.split(":", 2)
        mc_suffix = ':mc:' + mc_name
    else:
        mc_suffix = ''

    # mc stuff now removed from basepath. Whatever was next, if present, will be the
    # first suffix. ':/', the start of the recipe path, marks the end of this.
    # Something like 'virtual:a[:b[:c]]:/path...' (b and c being optional)
    if basepath[0] != '/':
        cleaned += ':' + basepath.split(':/', 1)[0]

    return cleaned + mc_suffix
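
# Illustrative inputs/outputs for clean_basepath() (the paths are invented for the
# example, not taken from a real build):
#   clean_basepath("/src/meta/recipes-core/foo/foo_1.0.bb:do_compile")
#       -> "foo/foo_1.0.bb:do_compile"
#   clean_basepath("mc:mc1:virtual:native:/src/meta/recipes-core/foo/foo_1.0.bb:do_compile")
#       -> "foo/foo_1.0.bb:do_compile:virtual:native:mc:mc1"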

def clean_basepaths(a):
    b = {}
    for x in a:
        b[clean_basepath(x)] = a[x]
    return b

def clean_basepaths_list(a):
    b = []
    for x in a:
        b.append(clean_basepath(x))
    return b

def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)
    with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
        b_data = json.load(f, object_hook=SetDecoder)

    def dict_diff(a, b, whitelist=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in whitelist:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        from collections import Counter

        # Convert lists back to tuples
        a = [(f[0], f[1]) for f in a]
        b = [(f[0], f[1]) for f in b]

        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    if 'basewhitelist' in a_data and a_data['basewhitelist'] != b_data['basewhitelist']:
        output.append(color_format("{color_title}basewhitelist changed{color_default} from '%s' to '%s'") % (a_data['basewhitelist'], b_data['basewhitelist']))
        if a_data['basewhitelist'] and b_data['basewhitelist']:
            output.append("changed items: %s" % a_data['basewhitelist'].symmetric_difference(b_data['basewhitelist']))

    if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']:
        output.append(color_format("{color_title}taskwhitelist changed{color_default} from '%s' to '%s'") % (a_data['taskwhitelist'], b_data['taskwhitelist']))
        if a_data['taskwhitelist'] and b_data['taskwhitelist']:
            output.append("changed items: %s" % a_data['taskwhitelist'].symmetric_difference(b_data['taskwhitelist']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] !=
 b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basewhitelist'] & b_data['basewhitelist'])
    if changed:
        for dep in sorted(changed):
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in sorted(added):
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in sorted(removed):
            output.append(color_format("{color_title}Dependency on variable %s was removed") % (dep))


    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in sorted(changed):
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if not 'file_checksum_values' in a_data:
        a_data['file_checksum_values'] = []
    if not 'file_checksum_values' in b_data:
        b_data['file_checksum_values'] = []

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if not 'runtaskdeps' in a_data:
        a_data['runtaskdeps'] = {}
    if not 'runtaskdeps' in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = a_data['runtaskhashes']
        b = b_data['runtaskhashes']
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in sorted(added):
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep]))
        if removed:
            for dep in sorted(removed):
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep]))
        if changed:
            for dep in sorted(changed):
                if not collapsed:
                    output.append(color_format("{color_title}Hash for dependent task %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
                            output = [output[-1]] + recout

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output


def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in alldeps:
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

def calc_taskhash(sigdata):
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            if "./" in c[0]:
                data = data + c[0]
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()


def dump_sigfile(a):
    output = []

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)

    output.append("basewhitelist: %s" % (sorted(a_data['basewhitelist'])))

    output.append("taskwhitelist: %s" % (sorted(a_data['taskwhitelist'] or [])))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in sorted(a_data['gendeps']):
        output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))

    for dep in sorted(a_data['varvals']):
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))

    if 'runtaskhashes' in a_data:
        for dep in sorted(a_data['runtaskhashes']):
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output
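
# Example of driving the comparison helpers above (illustrative only; the sigdata
# file names are placeholders). This is essentially how the bitbake-diffsigs and
# bitbake-dumpsig utilities use these functions:
#
#   import bb.siggen
#   for line in bb.siggen.dump_sigfile("foo.do_compile.sigdata.<hash>"):
#       print(line)
#   for line in bb.siggen.compare_sigfiles("old.sigdata", "new.sigdata", color=True):
#       print(line)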