#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
import json
import bb.compress.zstd
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv
import hashserv.client

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

class SetEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, set):
            return dict(_set_object=list(sorted(obj)))
        return json.JSONEncoder.default(self, obj)

def SetDecoder(dct):
    if '_set_object' in dct:
        return set(dct['_set_object'])
    return dct

def init(d):
    siggens = [obj for obj in globals().values()
               if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
    else:
        logger.error("Invalid signature generator '%s', using default 'noop'\n"
                     "Available generators: %s", desired,
                     ', '.join(obj.name for obj in siggens))
        return SignatureGenerator(d)

class SignatureGenerator(object):
    """
    Dummy signature generator ("noop") and base class for real generators.
    """
    name = "noop"

    # If the derived class supports multiconfig datacaches, set this to True
    # The default is False for backward compatibility with derived signature
    # generators that do not understand multiconfig caches
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, variant):
        return

    def postparsing_clean_cache(self):
        return

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, d, fn):
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def copy_unitaskhashes(self, targetdir):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to signature
        generator functions. If the signature generator supports multiconfig
        caches, the entire dictionary of data caches is sent, otherwise a
        special proxy is sent that supports both index access to all
        multiconfigs, and also direct access for the default multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed in
        the future when support for multiconfig data caches is mandatory
        """
        class DataCacheProxy(object):
            def __init__(self):
                pass

            def __getitem__(self, key):
                return dataCaches[key]

            def __getattr__(self, name):
                return getattr(dataCaches[mc], name)

        if cls.supports_multiconfig_datacaches:
            return dataCaches

        return DataCacheProxy()

    def exit(self):
        return

class SignatureGeneratorBasic(SignatureGenerator):
    """
    Basic signature generator which computes task hashes from the variable and
    task dependency data gathered during parsing.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.setscenetasks = set()
        self.basehash_ignore_vars = set((data.getVar("BB_BASEHASH_IGNORE_VARS") or "").split())
        self.taskhash_ignore_tasks = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        self.taskhash_ignore_tasks = data.getVar("BB_TASKHASH_IGNORE_TASKS") or None
        if self.taskhash_ignore_tasks:
            self.twl = re.compile(self.taskhash_ignore_tasks)
        else:
            self.twl = None

    def _build_data(self, fn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basehash_ignore_vars)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basehash_ignore_vars, fn)

        for task in tasklist:
            tid = fn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % fn)
            raise

        # Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH:task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the ignore
        # list
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):

        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[fn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.supports_multiconfig_datacaches and mc != depmc:
                # If the signature generator doesn't understand multiconfig
                # data caches, any dependency not in the same multiconfig must
                # be skipped for backward compatibility
                continue
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            for (f,cs) in checksums:
                self.file_checksum_values[tid].append((f,cs))

        taskdep = dataCaches[mc].task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):

        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            data = data + self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                if "/./" in f:
                    data = data + "./" + f.split("/./")[1]
                data = data + cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data = data + self.taints[tid][8:]
            else:
                data = data + self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
        return h

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def copy_unitaskhashes(self, targetdir):
        self.unihash_cache.copyfile(targetdir)
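
    # dump_sigtask() below writes the task signature data ("sigdata" /
    # "sigbasedata" files) as zstd-compressed JSON. A minimal sketch of reading
    # one back, assuming a hypothetical filename:
    #
    #   with bb.compress.zstd.open("stamp.do_compile.sigdata.<hash>", "rt",
    #                              encoding="utf-8", num_threads=1) as f:
    #       sigdata = json.load(f, object_hook=SetDecoder)
    #   print(sigdata["taskhash"], sorted(sigdata["runtaskhashes"]))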
    def dump_sigtask(self, fn, task, stampbase, runtime):

        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basehash_ignore_vars'] = self.basehash_ignore_vars
        data['taskhash_ignore_tasks'] = self.taskhash_ignore_tasks
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basehash_ignore_vars:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = []
            for f,cs in self.file_checksum_values[tid]:
                if "/./" in f:
                    data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
                else:
                    data['file_checksum_values'].append((os.path.basename(f), cs))
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
                f.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

    def dump_sigfn(self, fn, dataCaches, options):
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If task is not in basehash, then error
        return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        if taskname != "do_setscene" and taskname.endswith("_setscene"):
            tid = fn + ":" + taskname[:-9]
        else:
            tid = fn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)

class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def reset(self, data):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().reset(data)

    def exit(self):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().exit()

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures a task won't be re-run if the taskhash changes but it
            # would result in the same output hash
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash
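
    # get_unihash() below consults the hash equivalence server (self.server)
    # to map a taskhash onto a previously reported "unique hash". A minimal
    # sketch of the underlying client call, with a hypothetical method name:
    #
    #   client = hashserv.create_client(self.server)
    #   unihash = client.get_unihash("sstate_output_hash", taskhash)
    #   # a falsy result means the server has no equivalence recorded yet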
    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task we can return
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # the most recent entry for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        #    continues to work
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) It is easy for multiple independent builders to derive the same
        #    unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash
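
    # report_unihash() below computes the output hash of a completed task via
    # the configured self.method and reports it, along with the taskhash, to
    # the hash equivalence server. A minimal sketch of the reporting call,
    # with hypothetical outhash/owner values:
    #
    #   data = self.client().report_unihash(taskhash, "sstate_output_hash",
    #                                       outhash, unihash, {"owner": "ci-builder"})
    #   new_unihash = data['unihash']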
    def report_unihash(self, path, task, d):
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just exit
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass

    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
    name = "TestMulticonfigDepends"
    supports_multiconfig_datacaches = True

def dump_this_task(outfile, d):
    import bb.parse
    fn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.build.stamp_internal(task, d, None, True)
    bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title': '\033[1m',
              'color_default': '\033[0m',
              'color_add': '\033[0;32m',
              'color_remove': '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)

def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))
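
# clean_basepath() below normalises a task identifier to a short,
# location-independent form so signatures can be compared across different
# build directories. Illustrative input/output (hypothetical paths):
#
#   /home/user/poky/meta/recipes-core/busybox/busybox_1.36.bb:do_fetch
#       -> busybox/busybox_1.36.bb:do_fetch
#   virtual:native:/home/user/poky/meta/recipes-core/zlib/zlib_1.3.bb:do_configure
#       -> zlib/zlib_1.3.bb:do_configure:virtual:native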
def clean_basepath(basepath):
    basepath, dir, recipe_task = basepath.rsplit("/", 2)
    cleaned = dir + '/' + recipe_task

    if basepath[0] == '/':
        return cleaned

    if basepath.startswith("mc:") and basepath.count(':') >= 2:
        mc, mc_name, basepath = basepath.split(":", 2)
        mc_suffix = ':mc:' + mc_name
    else:
        mc_suffix = ''

    # mc stuff now removed from basepath. Whatever was next, if present will be the first
    # suffix. ':/', recipe path start, marks the end of this. Something like
    # 'virtual:a[:b[:c]]:/path...' (b and c being optional)
    if basepath[0] != '/':
        cleaned += ':' + basepath.split(':/', 1)[0]

    return cleaned + mc_suffix

def clean_basepaths(a):
    b = {}
    for x in a:
        b[clean_basepath(x)] = a[x]
    return b

def clean_basepaths_list(a):
    b = []
    for x in a:
        b.append(clean_basepath(x))
    return b

# Handle renamed fields
def handle_renames(data):
    if 'basewhitelist' in data:
        data['basehash_ignore_vars'] = data['basewhitelist']
        del data['basewhitelist']
    if 'taskwhitelist' in data:
        data['taskhash_ignore_tasks'] = data['taskwhitelist']
        del data['taskwhitelist']


def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)
    with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
        b_data = json.load(f, object_hook=SetDecoder)

    for data in [a_data, b_data]:
        handle_renames(data)

    def dict_diff(a, b, ignored_vars=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in ignored_vars:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        from collections import Counter

        # Convert lists back to tuples
        a = [(f[0], f[1]) for f in a]
        b = [(f[0], f[1]) for f in b]

        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    if 'basehash_ignore_vars' in a_data and a_data['basehash_ignore_vars'] != b_data['basehash_ignore_vars']:
        output.append(color_format("{color_title}basehash_ignore_vars changed{color_default} from '%s' to '%s'") % (a_data['basehash_ignore_vars'], b_data['basehash_ignore_vars']))
        if a_data['basehash_ignore_vars'] and b_data['basehash_ignore_vars']:
            output.append("changed items: %s" % a_data['basehash_ignore_vars'].symmetric_difference(b_data['basehash_ignore_vars']))

    if 'taskhash_ignore_tasks' in a_data and a_data['taskhash_ignore_tasks'] != b_data['taskhash_ignore_tasks']:
        output.append(color_format("{color_title}taskhash_ignore_tasks changed{color_default} from '%s' to '%s'") % (a_data['taskhash_ignore_tasks'], b_data['taskhash_ignore_tasks']))
        if a_data['taskhash_ignore_tasks'] and b_data['taskhash_ignore_tasks']:
            output.append("changed items: %s" % a_data['taskhash_ignore_tasks'].symmetric_difference(b_data['taskhash_ignore_tasks']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basehash_ignore_vars'] & b_data['basehash_ignore_vars'])
    if changed:
        for dep in sorted(changed):
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in sorted(added):
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in sorted(removed):
            output.append(color_format("{color_title}Dependency on variable %s was removed") % (dep))


    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in sorted(changed):
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if not 'file_checksum_values' in a_data:
        a_data['file_checksum_values'] = []
    if not 'file_checksum_values' in b_data:
        b_data['file_checksum_values'] = []

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
added") % (f)) 968 if removed: 969 for f in removed: 970 output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f)) 971 972 if not 'runtaskdeps' in a_data: 973 a_data['runtaskdeps'] = {} 974 if not 'runtaskdeps' in b_data: 975 b_data['runtaskdeps'] = {} 976 977 if not collapsed: 978 if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']): 979 changed = ["Number of task dependencies changed"] 980 else: 981 changed = [] 982 for idx, task in enumerate(a_data['runtaskdeps']): 983 a = a_data['runtaskdeps'][idx] 984 b = b_data['runtaskdeps'][idx] 985 if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed: 986 changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b])) 987 988 if changed: 989 clean_a = clean_basepaths_list(a_data['runtaskdeps']) 990 clean_b = clean_basepaths_list(b_data['runtaskdeps']) 991 if clean_a != clean_b: 992 output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors)) 993 else: 994 output.append(color_format("{color_title}runtaskdeps changed:")) 995 output.append("\n".join(changed)) 996 997 998 if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data: 999 a = clean_basepaths(a_data['runtaskhashes']) 1000 b = clean_basepaths(b_data['runtaskhashes']) 1001 changed, added, removed = dict_diff(a, b) 1002 if added: 1003 for dep in sorted(added): 1004 bdep_found = False 1005 if removed: 1006 for bdep in removed: 1007 if b[dep] == a[bdep]: 1008 #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep)) 1009 bdep_found = True 1010 if not bdep_found: 1011 output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (dep, b[dep])) 1012 if removed: 1013 for dep in sorted(removed): 1014 adep_found = False 1015 if added: 1016 for adep in added: 1017 if b[adep] == a[dep]: 1018 #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep)) 1019 adep_found = True 1020 if not adep_found: 1021 output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (dep, a[dep])) 1022 if changed: 1023 for dep in sorted(changed): 1024 if not collapsed: 1025 output.append(color_format("{color_title}Hash for task dependency %s changed{color_default} from %s to %s") % (dep, a[dep], b[dep])) 1026 if callable(recursecb): 1027 recout = recursecb(dep, a[dep], b[dep]) 1028 if recout: 1029 if collapsed: 1030 output.extend(recout) 1031 else: 1032 # If a dependent hash changed, might as well print the line above and then defer to the changes in 1033 # that hash since in all likelyhood, they're the same changes this task also saw. 
                            output = [output[-1]] + recout
                            break

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output


def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in alldeps:
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

def calc_taskhash(sigdata):
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            if "./" in c[0]:
                data = data + c[0]
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()


def dump_sigfile(a):
    output = []

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)

    handle_renames(a_data)

    output.append("basehash_ignore_vars: %s" % (sorted(a_data['basehash_ignore_vars'])))

    output.append("taskhash_ignore_tasks: %s" % (sorted(a_data['taskhash_ignore_tasks'] or [])))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in sorted(a_data['gendeps']):
        output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))

    for dep in sorted(a_data['varvals']):
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))

    if 'runtaskhashes' in a_data:
        for dep in sorted(a_data['runtaskhashes']):
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output
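
# A minimal sketch of consuming the helpers above outside of BitBake's own
# tooling such as bitbake-diffsigs (file names are hypothetical; sigdata files
# are the zstd-compressed JSON documents written by dump_sigtask()):
#
#   import bb.siggen
#   for line in bb.siggen.dump_sigfile("busybox.do_fetch.sigdata.1234"):
#       print(line)
#   for line in bb.siggen.compare_sigfiles("old.sigdata", "new.sigdata", color=True):
#       print(line)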