#
# Copyright BitBake Contributors
#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
import json
import types
import bb.compress.zstd
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv
import hashserv.client

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

class SetEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, set) or isinstance(obj, frozenset):
            return dict(_set_object=list(sorted(obj)))
        return json.JSONEncoder.default(self, obj)

def SetDecoder(dct):
    if '_set_object' in dct:
        return frozenset(dct['_set_object'])
    return dct
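# The pair above lets sets and frozensets survive a JSON round trip:
# dump_sigtask() below serialises with cls=SetEncoder, while compare_sigfiles()
# and dump_sigfile() load with object_hook=SetDecoder. A minimal illustration
# of the intended round trip (values purely for example):
#
#   encoded = json.dumps({"deps": {"b", "a"}}, cls=SetEncoder)
#   # -> '{"deps": {"_set_object": ["a", "b"]}}'
#   decoded = json.loads(encoded, object_hook=SetDecoder)
#   # -> {'deps': frozenset({'a', 'b'})}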
def init(d):
    siggens = [obj for obj in globals().values()
                   if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
    else:
        logger.error("Invalid signature generator '%s', using default 'noop'\n"
                     "Available generators: %s", desired,
                     ', '.join(obj.name for obj in siggens))
        return SignatureGenerator(d)

class SignatureGenerator(object):
    """
    Default "noop" signature generator. Task hashes are derived from the task
    identifier alone, so they carry no real dependency information; the other
    generators subclass this and override the relevant methods.
    """
    name = "noop"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, variant):
        return

    def postparsing_clean_cache(self):
        return

    def setup_datacache(self, datacaches):
        self.datacaches = datacaches

    def setup_datacache_from_datastore(self, mcfn, d):
        # In task context we have no cache so setup internal data structures
        # from the fully parsed data store provided

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        tasks = d.getVar('__BBTASKS', False)

        self.datacaches = {}
        self.datacaches[mc] = types.SimpleNamespace()
        setattr(self.datacaches[mc], "stamp", {})
        self.datacaches[mc].stamp[mcfn] = d.getVar('STAMP')
        setattr(self.datacaches[mc], "stamp_extrainfo", {})
        self.datacaches[mc].stamp_extrainfo[mcfn] = {}
        for t in tasks:
            flag = d.getVarFlag(t, "stamp-extra-info")
            if flag:
                self.datacaches[mc].stamp_extrainfo[mcfn][t] = flag

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile_base(self, mcfn):
        mc = bb.runqueue.mc_from_tid(mcfn)
        return self.datacaches[mc].stamp[mcfn]

    def stampfile_mcfn(self, taskname, mcfn, extrainfo=True):
        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]
        if not stamp:
            return

        stamp_extrainfo = ""
        if extrainfo:
            taskflagname = taskname
            if taskname.endswith("_setscene"):
                taskflagname = taskname.replace("_setscene", "")
            stamp_extrainfo = self.datacaches[mc].stamp_extrainfo[mcfn].get(taskflagname) or ""

        return self.stampfile(stamp, mcfn, taskname, stamp_extrainfo)

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask_mcfn(self, taskname, mcfn):
        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]
        if not stamp:
            return []

        taskflagname = taskname
        if taskname.endswith("_setscene"):
            taskflagname = taskname.replace("_setscene", "")
        stamp_extrainfo = self.datacaches[mc].stamp_extrainfo[mcfn].get(taskflagname) or ""

        return self.stampcleanmask(stamp, mcfn, taskname, stamp_extrainfo)

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, mcfn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, mcfn):
        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]
        bb.utils.remove(stamp)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def copy_unitaskhashes(self, targetdir):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    def exit(self):
        return

class SignatureGeneratorBasic(SignatureGenerator):
    """
    Signature generator which computes base and task hashes from the parsed
    metadata (variable values, variable dependencies and task dependencies).
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.setscenetasks = set()
        self.basehash_ignore_vars = set((data.getVar("BB_BASEHASH_IGNORE_VARS") or "").split())
        self.taskhash_ignore_tasks = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        self.taskhash_ignore_tasks = data.getVar("BB_TASKHASH_IGNORE_TASKS") or None
        if self.taskhash_ignore_tasks:
            self.twl = re.compile(self.taskhash_ignore_tasks)
        else:
            self.twl = None

    def _build_data(self, mcfn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basehash_ignore_vars)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basehash_ignore_vars, mcfn)

        for task in tasklist:
            tid = mcfn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        return taskdeps, gendeps, lookupcache

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        mcfn = fn
        if variant or mc:
            mcfn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps, gendeps, lookupcache = self._build_data(mcfn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % mcfn)
            raise

        #Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[mcfn]:
        #    self.dump_sigtask(mcfn, task, d.getVar("STAMP"), False)

        basehashes = {}
        for task in taskdeps:
            basehashes[task] = self.basehash[mcfn + ":" + task]

        d.setVar("__siggen_basehashes", basehashes)
        d.setVar("__siggen_gendeps", gendeps)
        d.setVar("__siggen_varvals", lookupcache)
        d.setVar("__siggen_taskdeps", taskdeps)

    def setup_datacache_from_datastore(self, mcfn, d):
        super().setup_datacache_from_datastore(mcfn, d)

        mc = bb.runqueue.mc_from_tid(mcfn)
        for attr in ["siggen_varvals", "siggen_taskdeps", "siggen_gendeps"]:
            if not hasattr(self.datacaches[mc], attr):
                setattr(self.datacaches[mc], attr, {})
        self.datacaches[mc].siggen_varvals[mcfn] = d.getVar("__siggen_varvals")
        self.datacaches[mc].siggen_taskdeps[mcfn] = d.getVar("__siggen_taskdeps")
        self.datacaches[mc].siggen_gendeps[mcfn] = d.getVar("__siggen_gendeps")

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the ignore
        # list
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):

        (mc, _, task, mcfn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[mcfn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.rundep_check(mcfn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[mcfn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[mcfn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[mcfn][task], recipename, self.localdirsexclude)
            for (f,cs) in checksums:
                self.file_checksum_values[tid].append((f,cs))

        taskdep = dataCaches[mc].task_deps[mcfn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(mcfn, task, dataCaches[mc].stamp[mcfn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):

        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            data = data + self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                if "/./" in f:
                    data = data + "./" + f.split("/./")[1]
                data = data + cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data = data + self.taints[tid][8:]
            else:
                data = data + self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
        return h

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def copy_unitaskhashes(self, targetdir):
        self.unihash_cache.copyfile(targetdir)

    def dump_sigtask(self, mcfn, task, stampbase, runtime):
        tid = mcfn + ":" + task
        mc = bb.runqueue.mc_from_tid(mcfn)
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]
        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basehash_ignore_vars'] = self.basehash_ignore_vars
        data['taskhash_ignore_tasks'] = self.taskhash_ignore_tasks
        data['taskdeps'] = self.datacaches[mc].siggen_taskdeps[mcfn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.datacaches[mc].siggen_varvals[mcfn][task]
        for dep in self.datacaches[mc].siggen_taskdeps[mcfn][task]:
            if dep in self.basehash_ignore_vars:
                continue
            data['gendeps'][dep] = self.datacaches[mc].siggen_gendeps[mcfn][dep]
            data['varvals'][dep] = self.datacaches[mc].siggen_varvals[mcfn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = []
            for f,cs in self.file_checksum_values[tid]:
                if "/./" in f:
                    data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
                else:
                    data['file_checksum_values'].append((os.path.basename(f), cs))
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(mcfn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = bb.utils.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
                f.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If task is not in basehash, then error
        return self.basehash[tid]

    def stampfile(self, stampbase, mcfn, taskname, extrainfo, clean=False):
        if taskname.endswith("_setscene"):
            tid = mcfn + ":" + taskname[:-9]
        else:
            tid = mcfn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')
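    # As an illustration (hypothetical values): with stampbase
    # ".../stamps/foo-1.0-r0", taskname "do_compile", hash "abc123" and empty
    # extrainfo, the method above yields ".../stamps/foo-1.0-r0.do_compile.abc123";
    # rstrip('.') simply drops the trailing dot left by the empty extrainfo.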
    def stampcleanmask(self, stampbase, mcfn, taskname, extrainfo):
        return self.stampfile(stampbase, mcfn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, mcfn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (mcfn, task))

        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]

        taintfn = stamp + '.' + task + '.taint'

        import uuid
        bb.utils.mkdirhier(os.path.dirname(taintfn))
        # The specific content of the taint file is not really important,
        # we just need it to be random, so a random UUID is used
        with open(taintfn, 'w') as taintf:
            taintf.write(str(uuid.uuid4()))

class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def reset(self, data):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().reset(data)

    def exit(self):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().exit()

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures that a task won't be re-run if the taskhash changes but
            # still produces the same output hash
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash

    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task we can return early
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) The value is easy for multiple independent builders to derive the
        #    same unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash

    def report_unihash(self, path, task, d):
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        mcfn = d.getVar('BB_FILENAME')
        tid = mcfn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just exit
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(mcfn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass

    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

def dump_this_task(outfile, d):
    import bb.parse
    mcfn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.parse.siggen.stampfile_base(mcfn)
    bb.parse.siggen.dump_sigtask(mcfn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title':   '\033[1m',
              'color_default': '\033[0m',
              'color_add':     '\033[0;32m',
              'color_remove':  '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)

def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

def clean_basepath(basepath):
    basepath, dir, recipe_task = basepath.rsplit("/", 2)
    cleaned = dir + '/' + recipe_task

    if basepath[0] == '/':
        return cleaned

    if basepath.startswith("mc:") and basepath.count(':') >= 2:
        mc, mc_name, basepath = basepath.split(":", 2)
        mc_suffix = ':mc:' + mc_name
    else:
        mc_suffix = ''

    # mc stuff now removed from basepath. Whatever was next, if present will be the first
    # suffix. ':/', recipe path start, marks the end of this. Something like
    # 'virtual:a[:b[:c]]:/path...' (b and c being optional)
    if basepath[0] != '/':
        cleaned += ':' + basepath.split(':/', 1)[0]

    return cleaned + mc_suffix

def clean_basepaths(a):
    b = {}
    for x in a:
        b[clean_basepath(x)] = a[x]
    return b

def clean_basepaths_list(a):
    b = []
    for x in a:
        b.append(clean_basepath(x))
    return b

# Handle renamed fields
def handle_renames(data):
    if 'basewhitelist' in data:
        data['basehash_ignore_vars'] = data['basewhitelist']
        del data['basewhitelist']
    if 'taskwhitelist' in data:
        data['taskhash_ignore_tasks'] = data['taskwhitelist']
        del data['taskwhitelist']


def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)
    with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
        b_data = json.load(f, object_hook=SetDecoder)

    for data in [a_data, b_data]:
        handle_renames(data)

    def dict_diff(a, b, ignored_vars=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in ignored_vars:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        from collections import Counter

        # Convert lists back to tuples
        a = [(f[0], f[1]) for f in a]
        b = [(f[0], f[1]) for f in b]

        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    if 'basehash_ignore_vars' in a_data and a_data['basehash_ignore_vars'] != b_data['basehash_ignore_vars']:
        output.append(color_format("{color_title}basehash_ignore_vars changed{color_default} from '%s' to '%s'") % (a_data['basehash_ignore_vars'], b_data['basehash_ignore_vars']))
        if a_data['basehash_ignore_vars'] and b_data['basehash_ignore_vars']:
            output.append("changed items: %s" % a_data['basehash_ignore_vars'].symmetric_difference(b_data['basehash_ignore_vars']))

    if 'taskhash_ignore_tasks' in a_data and a_data['taskhash_ignore_tasks'] != b_data['taskhash_ignore_tasks']:
        output.append(color_format("{color_title}taskhash_ignore_tasks changed{color_default} from '%s' to '%s'") % (a_data['taskhash_ignore_tasks'], b_data['taskhash_ignore_tasks']))
        if a_data['taskhash_ignore_tasks'] and b_data['taskhash_ignore_tasks']:
            output.append("changed items: %s" % a_data['taskhash_ignore_tasks'].symmetric_difference(b_data['taskhash_ignore_tasks']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basehash_ignore_vars'] & b_data['basehash_ignore_vars'])
    if changed:
        for dep in sorted(changed):
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in sorted(added):
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in sorted(removed):
            output.append(color_format("{color_title}Dependency on variable %s was removed") % (dep))


    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in sorted(changed):
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if not 'file_checksum_values' in a_data:
        a_data['file_checksum_values'] = []
    if not 'file_checksum_values' in b_data:
        b_data['file_checksum_values'] = []

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if not 'runtaskdeps' in a_data:
        a_data['runtaskdeps'] = {}
    if not 'runtaskdeps' in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = clean_basepaths(a_data['runtaskhashes'])
        b = clean_basepaths(b_data['runtaskhashes'])
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in sorted(added):
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (dep, b[dep]))
        if removed:
            for dep in sorted(removed):
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (dep, a[dep]))
        if changed:
            for dep in sorted(changed):
                if not collapsed:
                    output.append(color_format("{color_title}Hash for task dependency %s changed{color_default} from %s to %s") % (dep, a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
                            output = [output[-1]] + recout
                            break

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output


def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in sorted(alldeps):
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

def calc_taskhash(sigdata):
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            if "./" in c[0]:
                data = data + c[0]
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()


def dump_sigfile(a):
    output = []

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)

    handle_renames(a_data)

    output.append("basehash_ignore_vars: %s" % (sorted(a_data['basehash_ignore_vars'])))

    output.append("taskhash_ignore_tasks: %s" % (sorted(a_data['taskhash_ignore_tasks'] or [])))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in sorted(a_data['gendeps']):
        output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))

    for dep in sorted(a_data['varvals']):
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))

    if 'runtaskhashes' in a_data:
        for dep in sorted(a_data['runtaskhashes']):
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output
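# A rough usage sketch (file names hypothetical): tools such as bitbake-diffsigs
# drive compare_sigfiles()/dump_sigfile() roughly as follows, printing the
# returned lines:
#
#   for line in dump_sigfile("stamps/foo.do_compile.sigdata.deadbeef"):
#       print(line)
#   for line in compare_sigfiles("old.sigdata", "new.sigdata", color=True):
#       print(line)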