#
# Copyright BitBake Contributors
#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
import json
import types
import bb.compress.zstd
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv
import hashserv.client

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

# find_siginfo and find_siginfo_version are set by the metadata siggen
# The minimum version of the find_siginfo function we need
find_siginfo_minversion = 2

def check_siggen_version(siggen):
    if not hasattr(siggen, "find_siginfo_version"):
        bb.fatal("Siggen from metadata (OE-Core?) is too old, please update it (no version found)")
    if siggen.find_siginfo_version < siggen.find_siginfo_minversion:
        bb.fatal("Siggen from metadata (OE-Core?) is too old, please update it (%s vs %s)" % (siggen.find_siginfo_version, siggen.find_siginfo_minversion))

class SetEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, set) or isinstance(obj, frozenset):
            return dict(_set_object=list(sorted(obj)))
        return json.JSONEncoder.default(self, obj)

def SetDecoder(dct):
    if '_set_object' in dct:
        return frozenset(dct['_set_object'])
    return dct

def init(d):
    siggens = [obj for obj in globals().values()
               if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
    else:
        logger.error("Invalid signature generator '%s', using default 'noop'\n"
                     "Available generators: %s", desired,
                     ', '.join(obj.name for obj in siggens))
        return SignatureGenerator(d)

class SignatureGenerator(object):
    """
    """
    name = "noop"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, variant):
        return

    def postparsing_clean_cache(self):
        return

    def setup_datacache(self, datacaches):
        self.datacaches = datacaches

    def setup_datacache_from_datastore(self, mcfn, d):
        # In task context we have no cache so setup internal data structures
        # from the fully parsed data store provided

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        tasks = d.getVar('__BBTASKS', False)

        self.datacaches = {}
        self.datacaches[mc] = types.SimpleNamespace()
        setattr(self.datacaches[mc], "stamp", {})
        self.datacaches[mc].stamp[mcfn] = d.getVar('STAMP')
        setattr(self.datacaches[mc], "stamp_extrainfo", {})
        self.datacaches[mc].stamp_extrainfo[mcfn] = {}
        for t in tasks:
            flag = d.getVarFlag(t, "stamp-extra-info")
            if flag:
                self.datacaches[mc].stamp_extrainfo[mcfn][t] = flag

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile_base(self, mcfn):
        mc = bb.runqueue.mc_from_tid(mcfn)
        return self.datacaches[mc].stamp[mcfn]

    def stampfile_mcfn(self, taskname, mcfn, extrainfo=True):
        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]
        if not stamp:
            return

        stamp_extrainfo = ""
        if extrainfo:
            taskflagname = taskname
            if taskname.endswith("_setscene"):
                taskflagname = taskname.replace("_setscene", "")
            stamp_extrainfo = self.datacaches[mc].stamp_extrainfo[mcfn].get(taskflagname) or ""

        return self.stampfile(stamp, mcfn, taskname, stamp_extrainfo)

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask_mcfn(self, taskname, mcfn):
        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]
        if not stamp:
            return []

        taskflagname = taskname
        if taskname.endswith("_setscene"):
            taskflagname = taskname.replace("_setscene", "")
        stamp_extrainfo = self.datacaches[mc].stamp_extrainfo[mcfn].get(taskflagname) or ""

        return self.stampcleanmask(stamp, mcfn, taskname, stamp_extrainfo)

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, mcfn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, mcfn):
        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]
        bb.utils.remove(stamp)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def copy_unitaskhashes(self, targetdir):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    def exit(self):
        return

def build_pnid(mc, pn, taskname):
    if mc:
        return "mc:" + mc + ":" + pn + ":" + taskname
    return pn + ":" + taskname

class SignatureGeneratorBasic(SignatureGenerator):
    """
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.setscenetasks = set()
        self.basehash_ignore_vars = set((data.getVar("BB_BASEHASH_IGNORE_VARS") or "").split())
        self.taskhash_ignore_tasks = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        self.taskhash_ignore_tasks = data.getVar("BB_TASKHASH_IGNORE_TASKS") or None
        if self.taskhash_ignore_tasks:
            self.twl = re.compile(self.taskhash_ignore_tasks)
        else:
            self.twl = None

    def _build_data(self, mcfn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basehash_ignore_vars)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basehash_ignore_vars, mcfn)

        for task in tasklist:
            tid = mcfn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before run printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        return taskdeps, gendeps, lookupcache

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        mcfn = fn
        if variant or mc:
            mcfn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps, gendeps, lookupcache = self._build_data(mcfn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % mcfn)
            raise

        basehashes = {}
        for task in taskdeps:
            basehashes[task] = self.basehash[mcfn + ":" + task]

        d.setVar("__siggen_basehashes", basehashes)
        d.setVar("__siggen_gendeps", gendeps)
        d.setVar("__siggen_varvals", lookupcache)
        d.setVar("__siggen_taskdeps", taskdeps)

        #Slow but can be useful for debugging mismatched basehashes
        #self.setup_datacache_from_datastore(mcfn, d)
        #for task in taskdeps:
        #    self.dump_sigtask(mcfn, task, d.getVar("STAMP"), False)

    def setup_datacache_from_datastore(self, mcfn, d):
        super().setup_datacache_from_datastore(mcfn, d)

        mc = bb.runqueue.mc_from_tid(mcfn)
        for attr in ["siggen_varvals", "siggen_taskdeps", "siggen_gendeps"]:
            if not hasattr(self.datacaches[mc], attr):
                setattr(self.datacaches[mc], attr, {})
        self.datacaches[mc].siggen_varvals[mcfn] = d.getVar("__siggen_varvals")
        self.datacaches[mc].siggen_taskdeps[mcfn] = d.getVar("__siggen_taskdeps")
        self.datacaches[mc].siggen_gendeps[mcfn] = d.getVar("__siggen_gendeps")

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the ignore
        # list
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):

        (mc, _, task, mcfn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[mcfn]

        self.tidtopn[tid] = recipename
        # save hashfn for deps into siginfo?
        for dep in deps:
            (depmc, _, deptask, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            dep_pn = dataCaches[depmc].pkg_fn[depmcfn]

            if not self.rundep_check(mcfn, recipename, task, dep, dep_pn, dataCaches):
                continue

            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)

            dep_pnid = build_pnid(depmc, dep_pn, deptask)
            self.runtaskdeps[tid].append((dep_pnid, dep))

        if task in dataCaches[mc].file_checksums[mcfn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[mcfn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[mcfn][task], recipename, self.localdirsexclude)
            for (f, cs) in checksums:
                self.file_checksum_values[tid].append((f, cs))

        taskdep = dataCaches[mc].task_deps[mcfn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(mcfn, task, dataCaches[mc].stamp[mcfn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):

        data = self.basehash[tid]
        for dep in sorted(self.runtaskdeps[tid]):
            data += self.get_unihash(dep[1])

        for (f, cs) in sorted(self.file_checksum_values[tid], key=clean_checksum_file_path):
            if cs:
                if "/./" in f:
                    data += "./" + f.split("/./")[1]
                data += cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data += self.taints[tid][8:]
            else:
                data += self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
        return h

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def copy_unitaskhashes(self, targetdir):
        self.unihash_cache.copyfile(targetdir)

    def dump_sigtask(self, mcfn, task, stampbase, runtime):
        tid = mcfn + ":" + task
        mc = bb.runqueue.mc_from_tid(mcfn)
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basehash_ignore_vars'] = self.basehash_ignore_vars
        data['taskhash_ignore_tasks'] = self.taskhash_ignore_tasks
        data['taskdeps'] = self.datacaches[mc].siggen_taskdeps[mcfn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.datacaches[mc].siggen_varvals[mcfn][task]
        for dep in self.datacaches[mc].siggen_taskdeps[mcfn][task]:
            if dep in self.basehash_ignore_vars:
                continue
            data['gendeps'][dep] = self.datacaches[mc].siggen_gendeps[mcfn][dep]
            data['varvals'][dep] = self.datacaches[mc].siggen_varvals[mcfn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = [dep[0] for dep in sorted(self.runtaskdeps[tid])]
            data['file_checksum_values'] = []
            for f, cs in sorted(self.file_checksum_values[tid], key=clean_checksum_file_path):
                if "/./" in f:
                    data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
                else:
                    data['file_checksum_values'].append((os.path.basename(f), cs))
            data['runtaskhashes'] = {}
            for dep in self.runtaskdeps[tid]:
                data['runtaskhashes'][dep[0]] = self.get_unihash(dep[1])
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(mcfn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = bb.utils.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
                f.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If task is not in basehash, then error
        return self.basehash[tid]

    def stampfile(self, stampbase, mcfn, taskname, extrainfo, clean=False):
        if taskname.endswith("_setscene"):
            tid = mcfn + ":" + taskname[:-9]
        else:
            tid = mcfn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, mcfn, taskname, extrainfo):
        return self.stampfile(stampbase, mcfn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, mcfn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (mcfn, task))

        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]

        taintfn = stamp + '.' + task + '.taint'

        import uuid
        bb.utils.mkdirhier(os.path.dirname(taintfn))
        # The specific content of the taint file is not really important,
        # we just need it to be random, so a random UUID is used
        with open(taintfn, 'w') as taintf:
            taintf.write(str(uuid.uuid4()))

class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def reset(self, data):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().reset(data)

    def exit(self):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().exit()

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures that a task won't be re-run if the taskhash changes but
            # would still result in the same output hash
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash

    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task we can return
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) The value is easy for multiple independent builders to derive the
        #    same unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.bbdebug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash

    def report_unihash(self, path, task, d):
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        mcfn = d.getVar('BB_FILENAME')
        tid = mcfn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just exit
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(mcfn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass

    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

def clean_checksum_file_path(file_checksum_tuple):
    f, cs = file_checksum_tuple
    if "/./" in f:
        return "./" + f.split("/./")[1]
    return f

def dump_this_task(outfile, d):
    import bb.parse
    mcfn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.parse.siggen.stampfile_base(mcfn)
    bb.parse.siggen.dump_sigtask(mcfn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title': '\033[1m',
              'color_default': '\033[0m',
              'color_add': '\033[0;32m',
              'color_remove': '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)

def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

# Handle renamed fields
def handle_renames(data):
    if 'basewhitelist' in data:
        data['basehash_ignore_vars'] = data['basewhitelist']
        del data['basewhitelist']
    if 'taskwhitelist' in data:
        data['taskhash_ignore_tasks'] = data['taskwhitelist']
        del data['taskwhitelist']


def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)
    with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
        b_data = json.load(f, object_hook=SetDecoder)

    for data in [a_data, b_data]:
        handle_renames(data)

    def dict_diff(a, b, ignored_vars=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in ignored_vars:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        from collections import Counter

        # Convert lists back to tuples
        a = [(f[0], f[1]) for f in a]
        b = [(f[0], f[1]) for f in b]

        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    if 'basehash_ignore_vars' in a_data and a_data['basehash_ignore_vars'] != b_data['basehash_ignore_vars']:
        output.append(color_format("{color_title}basehash_ignore_vars changed{color_default} from '%s' to '%s'") % (a_data['basehash_ignore_vars'], b_data['basehash_ignore_vars']))
        if a_data['basehash_ignore_vars'] and b_data['basehash_ignore_vars']:
            output.append("changed items: %s" % a_data['basehash_ignore_vars'].symmetric_difference(b_data['basehash_ignore_vars']))

    if 'taskhash_ignore_tasks' in a_data and a_data['taskhash_ignore_tasks'] != b_data['taskhash_ignore_tasks']:
        output.append(color_format("{color_title}taskhash_ignore_tasks changed{color_default} from '%s' to '%s'") % (a_data['taskhash_ignore_tasks'], b_data['taskhash_ignore_tasks']))
        if a_data['taskhash_ignore_tasks'] and b_data['taskhash_ignore_tasks']:
            output.append("changed items: %s" % a_data['taskhash_ignore_tasks'].symmetric_difference(b_data['taskhash_ignore_tasks']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basehash_ignore_vars'] & b_data['basehash_ignore_vars'])
    if changed:
        for dep in sorted(changed):
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in sorted(added):
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in sorted(removed):
            output.append(color_format("{color_title}Dependency on variable %s was removed") % (dep))


    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in sorted(changed):
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if not 'file_checksum_values' in a_data:
        a_data['file_checksum_values'] = []
    if not 'file_checksum_values' in b_data:
        b_data['file_checksum_values'] = []

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if not 'runtaskdeps' in a_data:
        a_data['runtaskdeps'] = {}
    if not 'runtaskdeps' in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (a, a_data['runtaskhashes'][a], b, b_data['runtaskhashes'][b]))

        if changed:
            clean_a = a_data['runtaskdeps']
            clean_b = b_data['runtaskdeps']
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = a_data['runtaskhashes']
        b = b_data['runtaskhashes']
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in sorted(added):
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (dep, b[dep]))
        if removed:
            for dep in sorted(removed):
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (dep, a[dep]))
        if changed:
            for dep in sorted(changed):
                if not collapsed:
                    output.append(color_format("{color_title}Hash for task dependency %s changed{color_default} from %s to %s") % (dep, a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
                            output = [output[-1]] + recout
                            break

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output


def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in sorted(alldeps):
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

def calc_taskhash(sigdata):
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            if "./" in c[0]:
                data = data + c[0]
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()


def dump_sigfile(a):
    output = []

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)

    handle_renames(a_data)

    output.append("basehash_ignore_vars: %s" % (sorted(a_data['basehash_ignore_vars'])))

    output.append("taskhash_ignore_tasks: %s" % (sorted(a_data['taskhash_ignore_tasks'] or [])))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in sorted(a_data['gendeps']):
        output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))

    for dep in sorted(a_data['varvals']):
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))

    if 'runtaskhashes' in a_data:
        for dep in sorted(a_data['runtaskhashes']):
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output