#
# Copyright BitBake Contributors
#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
import json
import types
import bb.compress.zstd
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv
import hashserv.client

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

class SetEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, set) or isinstance(obj, frozenset):
            return dict(_set_object=list(sorted(obj)))
        return json.JSONEncoder.default(self, obj)

def SetDecoder(dct):
    if '_set_object' in dct:
        return frozenset(dct['_set_object'])
    return dct

def init(d):
    siggens = [obj for obj in globals().values()
               if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
    else:
        # for/else: only reached when no generator name matched
        logger.error("Invalid signature generator '%s', using default 'noop'\n"
                     "Available generators: %s", desired,
                     ', '.join(obj.name for obj in siggens))
        return SignatureGenerator(d)

class SignatureGenerator(object):
    """
    The dummy "noop" signature generator: task hashes are derived from the
    task identifier alone and no signature data is written out.
    """
    name = "noop"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, variant):
        return

    def postparsing_clean_cache(self):
        return

    def setup_datacache(self, datacaches):
        self.datacaches = datacaches

    def setup_datacache_from_datastore(self, mcfn, d):
        # In task context we have no cache so set up internal data structures
        # from the fully parsed data store provided

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        tasks = d.getVar('__BBTASKS', False)

        self.datacaches = {}
        self.datacaches[mc] = types.SimpleNamespace()
        setattr(self.datacaches[mc], "stamp", {})
        self.datacaches[mc].stamp[mcfn] = d.getVar('STAMP')
        setattr(self.datacaches[mc], "stamp_extrainfo", {})
        self.datacaches[mc].stamp_extrainfo[mcfn] = {}
        for t in tasks:
            flag = d.getVarFlag(t, "stamp-extra-info")
            if flag:
                self.datacaches[mc].stamp_extrainfo[mcfn][t] = flag

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile_base(self, mcfn):
        mc = bb.runqueue.mc_from_tid(mcfn)
        return self.datacaches[mc].stamp[mcfn]

    def stampfile_mcfn(self, taskname, mcfn, extrainfo=True):
        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]
        if not stamp:
            return

        stamp_extrainfo = ""
        if extrainfo:
            taskflagname = taskname
            if taskname.endswith("_setscene"):
                taskflagname = taskname.replace("_setscene", "")
            stamp_extrainfo = self.datacaches[mc].stamp_extrainfo[mcfn].get(taskflagname) or ""

        return self.stampfile(stamp, mcfn, taskname, stamp_extrainfo)
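
    # For illustration (values hypothetical): with a stampbase of
    # "tmp/stamps/foo-1.0-r0" and no extra info, stampfile() below gives
    #   stampfile("tmp/stamps/foo-1.0-r0", mcfn, "do_compile", "")
    #       -> "tmp/stamps/foo-1.0-r0.do_compile"
    # since the trailing "." left by the empty extrainfo is stripped.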

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask_mcfn(self, taskname, mcfn):
        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]
        if not stamp:
            return []

        taskflagname = taskname
        if taskname.endswith("_setscene"):
            taskflagname = taskname.replace("_setscene", "")
        stamp_extrainfo = self.datacaches[mc].stamp_extrainfo[mcfn].get(taskflagname) or ""

        return self.stampcleanmask(stamp, mcfn, taskname, stamp_extrainfo)

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, mcfn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, mcfn):
        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]
        bb.utils.remove(stamp)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def copy_unitaskhashes(self, targetdir):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    def exit(self):
        return

def build_pnid(mc, pn, taskname):
    if mc:
        return "mc:" + mc + ":" + pn + ":" + taskname
    return pn + ":" + taskname

class SignatureGeneratorBasic(SignatureGenerator):
    """
    Signature generator which hashes each task's variable dependencies
    (as computed by bb.data.generate_dependencies) into a basehash.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.setscenetasks = set()
        self.basehash_ignore_vars = set((data.getVar("BB_BASEHASH_IGNORE_VARS") or "").split())
        self.taskhash_ignore_tasks = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        self.taskhash_ignore_tasks = data.getVar("BB_TASKHASH_IGNORE_TASKS") or None
        if self.taskhash_ignore_tasks:
            self.twl = re.compile(self.taskhash_ignore_tasks)
        else:
            self.twl = None
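
    # For illustration (hypothetical value): with
    #   BB_TASKHASH_IGNORE_TASKS = "^(quilt-native|autoconf-native)$"
    # rundep_check() below drops any dependency whose recipe name matches the
    # pattern from the task hash of any recipe that does not itself match.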

    def _build_data(self, mcfn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basehash_ignore_vars)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basehash_ignore_vars, mcfn)

        for task in tasklist:
            tid = mcfn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        return taskdeps, gendeps, lookupcache

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        mcfn = fn
        if variant or mc:
            mcfn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps, gendeps, lookupcache = self._build_data(mcfn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % mcfn)
            raise

        # Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[mcfn]:
        #    self.dump_sigtask(mcfn, task, d.getVar("STAMP"), False)

        basehashes = {}
        for task in taskdeps:
            basehashes[task] = self.basehash[mcfn + ":" + task]

        d.setVar("__siggen_basehashes", basehashes)
        d.setVar("__siggen_gendeps", gendeps)
        d.setVar("__siggen_varvals", lookupcache)
        d.setVar("__siggen_taskdeps", taskdeps)

    def setup_datacache_from_datastore(self, mcfn, d):
        super().setup_datacache_from_datastore(mcfn, d)

        mc = bb.runqueue.mc_from_tid(mcfn)
        for attr in ["siggen_varvals", "siggen_taskdeps", "siggen_gendeps"]:
            if not hasattr(self.datacaches[mc], attr):
                setattr(self.datacaches[mc], attr, {})
        self.datacaches[mc].siggen_varvals[mcfn] = d.getVar("__siggen_varvals")
        self.datacaches[mc].siggen_taskdeps[mcfn] = d.getVar("__siggen_taskdeps")
        self.datacaches[mc].siggen_gendeps[mcfn] = d.getVar("__siggen_gendeps")

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it.
        # We only manipulate the dependencies for packages not in the ignore
        # list
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint
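
    # For illustration: read_taint() looks for a file named
    #   <stampbase>.do_compile.taint
    # containing the random uuid4 string written by
    # SignatureGeneratorBasicHash.invalidate_task() below; "nostamp:" taints,
    # by contrast, live only in memory (see prep_taskhash()).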

    def prep_taskhash(self, tid, deps, dataCaches):

        (mc, _, task, mcfn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[mcfn]

        self.tidtopn[tid] = recipename
        # save hashfn for deps into siginfo?
        for dep in deps:
            (depmc, _, deptask, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            dep_pn = dataCaches[depmc].pkg_fn[depmcfn]

            if not self.rundep_check(mcfn, recipename, task, dep, dep_pn, dataCaches):
                continue

            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)

            dep_pnid = build_pnid(depmc, dep_pn, deptask)
            self.runtaskdeps[tid].append((dep_pnid, dep))

        if task in dataCaches[mc].file_checksums[mcfn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[mcfn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[mcfn][task], recipename, self.localdirsexclude)
            for (f, cs) in checksums:
                self.file_checksum_values[tid].append((f, cs))

        taskdep = dataCaches[mc].task_deps[mcfn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset the taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(mcfn, task, dataCaches[mc].stamp[mcfn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):

        data = self.basehash[tid]
        for dep in sorted(self.runtaskdeps[tid]):
            data += self.get_unihash(dep[1])

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                if "/./" in f:
                    data += "./" + f.split("/./")[1]
                data += cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data += self.taints[tid][8:]
            else:
                data += self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
        return h
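
    # For illustration: the task hash computed above is a sha256 over the
    # concatenation of
    #   basehash + the unihash of each runtime dependency (sorted by pnid)
    #   + each file checksum (prefixed by its "./"-relative path when one is
    #     present) + any taint value,
    # so a change in any dependency's unihash changes this task's hash.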

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def copy_unitaskhashes(self, targetdir):
        self.unihash_cache.copyfile(targetdir)

    def dump_sigtask(self, mcfn, task, stampbase, runtime):
        tid = mcfn + ":" + task
        mc = bb.runqueue.mc_from_tid(mcfn)
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basehash_ignore_vars'] = self.basehash_ignore_vars
        data['taskhash_ignore_tasks'] = self.taskhash_ignore_tasks
        data['taskdeps'] = self.datacaches[mc].siggen_taskdeps[mcfn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.datacaches[mc].siggen_varvals[mcfn][task]
        for dep in self.datacaches[mc].siggen_taskdeps[mcfn][task]:
            if dep in self.basehash_ignore_vars:
                continue
            data['gendeps'][dep] = self.datacaches[mc].siggen_gendeps[mcfn][dep]
            data['varvals'][dep] = self.datacaches[mc].siggen_varvals[mcfn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = [dep[0] for dep in sorted(self.runtaskdeps[tid])]
            data['file_checksum_values'] = []
            for f, cs in self.file_checksum_values[tid]:
                if "/./" in f:
                    data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
                else:
                    data['file_checksum_values'].append((os.path.basename(f), cs))
            data['runtaskhashes'] = {}
            for dep in self.runtaskdeps[tid]:
                data['runtaskhashes'][dep[0]] = self.get_unihash(dep[1])
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(mcfn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = bb.utils.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
                f.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # Fall back to the basehash; a task missing from there too is an error
        return self.basehash[tid]

    def stampfile(self, stampbase, mcfn, taskname, extrainfo, clean=False):
        if taskname.endswith("_setscene"):
            tid = mcfn + ":" + taskname[:-9]
        else:
            tid = mcfn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, mcfn, taskname, extrainfo):
        return self.stampfile(stampbase, mcfn, taskname, extrainfo, clean=True)
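
    # For illustration (path and hash hypothetical): the basichash
    # stampfile() above gives
    #   tmp/stamps/foo-1.0-r0.do_compile.a1b2...c3d4
    # while stampcleanmask() substitutes "*" for the hash, producing a glob
    #   tmp/stamps/foo-1.0-r0.do_compile.*
    # that matches the stamps of all previous hashes of the task.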

    def invalidate_task(self, task, mcfn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (mcfn, task))

        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]

        taintfn = stamp + '.' + task + '.taint'

        import uuid
        bb.utils.mkdirhier(os.path.dirname(taintfn))
        # The specific content of the taint file is not really important,
        # we just need it to be random, so a random UUID is used
        with open(taintfn, 'w') as taintf:
            taintf.write(str(uuid.uuid4()))

class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def reset(self, data):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().reset(data)

    def exit(self):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().exit()

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash.
            # This ensures that a task won't be re-run if its taskhash
            # changes but still produces the same output hash.
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash
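
    # For illustration: the local unihash cache maps
    #   "<mc>:<pn>:<taskname>" -> (taskhash, unihash)
    # e.g. (values hypothetical) ":foo:do_compile" -> ("ab12...", "cd34..."),
    # and _get_unihash() above only returns the cached unihash while the
    # stored taskhash still matches the current one.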

    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task, we can return
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # the unihash for the most recent taskhash of each task.
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) The value is easy for multiple independent builders to derive the
        #    same unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.bbdebug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash

    def report_unihash(self, path, task, d):
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        mcfn = d.getVar('BB_FILENAME')
        tid = mcfn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just exit
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(mcfn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass
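
    # For illustration: if self.method is a dotted name such as
    # "mymodule.outhash" (hypothetical), report_unihash() above imports
    # mymodule and calls outhash(path, sigfile, task, d) to compute the
    # output hash; a bare name is instead evaluated directly via
    # bb.utils.better_eval().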

    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"

    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

def dump_this_task(outfile, d):
    import bb.parse
    mcfn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.parse.siggen.stampfile_base(mcfn)
    bb.parse.siggen.dump_sigtask(mcfn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title':   '\033[1m',
              'color_default': '\033[0m',
              'color_add':     '\033[0;32m',
              'color_remove':  '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)
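
# For illustration, with colours disabled (assuming simplediff emits the
# removal before the addition for a replaced word):
#   worddiff_str("a b c", "a d c") -> '"a [-b-] {+d+} c"'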

def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

# Handle renamed fields
def handle_renames(data):
    if 'basewhitelist' in data:
        data['basehash_ignore_vars'] = data['basewhitelist']
        del data['basewhitelist']
    if 'taskwhitelist' in data:
        data['taskhash_ignore_tasks'] = data['taskwhitelist']
        del data['taskwhitelist']


def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)
    with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
        b_data = json.load(f, object_hook=SetDecoder)

    for data in [a_data, b_data]:
        handle_renames(data)

    def dict_diff(a, b, ignored_vars=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in ignored_vars:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        from collections import Counter

        # Convert lists back to tuples
        a = [(f[0], f[1]) for f in a]
        b = [(f[0], f[1]) for f in b]

        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed
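
    # For illustration:
    #   dict_diff({'A': 1, 'B': 2}, {'A': 1, 'B': 3, 'C': 4})
    #       -> ({'B'}, {'C'}, set())
    # i.e. (changed, added, removed) keyed on the dict keys.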

    if 'basehash_ignore_vars' in a_data and a_data['basehash_ignore_vars'] != b_data['basehash_ignore_vars']:
        output.append(color_format("{color_title}basehash_ignore_vars changed{color_default} from '%s' to '%s'") % (a_data['basehash_ignore_vars'], b_data['basehash_ignore_vars']))
        if a_data['basehash_ignore_vars'] and b_data['basehash_ignore_vars']:
            output.append("changed items: %s" % a_data['basehash_ignore_vars'].symmetric_difference(b_data['basehash_ignore_vars']))

    if 'taskhash_ignore_tasks' in a_data and a_data['taskhash_ignore_tasks'] != b_data['taskhash_ignore_tasks']:
        output.append(color_format("{color_title}taskhash_ignore_tasks changed{color_default} from '%s' to '%s'") % (a_data['taskhash_ignore_tasks'], b_data['taskhash_ignore_tasks']))
        if a_data['taskhash_ignore_tasks'] and b_data['taskhash_ignore_tasks']:
            output.append("changed items: %s" % a_data['taskhash_ignore_tasks'].symmetric_difference(b_data['taskhash_ignore_tasks']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basehash_ignore_vars'] & b_data['basehash_ignore_vars'])
    if changed:
        for dep in sorted(changed):
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in sorted(added):
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in sorted(removed):
            output.append(color_format("{color_title}Dependency on variable %s was removed") % (dep))


    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in sorted(changed):
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if 'file_checksum_values' not in a_data:
        a_data['file_checksum_values'] = []
    if 'file_checksum_values' not in b_data:
        b_data['file_checksum_values'] = []

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if 'runtaskdeps' not in a_data:
        a_data['runtaskdeps'] = {}
    if 'runtaskdeps' not in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (a, a_data['runtaskhashes'][a], b, b_data['runtaskhashes'][b]))

        if changed:
            clean_a = a_data['runtaskdeps']
            clean_b = b_data['runtaskdeps']
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))

    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = a_data['runtaskhashes']
        b = b_data['runtaskhashes']
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in sorted(added):
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (dep, b[dep]))
        if removed:
            for dep in sorted(removed):
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (dep, a[dep]))
        if changed:
            for dep in sorted(changed):
                if not collapsed:
                    output.append(color_format("{color_title}Hash for task dependency %s changed{color_default} from %s to %s") % (dep, a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, we might as well print the
                            # line above and then defer to the changes in that hash
                            # since, in all likelihood, they're the same changes this
                            # task also saw.
                            output = [output[-1]] + recout
                            break

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output


def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in sorted(alldeps):
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

def calc_taskhash(sigdata):
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            if "./" in c[0]:
                data = data + c[0]
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()


def dump_sigfile(a):
    output = []

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)

    handle_renames(a_data)

    output.append("basehash_ignore_vars: %s" % (sorted(a_data['basehash_ignore_vars'])))

    output.append("taskhash_ignore_tasks: %s" % (sorted(a_data['taskhash_ignore_tasks'] or [])))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in sorted(a_data['gendeps']):
        output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))

    for dep in sorted(a_data['varvals']):
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))

    if 'runtaskhashes' in a_data:
        for dep in sorted(a_data['runtaskhashes']):
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output
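
# For illustration (paths hypothetical): these helpers back the
# bitbake-dumpsig and bitbake-diffsigs tools, roughly:
#   for line in dump_sigfile("foo.do_compile.sigdata.<hash>"):
#       print(line)
#   for line in compare_sigfiles("old.sigdata.<h1>", "new.sigdata.<h2>", color=True):
#       print(line)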