#
# Copyright BitBake Contributors
#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
import json
import types
import bb.compress.zstd
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv
import hashserv.client

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

class SetEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, set) or isinstance(obj, frozenset):
            return dict(_set_object=list(sorted(obj)))
        return json.JSONEncoder.default(self, obj)

def SetDecoder(dct):
    if '_set_object' in dct:
        return frozenset(dct['_set_object'])
    return dct

def init(d):
    siggens = [obj for obj in globals().values()
               if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
    else:
        logger.error("Invalid signature generator '%s', using default 'noop'\n"
                     "Available generators: %s", desired,
                     ', '.join(obj.name for obj in siggens))
        return SignatureGenerator(d)

class SignatureGenerator(object):
    """
    Dummy "noop" signature generator, used when no other BB_SIGNATURE_HANDLER
    is configured. It implements the generator API but derives task hashes
    from the task identifier alone rather than from the task metadata.
    """
    name = "noop"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, variant):
        return

    def postparsing_clean_cache(self):
        return

    def setup_datacache(self, datacaches):
        self.datacaches = datacaches

    def setup_datacache_from_datastore(self, mcfn, d):
        # In task context we have no cache so setup internal data structures
        # from the fully parsed data store provided

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        tasks = d.getVar('__BBTASKS', False)

        self.datacaches = {}
        self.datacaches[mc] = types.SimpleNamespace()
        setattr(self.datacaches[mc], "stamp", {})
        self.datacaches[mc].stamp[mcfn] = d.getVar('STAMP')
        setattr(self.datacaches[mc], "stamp_extrainfo", {})
        self.datacaches[mc].stamp_extrainfo[mcfn] = {}
        for t in tasks:
            flag = d.getVarFlag(t, "stamp-extra-info")
            if flag:
                self.datacaches[mc].stamp_extrainfo[mcfn][t] = flag

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile_base(self, mcfn):
        mc = bb.runqueue.mc_from_tid(mcfn)
        return self.datacaches[mc].stamp[mcfn]

    def stampfile_mcfn(self, taskname, mcfn, extrainfo=True):
        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]
        if not stamp:
            return

        stamp_extrainfo = ""
        if extrainfo:
            taskflagname = taskname
            if taskname.endswith("_setscene"):
                taskflagname = taskname.replace("_setscene", "")
            stamp_extrainfo = self.datacaches[mc].stamp_extrainfo[mcfn].get(taskflagname) or ""

        return self.stampfile(stamp, mcfn, taskname, stamp_extrainfo)
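
    # Illustrative example (hypothetical values, not executed): for a recipe
    # whose STAMP expands to "/build/stamps/acme/1.0-r0" and which sets no
    # stamp-extra-info flag, stampfile() below gives
    #   stampfile("/build/stamps/acme/1.0-r0", mcfn, "do_compile", "")
    #     -> "/build/stamps/acme/1.0-r0.do_compile"
    # the rstrip('.') simply drops the separator left by the empty extrainfo.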

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask_mcfn(self, taskname, mcfn):
        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]
        if not stamp:
            return []

        taskflagname = taskname
        if taskname.endswith("_setscene"):
            taskflagname = taskname.replace("_setscene", "")
        stamp_extrainfo = self.datacaches[mc].stamp_extrainfo[mcfn].get(taskflagname) or ""

        return self.stampcleanmask(stamp, mcfn, taskname, stamp_extrainfo)

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, mcfn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, mcfn):
        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]
        bb.utils.remove(stamp)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def copy_unitaskhashes(self, targetdir):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    def exit(self):
        return

def build_pnid(mc, pn, taskname):
    if mc:
        return "mc:" + mc + ":" + pn + ":" + taskname
    return pn + ":" + taskname

class SignatureGeneratorBasic(SignatureGenerator):
    """
    Basic signature generator which hashes the parsed task metadata (variable
    values and dependencies), honouring BB_BASEHASH_IGNORE_VARS and
    BB_TASKHASH_IGNORE_TASKS. The "basichash" subclass additionally embeds
    the hashes in stamp file names.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.setscenetasks = set()
        self.basehash_ignore_vars = set((data.getVar("BB_BASEHASH_IGNORE_VARS") or "").split())
        self.taskhash_ignore_tasks = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        self.taskhash_ignore_tasks = data.getVar("BB_TASKHASH_IGNORE_TASKS") or None
        if self.taskhash_ignore_tasks:
            self.twl = re.compile(self.taskhash_ignore_tasks)
        else:
            self.twl = None
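
    # Illustrative example (hypothetical value, not executed): with
    #   BB_TASKHASH_IGNORE_TASKS = "^(nativesdk-|.*-cross$)"
    # a recipe whose own name does not match the pattern has any dependency on
    # a matching recipe (e.g. "gcc-cross") dropped from its task hash by
    # rundep_check() below.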

    def _build_data(self, mcfn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basehash_ignore_vars)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basehash_ignore_vars, mcfn)

        for task in tasklist:
            tid = mcfn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        return taskdeps, gendeps, lookupcache

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        mcfn = fn
        if variant or mc:
            mcfn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps, gendeps, lookupcache = self._build_data(mcfn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % mcfn)
            raise

        basehashes = {}
        for task in taskdeps:
            basehashes[task] = self.basehash[mcfn + ":" + task]

        d.setVar("__siggen_basehashes", basehashes)
        d.setVar("__siggen_gendeps", gendeps)
        d.setVar("__siggen_varvals", lookupcache)
        d.setVar("__siggen_taskdeps", taskdeps)

        #Slow but can be useful for debugging mismatched basehashes
        #self.setup_datacache_from_datastore(mcfn, d)
        #for task in taskdeps:
        #    self.dump_sigtask(mcfn, task, d.getVar("STAMP"), False)

    def setup_datacache_from_datastore(self, mcfn, d):
        super().setup_datacache_from_datastore(mcfn, d)

        mc = bb.runqueue.mc_from_tid(mcfn)
        for attr in ["siggen_varvals", "siggen_taskdeps", "siggen_gendeps"]:
            if not hasattr(self.datacaches[mc], attr):
                setattr(self.datacaches[mc], attr, {})
        self.datacaches[mc].siggen_varvals[mcfn] = d.getVar("__siggen_varvals")
        self.datacaches[mc].siggen_taskdeps[mcfn] = d.getVar("__siggen_taskdeps")
        self.datacaches[mc].siggen_gendeps[mcfn] = d.getVar("__siggen_gendeps")

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the ignore
        # list
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):

        (mc, _, task, mcfn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[mcfn]

        self.tidtopn[tid] = recipename
        # save hashfn for deps into siginfo?
        for dep in deps:
            (depmc, _, deptask, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            dep_pn = dataCaches[depmc].pkg_fn[depmcfn]

            if not self.rundep_check(mcfn, recipename, task, dep, dep_pn, dataCaches):
                continue

            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)

            dep_pnid = build_pnid(depmc, dep_pn, deptask)
            self.runtaskdeps[tid].append((dep_pnid, dep))

        if task in dataCaches[mc].file_checksums[mcfn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[mcfn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[mcfn][task], recipename, self.localdirsexclude)
            for (f,cs) in checksums:
                self.file_checksum_values[tid].append((f,cs))

        taskdep = dataCaches[mc].task_deps[mcfn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(mcfn, task, dataCaches[mc].stamp[mcfn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):

        data = self.basehash[tid]
        for dep in sorted(self.runtaskdeps[tid]):
            data += self.get_unihash(dep[1])

        for (f, cs) in sorted(self.file_checksum_values[tid], key=clean_checksum_file_path):
            if cs:
                if "/./" in f:
                    data += "./" + f.split("/./")[1]
                data += cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data += self.taints[tid][8:]
            else:
                data += self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
        return h

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def copy_unitaskhashes(self, targetdir):
        self.unihash_cache.copyfile(targetdir)
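
    # Illustrative example (not executed): dump_sigtask() below serialises sets
    # through the module-level SetEncoder/SetDecoder pair, e.g.
    #   json.dumps({"deps": {"b", "a"}}, cls=SetEncoder)
    #     -> '{"deps": {"_set_object": ["a", "b"]}}'
    #   json.loads('{"deps": {"_set_object": ["a", "b"]}}', object_hook=SetDecoder)["deps"]
    #     -> frozenset({'a', 'b'})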

    def dump_sigtask(self, mcfn, task, stampbase, runtime):
        tid = mcfn + ":" + task
        mc = bb.runqueue.mc_from_tid(mcfn)
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basehash_ignore_vars'] = self.basehash_ignore_vars
        data['taskhash_ignore_tasks'] = self.taskhash_ignore_tasks
        data['taskdeps'] = self.datacaches[mc].siggen_taskdeps[mcfn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.datacaches[mc].siggen_varvals[mcfn][task]
        for dep in self.datacaches[mc].siggen_taskdeps[mcfn][task]:
            if dep in self.basehash_ignore_vars:
                continue
            data['gendeps'][dep] = self.datacaches[mc].siggen_gendeps[mcfn][dep]
            data['varvals'][dep] = self.datacaches[mc].siggen_varvals[mcfn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = [dep[0] for dep in sorted(self.runtaskdeps[tid])]
            data['file_checksum_values'] = []
            for f,cs in sorted(self.file_checksum_values[tid], key=clean_checksum_file_path):
                if "/./" in f:
                    data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
                else:
                    data['file_checksum_values'].append((os.path.basename(f), cs))
            data['runtaskhashes'] = {}
            for dep in self.runtaskdeps[tid]:
                data['runtaskhashes'][dep[0]] = self.get_unihash(dep[1])
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(mcfn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = bb.utils.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
                f.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If task is not in basehash, then error
        return self.basehash[tid]

    def stampfile(self, stampbase, mcfn, taskname, extrainfo, clean=False):
        if taskname.endswith("_setscene"):
            tid = mcfn + ":" + taskname[:-9]
        else:
            tid = mcfn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, mcfn, taskname, extrainfo):
        return self.stampfile(stampbase, mcfn, taskname, extrainfo, clean=True)
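
    # Illustrative example (hypothetical values, not executed):
    #   stampcleanmask("/build/stamps/acme/1.0-r0", mcfn, "do_compile", "")
    #     -> "/build/stamps/acme/1.0-r0.do_compile.*"
    # i.e. the hash field becomes a glob so stamps written for any previous
    # hash of the task can be matched and removed.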

    def invalidate_task(self, task, mcfn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (mcfn, task))

        mc = bb.runqueue.mc_from_tid(mcfn)
        stamp = self.datacaches[mc].stamp[mcfn]

        taintfn = stamp + '.' + task + '.taint'

        import uuid
        bb.utils.mkdirhier(os.path.dirname(taintfn))
        # The specific content of the taint file is not really important,
        # we just need it to be random, so a random UUID is used
        with open(taintfn, 'w') as taintf:
            taintf.write(str(uuid.uuid4()))

class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def reset(self, data):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().reset(data)

    def exit(self):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().exit()

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures that a task won't be re-run if its taskhash changes but
            # it would still result in the same output hash
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash
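
    # Illustrative example (hypothetical values, not executed): for the task id
    #   "mc:qemux86:/path/to/acme_1.0.bb:do_package"
    # set_unihash() above stores the (taskhash, unihash) pair in
    # self.unitaskhashes under the key "qemux86:acme:do_package", which
    # _get_unihash() below uses to look the value up again.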

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash

    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task we can return
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # the most recent entry for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same still works
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) The value is easy for multiple independent builders to derive the
        #    same unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.bbdebug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash

    def report_unihash(self, path, task, d):
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        mcfn = d.getVar('BB_FILENAME')
        tid = mcfn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just exit
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(mcfn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass

    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

def clean_checksum_file_path(file_checksum_tuple):
    f, cs = file_checksum_tuple
    if "/./" in f:
        return "./" + f.split("/./")[1]
    return f

def dump_this_task(outfile, d):
    import bb.parse
    mcfn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.parse.siggen.stampfile_base(mcfn)
    bb.parse.siggen.dump_sigtask(mcfn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title':   '\033[1m',
              'color_default': '\033[0m',
              'color_add':     '\033[0;32m',
              'color_remove':  '\033[0;31m',
              }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)

def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

# Handle renamed fields
def handle_renames(data):
    if 'basewhitelist' in data:
        data['basehash_ignore_vars'] = data['basewhitelist']
        del data['basewhitelist']
    if 'taskwhitelist' in data:
        data['taskhash_ignore_tasks'] = data['taskwhitelist']
        del data['taskwhitelist']


def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)
    with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
        b_data = json.load(f, object_hook=SetDecoder)

    for data in [a_data, b_data]:
        handle_renames(data)

    def dict_diff(a, b, ignored_vars=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in ignored_vars:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        from collections import Counter

        # Convert lists back to tuples
        a = [(f[0], f[1]) for f in a]
        b = [(f[0], f[1]) for f in b]

        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    if 'basehash_ignore_vars' in a_data and a_data['basehash_ignore_vars'] != b_data['basehash_ignore_vars']:
        output.append(color_format("{color_title}basehash_ignore_vars changed{color_default} from '%s' to '%s'") % (a_data['basehash_ignore_vars'], b_data['basehash_ignore_vars']))
        if a_data['basehash_ignore_vars'] and b_data['basehash_ignore_vars']:
            output.append("changed items: %s" % a_data['basehash_ignore_vars'].symmetric_difference(b_data['basehash_ignore_vars']))

    if 'taskhash_ignore_tasks' in a_data and a_data['taskhash_ignore_tasks'] != b_data['taskhash_ignore_tasks']:
        output.append(color_format("{color_title}taskhash_ignore_tasks changed{color_default} from '%s' to '%s'") % (a_data['taskhash_ignore_tasks'], b_data['taskhash_ignore_tasks']))
        if a_data['taskhash_ignore_tasks'] and b_data['taskhash_ignore_tasks']:
            output.append("changed items: %s" % a_data['taskhash_ignore_tasks'].symmetric_difference(b_data['taskhash_ignore_tasks']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basehash_ignore_vars'] & b_data['basehash_ignore_vars'])
    if changed:
        for dep in sorted(changed):
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in sorted(added):
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in sorted(removed):
            output.append(color_format("{color_title}Dependency on variable %s was removed") % (dep))


    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in sorted(changed):
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if not 'file_checksum_values' in a_data:
        a_data['file_checksum_values'] = []
    if not 'file_checksum_values' in b_data:
        b_data['file_checksum_values'] = []

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if not 'runtaskdeps' in a_data:
        a_data['runtaskdeps'] = {}
    if not 'runtaskdeps' in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (a, a_data['runtaskhashes'][a], b, b_data['runtaskhashes'][b]))

        if changed:
            clean_a = a_data['runtaskdeps']
            clean_b = b_data['runtaskdeps']
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = a_data['runtaskhashes']
        b = b_data['runtaskhashes']
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in sorted(added):
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (dep, b[dep]))
        if removed:
            for dep in sorted(removed):
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (dep, a[dep]))
        if changed:
            for dep in sorted(changed):
                if not collapsed:
                    output.append(color_format("{color_title}Hash for task dependency %s changed{color_default} from %s to %s") % (dep, a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
                            output = [output[-1]] + recout
                            break

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output


def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in sorted(alldeps):
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

def calc_taskhash(sigdata):
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            if "./" in c[0]:
                data = data + c[0]
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()


def dump_sigfile(a):
    output = []

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)

    handle_renames(a_data)

    output.append("basehash_ignore_vars: %s" % (sorted(a_data['basehash_ignore_vars'])))

    output.append("taskhash_ignore_tasks: %s" % (sorted(a_data['taskhash_ignore_tasks'] or [])))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in sorted(a_data['gendeps']):
        output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))

    for dep in sorted(a_data['varvals']):
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))

    if 'runtaskhashes' in a_data:
        for dep in sorted(a_data['runtaskhashes']):
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output
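
# Illustrative sketch (hypothetical file names, not part of this module's API):
# the helpers above operate on the zstd-compressed JSON sigdata/siginfo files
# written by dump_sigtask(), roughly the way the bitbake-diffsigs tool drives
# them:
#
#   import bb.siggen
#   for line in bb.siggen.compare_sigfiles("old.do_compile.sigdata.deadbeef",
#                                          "new.do_compile.sigdata.cafef00d"):
#       print(line)
#   for line in bb.siggen.dump_sigfile("new.do_compile.sigdata.cafef00d"):
#       print(line)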