#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
import json
import bb.compress.zstd
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv
import hashserv.client

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

class SetEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, set):
            return dict(_set_object=list(sorted(obj)))
        return json.JSONEncoder.default(self, obj)

def SetDecoder(dct):
    if '_set_object' in dct:
        return set(dct['_set_object'])
    return dct
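
# Illustrative only: SetEncoder/SetDecoder round-trip Python sets through JSON
# (which cannot represent sets natively), e.g. roughly:
#
#   blob = json.dumps({"deps": {"b", "a"}}, cls=SetEncoder)
#   json.loads(blob, object_hook=SetDecoder)   # -> {"deps": {"a", "b"}}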

def init(d):
    siggens = [obj for obj in globals().values()
               if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)

    logger.error("Invalid signature generator '%s', using default 'noop'\n"
                 "Available generators: %s", desired,
                 ', '.join(obj.name for obj in siggens))
    return SignatureGenerator(d)
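
# Illustrative only: init() picks the generator whose "name" attribute matches
# BB_SIGNATURE_HANDLER, e.g. with
#
#   BB_SIGNATURE_HANDLER = "basichash"
#
# a SignatureGeneratorBasicHash instance is returned; an unrecognised value
# falls back to the "noop" SignatureGenerator defined below.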

class SignatureGenerator(object):
    """
    Default "noop" signature generator: task hashes are derived from the task
    identifier alone and no signature data is written out.
    """
    name = "noop"

    # If the derived class supports multiconfig datacaches, set this to True
    # The default is False for backward compatibility with derived signature
    # generators that do not understand multiconfig caches
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, variant):
        return

    def postparsing_clean_cache(self):
        return

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, d, fn):
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to signature
        generator functions. If the signature generator supports multiconfig
        caches, the entire dictionary of data caches is sent, otherwise a
        special proxy is sent that supports both index access to all
        multiconfigs, and also direct access for the default multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed in
        the future when support for multiconfig data caches is mandatory.
        """
        class DataCacheProxy(object):
            def __init__(self):
                pass

            def __getitem__(self, key):
                return dataCaches[key]

            def __getattr__(self, name):
                return getattr(dataCaches[mc], name)

        if cls.supports_multiconfig_datacaches:
            return dataCaches

        return DataCacheProxy()

class SignatureGeneratorBasic(SignatureGenerator):
    """
    Basic signature generator: base hashes are computed from each task's
    variable dependencies, and task hashes additionally cover runtime
    dependencies, file checksums and taints.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.setscenetasks = set()
        self.basewhitelist = set((data.getVar("BB_HASHBASE_WHITELIST") or "").split())
        self.taskwhitelist = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None
        if self.taskwhitelist:
            self.twl = re.compile(self.taskwhitelist)
        else:
            self.twl = None

    def _build_data(self, fn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basewhitelist)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basewhitelist, fn)

        for task in tasklist:
            tid = fn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % fn)
            raise

        # Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH:task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the whitelist
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):

        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[fn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.supports_multiconfig_datacaches and mc != depmc:
                # If the signature generator doesn't understand multiconfig
                # data caches, any dependency not in the same multiconfig must
                # be skipped for backward compatibility
                continue
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            for (f, cs) in checksums:
                self.file_checksum_values[tid].append((f, cs))

        taskdep = dataCaches[mc].task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):

        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            data = data + self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                data = data + cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data = data + self.taints[tid][8:]
            else:
                data = data + self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
        return h
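
    # Illustrative only: the hash computed above is, conceptually,
    #
    #   sha256(basehash + unihash(dep_1) + ... + unihash(dep_n)
    #          + file_checksums + optional_taint)
    #
    # so a change in any dependency's unihash, in a watched file's checksum or
    # in a taint value yields a new taskhash.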

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def dump_sigtask(self, fn, task, stampbase, runtime):

        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basewhitelist'] = self.basewhitelist
        data['taskwhitelist'] = self.taskwhitelist
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basewhitelist:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = [(os.path.basename(f), cs) for f, cs in self.file_checksum_values[tid]]
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
                f.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

    def dump_sigfn(self, fn, dataCaches, options):
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("BitBake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If task is not in basehash, then error
        return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        if taskname != "do_setscene" and taskname.endswith("_setscene"):
            tid = fn + ":" + taskname[:-9]
        else:
            tid = fn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)
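
    # Illustrative only: stampfile() above produces names of (roughly) the form
    #
    #   <stampbase>.do_compile.<hash>.<extrainfo>
    #
    # and stampcleanmask() substitutes "*" for the hash, giving a wildcard mask
    # that matches stamps left behind by any previous hash of the task.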

    def invalidate_task(self, task, d, fn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)

class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures that a task won't be re-run if the taskhash changes but
            # it would still result in the same output hash
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash

    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task we can return
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # the most recent entry for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) The value is easy for multiple independent builders to derive the
        #    same unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash

    def report_unihash(self, path, task, d):
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just return
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass

    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
    name = "TestMulticonfigDepends"
    supports_multiconfig_datacaches = True

def dump_this_task(outfile, d):
    import bb.parse
    fn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.build.stamp_internal(task, d, None, True)
    bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title': '\033[1m',
              'color_default': '\033[0m',
              'color_add': '\033[0;32m',
              'color_remove': '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)

def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

def clean_basepath(basepath):
    basepath, dir, recipe_task = basepath.rsplit("/", 2)
    cleaned = dir + '/' + recipe_task

    if basepath[0] == '/':
        return cleaned

    if basepath.startswith("mc:") and basepath.count(':') >= 2:
        mc, mc_name, basepath = basepath.split(":", 2)
        mc_suffix = ':mc:' + mc_name
    else:
        mc_suffix = ''

    # mc stuff now removed from basepath. Whatever was next, if present will be the first
    # suffix. ':/', recipe path start, marks the end of this. Something like
    # 'virtual:a[:b[:c]]:/path...' (b and c being optional)
    if basepath[0] != '/':
        cleaned += ':' + basepath.split(':/', 1)[0]

    return cleaned + mc_suffix

def clean_basepaths(a):
    b = {}
    for x in a:
        b[clean_basepath(x)] = a[x]
    return b

def clean_basepaths_list(a):
    b = []
    for x in a:
        b.append(clean_basepath(x))
    return b
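
# Illustrative only: clean_basepath() reduces a full recipe path to
# "<dir>/<recipe>:<task>", re-appending any virtual/mc prefixes as suffixes,
# e.g. roughly
#
#   clean_basepath("virtual:native:/p/meta/recipes-x/foo/foo_1.0.bb:do_compile")
#       -> "foo/foo_1.0.bb:do_compile:virtual:native"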

def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer Python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)
    with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
        b_data = json.load(f, object_hook=SetDecoder)

    def dict_diff(a, b, whitelist=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in whitelist:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        from collections import Counter

        # Convert lists back to tuples
        a = [(f[0], f[1]) for f in a]
        b = [(f[0], f[1]) for f in b]

        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    if 'basewhitelist' in a_data and a_data['basewhitelist'] != b_data['basewhitelist']:
        output.append(color_format("{color_title}basewhitelist changed{color_default} from '%s' to '%s'") % (a_data['basewhitelist'], b_data['basewhitelist']))
        if a_data['basewhitelist'] and b_data['basewhitelist']:
            output.append("changed items: %s" % a_data['basewhitelist'].symmetric_difference(b_data['basewhitelist']))

    if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']:
        output.append(color_format("{color_title}taskwhitelist changed{color_default} from '%s' to '%s'") % (a_data['taskwhitelist'], b_data['taskwhitelist']))
        if a_data['taskwhitelist'] and b_data['taskwhitelist']:
            output.append("changed items: %s" % a_data['taskwhitelist'].symmetric_difference(b_data['taskwhitelist']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basewhitelist'] & b_data['basewhitelist'])
    if changed:
        for dep in sorted(changed):
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in sorted(added):
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in sorted(removed):
            output.append(color_format("{color_title}Dependency on variable %s was removed") % (dep))


    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in sorted(changed):
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if 'file_checksum_values' not in a_data:
        a_data['file_checksum_values'] = []
    if 'file_checksum_values' not in b_data:
        b_data['file_checksum_values'] = []

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if 'runtaskdeps' not in a_data:
        a_data['runtaskdeps'] = {}
    if 'runtaskdeps' not in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = a_data['runtaskhashes']
        b = b_data['runtaskhashes']
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in sorted(added):
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep]))
        if removed:
            for dep in sorted(removed):
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep]))
        if changed:
            for dep in sorted(changed):
                if not collapsed:
                    output.append(color_format("{color_title}Hash for dependent task %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
                            output = [output[-1]] + recout

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output


def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in alldeps:
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

def calc_taskhash(sigdata):
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()


def dump_sigfile(a):
    output = []

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)

    output.append("basewhitelist: %s" % (a_data['basewhitelist']))

    output.append("taskwhitelist: %s" % (a_data['taskwhitelist']))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in a_data['gendeps']:
        output.append("List of dependencies for variable %s is %s" % (dep, a_data['gendeps'][dep]))

    for dep in a_data['varvals']:
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (a_data['runtaskdeps']))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (a_data['file_checksum_values']))

    if 'runtaskhashes' in a_data:
        for dep in a_data['runtaskhashes']:
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output
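
# Illustrative only (none of this runs as part of the module): the helpers
# above operate on the zstd-compressed JSON sigdata/siginfo files written by
# dump_sigtask(). A caller such as bitbake-diffsigs can drive them roughly as
#
#   for line in compare_sigfiles(sigfile_a, sigfile_b, color=True):
#       print(line)
#   for line in dump_sigfile(sigfile_a):
#       print(line)
#
# where sigfile_a/sigfile_b are paths to two signature files for the same task.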