#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv
import hashserv.client

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

def init(d):
    siggens = [obj for obj in globals().values()
               if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
            break
    else:
        logger.error("Invalid signature generator '%s', using default 'noop'\n"
                     "Available generators: %s", desired,
                     ', '.join(obj.name for obj in siggens))
        return SignatureGenerator(d)

class SignatureGenerator(object):
    """
    The "noop" signature generator: defines the signature generator interface
    but performs no real signature handling.
    """
    name = "noop"

    # If the derived class supports multiconfig datacaches, set this to True
    # The default is False for backward compatibility with derived signature
    # generators that do not understand multiconfig caches
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, variant):
        return

    def postparsing_clean_cache(self):
        return

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, d, fn):
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to signature
        generator functions. If the signature generator supports multiconfig
        caches, the entire dictionary of data caches is sent, otherwise a
        special proxy is sent that supports both index access to all
        multiconfigs, and also direct access for the default multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed in
        the future when support for multiconfig data caches is mandatory.
        """
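        # Illustrative only (not part of the original code): with the proxy in
        # place, a derived generator that is unaware of multiconfig can keep
        # writing
        #
        #     dataCaches.pkg_fn[fn]         # attribute access, default multiconfig
        #
        # while multiconfig-aware code uses
        #
        #     dataCaches[mc].pkg_fn[fn]     # index access, any multiconfig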
        class DataCacheProxy(object):
            def __init__(self):
                pass

            def __getitem__(self, key):
                return dataCaches[key]

            def __getattr__(self, name):
                return getattr(dataCaches[mc], name)

        if cls.supports_multiconfig_datacaches:
            return dataCaches

        return DataCacheProxy()

class SignatureGeneratorBasic(SignatureGenerator):
    """
    Signature generator which hashes the parsed task metadata (the variables a
    task depends on and their values) to produce the base and task hashes.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.setscenetasks = set()
        self.basewhitelist = set((data.getVar("BB_HASHBASE_WHITELIST") or "").split())
        self.taskwhitelist = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None
        if self.taskwhitelist:
            self.twl = re.compile(self.taskwhitelist)
        else:
            self.twl = None

    def _build_data(self, fn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basewhitelist)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basewhitelist, fn)

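        # The base hash only covers parse-time metadata (the variables a task
        # depends on and their values); run-time task dependencies and file
        # checksums are mixed in later by get_taskhash().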
        for task in tasklist:
            tid = fn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % fn)
            raise

        # Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH_task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the whitelist
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):

        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[fn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.supports_multiconfig_datacaches and mc != depmc:
                # If the signature generator doesn't understand multiconfig
                # data caches, any dependency not in the same multiconfig must
                # be skipped for backward compatibility
                continue
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

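        # Checksums of local files the task depends on (for example file://
        # entries in SRC_URI) also feed into the task hash, so editing such a
        # file re-triggers the task.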
        if task in dataCaches[mc].file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            for (f, cs) in checksums:
                self.file_checksum_values[tid].append((f, cs))

        taskdep = dataCaches[mc].task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):

        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            if dep in self.unihash:
                if self.unihash[dep] is None:
                    data = data + self.taskhash[dep]
                else:
                    data = data + self.unihash[dep]
            else:
                data = data + self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                data = data + cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data = data + self.taints[tid][8:]
            else:
                data = data + self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task])
        return h

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def dump_sigtask(self, fn, task, stampbase, runtime):

        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]
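        # "sigdata" files are written at run time and carry the full task-hash
        # information; "sigbasedata" files contain only the parse-time base
        # hash data.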

        bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basewhitelist'] = self.basewhitelist
        data['taskwhitelist'] = self.taskwhitelist
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basewhitelist:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = [(os.path.basename(f), cs) for f, cs in self.file_checksum_values[tid]]
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with os.fdopen(fd, "wb") as stream:
                p = pickle.dump(data, stream, -1)
                stream.flush()
            os.chmod(tmpfile, 0o664)
            os.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

    def dump_sigfn(self, fn, dataCaches, options):
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)
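
# "basichash" extends "basic" by embedding the task (or unique) hash in the
# stamp file name, so a change in the hash automatically invalidates the stamp.
# Illustrative stamp name only (exact values depend on the build):
#
#     <STAMP>.do_compile.<hash>.<extrainfo>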
class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If task is not in basehash, then error
        return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        if taskname != "do_setscene" and taskname.endswith("_setscene"):
            tid = fn + ":" + taskname[:-9]
        else:
            tid = fn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)

class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures that the task won't be re-run when the taskhash changes
            # but the output (and hence the unique hash) stays the same.
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash

    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task we can return
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # the most recent entry for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) The value is easy for multiple independent builders to derive the
        #    same unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug(2, 'No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except hashserv.client.HashConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash

    def report_unihash(self, path, task, d):
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just abort
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

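            # self.method names the function that computes the output hash,
            # either as a bare name resolved by better_eval() or as a dotted
            # "module.function" path imported below.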
            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug(1, 'Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug(1, 'Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except hashserv.client.HashConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass

    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except hashserv.client.HashConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
    name = "TestMulticonfigDepends"
    supports_multiconfig_datacaches = True

def dump_this_task(outfile, d):
    import bb.parse
    fn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.build.stamp_internal(task, d, None, True)
    bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title':   '\033[1m',
              'color_default': '\033[0m',
              'color_add':     '\033[0;32m',
              'color_remove':  '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)

def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

def clean_basepath(a):
    mc = None
    if a.startswith("mc:"):
        _, mc, a = a.split(":", 2)
    b = a.rsplit("/", 2)[1] + '/' + a.rsplit("/", 2)[2]
    if a.startswith("virtual:"):
        b = b + ":" + a.rsplit(":", 1)[0]
    if mc:
        b = b + ":mc:" + mc
    return b

def clean_basepaths(a):
    b = {}
    for x in a:
        b[clean_basepath(x)] = a[x]
    return b

def clean_basepaths_list(a):
    b = []
    for x in a:
        b.append(clean_basepath(x))
    return b
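
# compare_sigfiles() loads two pickled signature files (as written by
# dump_sigtask() above) and returns a list of human-readable difference
# descriptions. Illustrative use (assumed, not part of this module):
#
#     for line in compare_sigfiles("a.siginfo", "b.siginfo", color=True):
#         print(line)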
def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with open(a, 'rb') as f:
        p1 = pickle.Unpickler(f)
        a_data = p1.load()
    with open(b, 'rb') as f:
        p2 = pickle.Unpickler(f)
        b_data = p2.load()

    def dict_diff(a, b, whitelist=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in whitelist:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        from collections import Counter
        # Handle old siginfo format
        if isinstance(a, dict):
            a = [(os.path.basename(f), cs) for f, cs in a.items()]
        if isinstance(b, dict):
            b = [(os.path.basename(f), cs) for f, cs in b.items()]
        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    if 'basewhitelist' in a_data and a_data['basewhitelist'] != b_data['basewhitelist']:
        output.append(color_format("{color_title}basewhitelist changed{color_default} from '%s' to '%s'") % (a_data['basewhitelist'], b_data['basewhitelist']))
        if a_data['basewhitelist'] and b_data['basewhitelist']:
            output.append("changed items: %s" % a_data['basewhitelist'].symmetric_difference(b_data['basewhitelist']))

    if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']:
        output.append(color_format("{color_title}taskwhitelist changed{color_default} from '%s' to '%s'") % (a_data['taskwhitelist'], b_data['taskwhitelist']))
        if a_data['taskwhitelist'] and b_data['taskwhitelist']:
            output.append("changed items: %s" % a_data['taskwhitelist'].symmetric_difference(b_data['taskwhitelist']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basewhitelist'] & b_data['basewhitelist'])
    if changed:
        for dep in changed:
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in added:
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in removed:
            output.append(color_format("{color_title}Dependency on variable %s was removed") % (dep))


    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in changed:
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if not 'file_checksum_values' in a_data:
        a_data['file_checksum_values'] = {}
    if not 'file_checksum_values' in b_data:
        b_data['file_checksum_values'] = {}

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if not 'runtaskdeps' in a_data:
        a_data['runtaskdeps'] = {}
    if not 'runtaskdeps' in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = a_data['runtaskhashes']
        b = b_data['runtaskhashes']
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in added:
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep]))
        if removed:
            for dep in removed:
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep]))
        if changed:
            for dep in changed:
                if not collapsed:
                    output.append(color_format("{color_title}Hash for dependent task %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
                            output = [output[-1]] + recout

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output


def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in alldeps:
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

def calc_taskhash(sigdata):
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()
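
# calc_basehash() and calc_taskhash() recompute the hashes from a loaded
# sigdata dictionary; dump_sigtask() uses them as a consistency check against
# the values BitBake recorded at parse/run time.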

def dump_sigfile(a):
    output = []

    with open(a, 'rb') as f:
        p1 = pickle.Unpickler(f)
        a_data = p1.load()

    output.append("basewhitelist: %s" % (a_data['basewhitelist']))

    output.append("taskwhitelist: %s" % (a_data['taskwhitelist']))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in a_data['gendeps']:
        output.append("List of dependencies for variable %s is %s" % (dep, a_data['gendeps'][dep]))

    for dep in a_data['varvals']:
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (a_data['runtaskdeps']))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (a_data['file_checksum_values']))

    if 'runtaskhashes' in a_data:
        for dep in a_data['runtaskhashes']:
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output
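
# Overall flow, as a summary of the API above: bitbake selects a generator via
# init(), calls finalise() for each recipe at parse time, prep_taskhash(),
# get_taskhash() and get_unihash() from the runqueue, and dump_sigtask() when
# writing the sigdata/siginfo files that dump_sigfile() and compare_sigfiles()
# read back.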