#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
from bb.checksum import FileChecksumCache
from bb import runqueue

logger = logging.getLogger('BitBake.SigGen')

def init(d):
    siggens = [obj for obj in globals().values()
               if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)

    logger.error("Invalid signature generator '%s', using default 'noop'\n"
                 "Available generators: %s", desired,
                 ', '.join(obj.name for obj in siggens))
    return SignatureGenerator(d)
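
# The generator is selected by name via the BB_SIGNATURE_HANDLER variable.
# A minimal sketch of the configuration (illustrative; OpenEmbedded layers
# typically point this at one of their own handlers):
#
#   # in local.conf or the distro configuration:
#   BB_SIGNATURE_HANDLER = "basichash"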

class SignatureGenerator(object):
    """
    A "noop" signature generator which hashes only the task identifier.
    Also serves as the base class and fallback for the real generators below.
    """
    name = "noop"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}

    def finalise(self, fn, d, variant):
        return

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def get_taskhash(self, tid, deps, dataCache):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, d, fn):
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unitaskhashes

    def set_taskhashes(self, hashes):
        self.taskhash, self.unitaskhashes = hashes

    def save_unitaskhashes(self):
        return


class SignatureGeneratorBasic(SignatureGenerator):
    """
    Signature generator which computes task hashes from the variable and
    file dependencies of each task.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.basewhitelist = set((data.getVar("BB_HASHBASE_WHITELIST") or "").split())
        self.taskwhitelist = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("1")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})

    def init_rundepcheck(self, data):
        self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None
        if self.taskwhitelist:
            self.twl = re.compile(self.taskwhitelist)
        else:
            self.twl = None

    def _build_data(self, fn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basewhitelist, fn)

        for task in tasklist:
            tid = fn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % fn)
            raise

        # Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH_task-%s" % task, self.basehash[fn + ":" + task])

    def rundep_check(self, fn, recipename, task, dep, depname, dataCache):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the whitelist
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint
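
    # A taint lives alongside the task stamp as <stampbase>.<task>.taint.
    # BitBake writes a fresh uuid4 into it when a task is forced to rerun
    # (see invalidate_task() in SignatureGeneratorBasicHash below), and
    # read_taint() mixes that value into the task hash so dependent tasks
    # rerun too. Illustrative contents of such a file (just a random UUID):
    #
    #   5f6d8a1e-93cb-4d4e-a4cb-6f4e2a6bce29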

    def get_taskhash(self, tid, deps, dataCache):

        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        data = dataCache.basetaskhash[tid]
        self.basehash[tid] = data
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCache.pkg_fn[fn]
        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, deptaskname, depfn) = bb.runqueue.split_tid_mcfn(dep)
            if mc != depmc:
                continue
            depname = dataCache.pkg_fn[depfn]
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCache):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            data = data + self.get_unihash(dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCache.file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCache.file_checksums[fn][task], recipename)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCache.file_checksums[fn][task], recipename)
            for (f, cs) in checksums:
                self.file_checksum_values[tid].append((f, cs))
                if cs:
                    data = data + cs

        taskdep = dataCache.task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            import uuid
            taint = str(uuid.uuid4())
            data = data + taint
            self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCache.stamp[fn])
        if taint:
            data = data + taint
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task])
        return h

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)
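
    # To summarise get_taskhash() above: the task hash is the sha256 of the
    # base hash, followed by the unihash of every runtime dependency (in
    # sorted order), the checksum of every file dependency, and any taint.
    # calc_taskhash() at the bottom of this file recomputes the same value
    # from a dumped signature file.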

    def dump_sigtask(self, fn, task, stampbase, runtime):

        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.taskhash[tid]
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basewhitelist'] = self.basewhitelist
        data['taskwhitelist'] = self.taskwhitelist
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basewhitelist:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = [(os.path.basename(f), cs) for f, cs in self.file_checksum_values[tid]]
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with os.fdopen(fd, "wb") as stream:
                pickle.dump(data, stream, -1)
                stream.flush()
            os.chmod(tmpfile, 0o664)
            os.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err
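
    # A dumped sigdata/sigbasedata file is a plain pickle of the dict built
    # above, so it can be inspected offline. A minimal sketch (sigfile_path
    # is a hypothetical path to a file written by dump_sigtask()):
    #
    #   with open(sigfile_path, 'rb') as f:
    #       sigdata = pickle.load(f)
    #   print(sigdata['basehash'], sigdata.get('taskhash'))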

    def dump_sigfn(self, fn, dataCaches, options):
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If the tid is not in basehash either, this lookup raises the error
        return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        if taskname != "do_setscene" and taskname.endswith("_setscene"):
            tid = fn + ":" + taskname[:-9]
        else:
            tid = fn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)

class SignatureGeneratorUniHashMixIn(object):
    def get_taskdata(self):
        return (self.server, self.method) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method = data[:2]
        super().set_taskdata(data[2:])

    def __get_task_unihash_key(self, tid):
        # TODO: The key only *needs* to be the taskhash, the tid is just
        # convenient
        return '%s:%s' % (tid, self.taskhash[tid])

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash.
            # This ensures that a task won't be re-run if the taskhash
            # changes but would still result in the same output hash.
            unihash = self.unitaskhashes.get(self.__get_task_unihash_key(tid), None)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        self.unitaskhashes[self.__get_task_unihash_key(tid)] = unihash
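
    # The local unihash cache is keyed by "<tid>:<taskhash>" (built by
    # __get_task_unihash_key() above), so an entry looks roughly like this
    # (tid and hashes illustrative):
    #
    #   unitaskhashes['/meta/foo/foo_1.0.bb:do_compile:4f2c...'] = '9a31...'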

    def get_unihash(self, tid):
        import urllib.error
        import urllib.parse
        import urllib.request
        import json

        taskhash = self.taskhash[tid]

        key = self.__get_task_unihash_key(tid)

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # the most recent entry for each task
        unihash = self.unitaskhashes.get(key, None)
        if unihash is not None:
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same keeps
        #    working
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) It is easy for multiple independent builders to derive the same
        #    unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            url = '%s/v1/equivalent?%s' % (self.server,
                    urllib.parse.urlencode({'method': self.method, 'taskhash': self.taskhash[tid]}))

            request = urllib.request.Request(url)
            response = urllib.request.urlopen(request)
            data = response.read().decode('utf-8')

            json_data = json.loads(data)

            if json_data:
                unihash = json_data['unihash']
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that is
                # much more interesting, so it is reported at debug level 1
                bb.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                bb.debug(2, 'No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except urllib.error.URLError as e:
            bb.warn('Failure contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        except (KeyError, json.JSONDecodeError) as e:
            bb.warn('Poorly formatted response from %s: %s' % (self.server, str(e)))

        self.unitaskhashes[key] = unihash
        return unihash
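
    # The lookup above amounts to a plain HTTP GET against the equivalence
    # server. A sketch of the exchange (host and hashes illustrative):
    #
    #   GET http://hashserv:8686/v1/equivalent?method=sstate_output_hash&taskhash=4f2c...
    #   -> {"unihash": "9a31..."}   when the server knows an equivalent
    #   -> empty/null JSON          when it does not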

    def report_unihash(self, path, task, d):
        import urllib.error
        import urllib.request
        import json
        import tempfile
        import base64
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        key = fn + ':do_' + task + ':' + taskhash

        # Sanity checks
        cache_unihash = self.unitaskhashes.get(key, None)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                url = '%s/v1/equivalent' % self.server
                task_data = {
                    'taskhash': taskhash,
                    'method': self.method,
                    'outhash': outhash,
                    'unihash': unihash,
                    'owner': d.getVar('SSTATE_HASHEQUIV_OWNER')
                }

                if report_taskdata:
                    sigfile.seek(0)

                    task_data['PN'] = d.getVar('PN')
                    task_data['PV'] = d.getVar('PV')
                    task_data['PR'] = d.getVar('PR')
                    task_data['task'] = task
                    task_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                headers = {'content-type': 'application/json'}

                request = urllib.request.Request(url, json.dumps(task_data).encode('utf-8'), headers)
                response = urllib.request.urlopen(request)
                data = response.read().decode('utf-8')

                json_data = json.loads(data)
                new_unihash = json_data['unihash']

                if new_unihash != unihash:
                    bb.debug(1, 'Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                else:
                    bb.debug(1, 'Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except urllib.error.URLError as e:
                bb.warn('Failure contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
            except (KeyError, json.JSONDecodeError) as e:
                bb.warn('Poorly formatted response from %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass


#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = "http://" + data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"


def dump_this_task(outfile, d):
    import bb.parse
    fn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.build.stamp_internal(task, d, None, True)
    bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title':   '\033[1m',
              'color_default': '\033[0m',
              'color_add':     '\033[0;32m',
              'color_remove':  '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)
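
# An illustrative worddiff_str() call with colours disabled (the exact order
# in which removed/added runs are emitted is up to simplediff):
#
#   worddiff_str('a b c', 'a d c')  ->  '"a [-b-] {+d+} c"'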

def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

def clean_basepath(a):
    mc = None
    if a.startswith("mc:"):
        _, mc, a = a.split(":", 2)
    b = a.rsplit("/", 2)[1] + '/' + a.rsplit("/", 2)[2]
    if a.startswith("virtual:"):
        b = b + ":" + a.rsplit(":", 1)[0]
    if mc:
        b = b + ":mc:" + mc
    return b

def clean_basepaths(a):
    b = {}
    for x in a:
        b[clean_basepath(x)] = a[x]
    return b

def clean_basepaths_list(a):
    b = []
    for x in a:
        b.append(clean_basepath(x))
    return b
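
# A sketch of what clean_basepath() produces (paths illustrative): the last
# two path components are kept and any multiconfig prefix is moved to the end:
#
#   clean_basepath('/meta/recipes-core/foo/foo_1.0.bb:do_compile')
#       -> 'foo/foo_1.0.bb:do_compile'
#   clean_basepath('mc:mc1:/meta/recipes-core/foo/foo_1.0.bb:do_compile')
#       -> 'foo/foo_1.0.bb:do_compile:mc:mc1'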
'%s'") % (a_data['basewhitelist'], b_data['basewhitelist'])) 700 if a_data['basewhitelist'] and b_data['basewhitelist']: 701 output.append("changed items: %s" % a_data['basewhitelist'].symmetric_difference(b_data['basewhitelist'])) 702 703 if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']: 704 output.append(color_format("{color_title}taskwhitelist changed{color_default} from '%s' to '%s'") % (a_data['taskwhitelist'], b_data['taskwhitelist'])) 705 if a_data['taskwhitelist'] and b_data['taskwhitelist']: 706 output.append("changed items: %s" % a_data['taskwhitelist'].symmetric_difference(b_data['taskwhitelist'])) 707 708 if a_data['taskdeps'] != b_data['taskdeps']: 709 output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps']))) 710 711 if a_data['basehash'] != b_data['basehash'] and not collapsed: 712 output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash'])) 713 714 changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basewhitelist'] & b_data['basewhitelist']) 715 if changed: 716 for dep in changed: 717 output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep])) 718 if a_data['gendeps'][dep] and b_data['gendeps'][dep]: 719 output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep])) 720 if added: 721 for dep in added: 722 output.append(color_format("{color_title}Dependency on variable %s was added") % (dep)) 723 if removed: 724 for dep in removed: 725 output.append(color_format("{color_title}Dependency on Variable %s was removed") % (dep)) 726 727 728 changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals']) 729 if changed: 730 for dep in changed: 731 oldval = a_data['varvals'][dep] 732 newval = b_data['varvals'][dep] 733 if newval and oldval and ('\n' in oldval or '\n' in newval): 734 diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='') 735 # Cut off the first two lines, since we aren't interested in 736 # the old/new filename (they are blank anyway in this case) 737 difflines = list(diff)[2:] 738 if color: 739 # Add colour to diff output 740 for i, line in enumerate(difflines): 741 if line.startswith('+'): 742 line = color_format('{color_add}{line}', line=line) 743 difflines[i] = line 744 elif line.startswith('-'): 745 line = color_format('{color_remove}{line}', line=line) 746 difflines[i] = line 747 output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines))) 748 elif newval and oldval and (' ' in oldval or ' ' in newval): 749 output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors))) 750 else: 751 output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval)) 752 753 if not 'file_checksum_values' in a_data: 754 a_data['file_checksum_values'] = {} 755 if not 'file_checksum_values' in b_data: 756 b_data['file_checksum_values'] = {} 757 758 changed, added, removed = 

    if not 'file_checksum_values' in a_data:
        a_data['file_checksum_values'] = {}
    if not 'file_checksum_values' in b_data:
        b_data['file_checksum_values'] = {}

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if not 'runtaskdeps' in a_data:
        a_data['runtaskdeps'] = {}
    if not 'runtaskdeps' in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))

    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = a_data['runtaskhashes']
        b = b_data['runtaskhashes']
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in added:
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep]))
        if removed:
            for dep in removed:
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep]))
        if changed:
            for dep in changed:
                if not collapsed:
                    output.append(color_format("{color_title}Hash for dependent task %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
                            output = [output[-1]] + recout

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output


def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in alldeps:
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

def calc_taskhash(sigdata):
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()


def dump_sigfile(a):
    output = []

    with open(a, 'rb') as f:
        p1 = pickle.Unpickler(f)
        a_data = p1.load()

    output.append("basewhitelist: %s" % (a_data['basewhitelist']))

    output.append("taskwhitelist: %s" % (a_data['taskwhitelist']))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in a_data['gendeps']:
        output.append("List of dependencies for variable %s is %s" % (dep, a_data['gendeps'][dep]))

    for dep in a_data['varvals']:
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (a_data['runtaskdeps']))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (a_data['file_checksum_values']))

    if 'runtaskhashes' in a_data:
        for dep in a_data['runtaskhashes']:
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output
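
# A minimal sketch of driving the helpers above from a standalone script,
# roughly what the bitbake-diffsigs tool does (file paths are hypothetical):
#
#   import bb.siggen
#
#   for line in bb.siggen.dump_sigfile('/path/to/foo.do_compile.sigdata.abc123'):
#       print(line)
#
#   for line in bb.siggen.compare_sigfiles(sig_a, sig_b, color=True):
#       print(line)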