1# 2# SPDX-License-Identifier: GPL-2.0-only 3# 4 5import hashlib 6import logging 7import os 8import re 9import tempfile 10import pickle 11import bb.data 12import difflib 13import simplediff 14from bb.checksum import FileChecksumCache 15from bb import runqueue 16import hashserv 17 18logger = logging.getLogger('BitBake.SigGen') 19hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv') 20 21def init(d): 22 siggens = [obj for obj in globals().values() 23 if type(obj) is type and issubclass(obj, SignatureGenerator)] 24 25 desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop" 26 for sg in siggens: 27 if desired == sg.name: 28 return sg(d) 29 break 30 else: 31 logger.error("Invalid signature generator '%s', using default 'noop'\n" 32 "Available generators: %s", desired, 33 ', '.join(obj.name for obj in siggens)) 34 return SignatureGenerator(d) 35 36class SignatureGenerator(object): 37 """ 38 """ 39 name = "noop" 40 41 def __init__(self, data): 42 self.basehash = {} 43 self.taskhash = {} 44 self.unihash = {} 45 self.runtaskdeps = {} 46 self.file_checksum_values = {} 47 self.taints = {} 48 self.unitaskhashes = {} 49 self.tidtopn = {} 50 self.setscenetasks = set() 51 52 def finalise(self, fn, d, varient): 53 return 54 55 def postparsing_clean_cache(self): 56 return 57 58 def get_unihash(self, tid): 59 return self.taskhash[tid] 60 61 def prep_taskhash(self, tid, deps, dataCache): 62 return 63 64 def get_taskhash(self, tid, deps, dataCache): 65 self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest() 66 return self.taskhash[tid] 67 68 def writeout_file_checksum_cache(self): 69 """Write/update the file checksum cache onto disk""" 70 return 71 72 def stampfile(self, stampbase, file_name, taskname, extrainfo): 73 return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.') 74 75 def stampcleanmask(self, stampbase, file_name, taskname, extrainfo): 76 return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.') 77 78 def dump_sigtask(self, fn, task, 
stampbase, runtime): 79 return 80 81 def invalidate_task(self, task, d, fn): 82 bb.build.del_stamp(task, d, fn) 83 84 def dump_sigs(self, dataCache, options): 85 return 86 87 def get_taskdata(self): 88 return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks) 89 90 def set_taskdata(self, data): 91 self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data 92 93 def reset(self, data): 94 self.__init__(data) 95 96 def get_taskhashes(self): 97 return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn 98 99 def set_taskhashes(self, hashes): 100 self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes 101 102 def save_unitaskhashes(self): 103 return 104 105 def set_setscene_tasks(self, setscene_tasks): 106 return 107 108class SignatureGeneratorBasic(SignatureGenerator): 109 """ 110 """ 111 name = "basic" 112 113 def __init__(self, data): 114 self.basehash = {} 115 self.taskhash = {} 116 self.unihash = {} 117 self.taskdeps = {} 118 self.runtaskdeps = {} 119 self.file_checksum_values = {} 120 self.taints = {} 121 self.gendeps = {} 122 self.lookupcache = {} 123 self.setscenetasks = set() 124 self.basewhitelist = set((data.getVar("BB_HASHBASE_WHITELIST") or "").split()) 125 self.taskwhitelist = None 126 self.init_rundepcheck(data) 127 checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE") 128 if checksum_cache_file: 129 self.checksum_cache = FileChecksumCache() 130 self.checksum_cache.init_cache(data, checksum_cache_file) 131 else: 132 self.checksum_cache = None 133 134 self.unihash_cache = bb.cache.SimpleCache("3") 135 self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {}) 136 self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split() 137 
self.tidtopn = {} 138 139 def init_rundepcheck(self, data): 140 self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None 141 if self.taskwhitelist: 142 self.twl = re.compile(self.taskwhitelist) 143 else: 144 self.twl = None 145 146 def _build_data(self, fn, d): 147 148 ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1') 149 tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basewhitelist) 150 151 taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basewhitelist, fn) 152 153 for task in tasklist: 154 tid = fn + ":" + task 155 if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]: 156 bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid])) 157 bb.error("The following commands may help:") 158 cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task) 159 # Make sure sigdata is dumped before run printdiff 160 bb.error("%s -Snone" % cmd) 161 bb.error("Then:") 162 bb.error("%s -Sprintdiff\n" % cmd) 163 self.basehash[tid] = basehash[tid] 164 165 self.taskdeps[fn] = taskdeps 166 self.gendeps[fn] = gendeps 167 self.lookupcache[fn] = lookupcache 168 169 return taskdeps 170 171 def set_setscene_tasks(self, setscene_tasks): 172 self.setscenetasks = set(setscene_tasks) 173 174 def finalise(self, fn, d, variant): 175 176 mc = d.getVar("__BBMULTICONFIG", False) or "" 177 if variant or mc: 178 fn = bb.cache.realfn2virtual(fn, variant, mc) 179 180 try: 181 taskdeps = self._build_data(fn, d) 182 except bb.parse.SkipRecipe: 183 raise 184 except: 185 bb.warn("Error during finalise of %s" % fn) 186 raise 187 188 #Slow but can be useful for debugging mismatched basehashes 189 #for task in self.taskdeps[fn]: 190 # self.dump_sigtask(fn, task, d.getVar("STAMP"), False) 191 192 for task in taskdeps: 193 d.setVar("BB_BASEHASH_task-%s" % task, self.basehash[fn + 
":" + task]) 194 195 def postparsing_clean_cache(self): 196 # 197 # After parsing we can remove some things from memory to reduce our memory footprint 198 # 199 self.gendeps = {} 200 self.lookupcache = {} 201 self.taskdeps = {} 202 203 def rundep_check(self, fn, recipename, task, dep, depname, dataCache): 204 # Return True if we should keep the dependency, False to drop it 205 # We only manipulate the dependencies for packages not in the whitelist 206 if self.twl and not self.twl.search(recipename): 207 # then process the actual dependencies 208 if self.twl.search(depname): 209 return False 210 return True 211 212 def read_taint(self, fn, task, stampbase): 213 taint = None 214 try: 215 with open(stampbase + '.' + task + '.taint', 'r') as taintf: 216 taint = taintf.read() 217 except IOError: 218 pass 219 return taint 220 221 def prep_taskhash(self, tid, deps, dataCache): 222 223 (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid) 224 225 self.basehash[tid] = dataCache.basetaskhash[tid] 226 self.runtaskdeps[tid] = [] 227 self.file_checksum_values[tid] = [] 228 recipename = dataCache.pkg_fn[fn] 229 230 self.tidtopn[tid] = recipename 231 232 for dep in sorted(deps, key=clean_basepath): 233 (depmc, _, deptaskname, depfn) = bb.runqueue.split_tid_mcfn(dep) 234 if mc != depmc: 235 continue 236 depname = dataCache.pkg_fn[depfn] 237 if not self.rundep_check(fn, recipename, task, dep, depname, dataCache): 238 continue 239 if dep not in self.taskhash: 240 bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" 
% dep) 241 self.runtaskdeps[tid].append(dep) 242 243 if task in dataCache.file_checksums[fn]: 244 if self.checksum_cache: 245 checksums = self.checksum_cache.get_checksums(dataCache.file_checksums[fn][task], recipename, self.localdirsexclude) 246 else: 247 checksums = bb.fetch2.get_file_checksums(dataCache.file_checksums[fn][task], recipename, self.localdirsexclude) 248 for (f,cs) in checksums: 249 self.file_checksum_values[tid].append((f,cs)) 250 251 taskdep = dataCache.task_deps[fn] 252 if 'nostamp' in taskdep and task in taskdep['nostamp']: 253 # Nostamp tasks need an implicit taint so that they force any dependent tasks to run 254 if tid in self.taints and self.taints[tid].startswith("nostamp:"): 255 # Don't reset taint value upon every call 256 pass 257 else: 258 import uuid 259 taint = str(uuid.uuid4()) 260 self.taints[tid] = "nostamp:" + taint 261 262 taint = self.read_taint(fn, task, dataCache.stamp[fn]) 263 if taint: 264 self.taints[tid] = taint 265 logger.warning("%s is tainted from a forced run" % tid) 266 267 return 268 269 def get_taskhash(self, tid, deps, dataCache): 270 271 data = self.basehash[tid] 272 for dep in self.runtaskdeps[tid]: 273 if dep in self.unihash: 274 if self.unihash[dep] is None: 275 data = data + self.taskhash[dep] 276 else: 277 data = data + self.unihash[dep] 278 else: 279 data = data + self.get_unihash(dep) 280 281 for (f, cs) in self.file_checksum_values[tid]: 282 if cs: 283 data = data + cs 284 285 if tid in self.taints: 286 if self.taints[tid].startswith("nostamp:"): 287 data = data + self.taints[tid][8:] 288 else: 289 data = data + self.taints[tid] 290 291 h = hashlib.sha256(data.encode("utf-8")).hexdigest() 292 self.taskhash[tid] = h 293 #d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task]) 294 return h 295 296 def writeout_file_checksum_cache(self): 297 """Write/update the file checksum cache onto disk""" 298 if self.checksum_cache: 299 self.checksum_cache.save_extras() 300 self.checksum_cache.save_merge() 301 else: 302 
bb.fetch2.fetcher_parse_save() 303 bb.fetch2.fetcher_parse_done() 304 305 def save_unitaskhashes(self): 306 self.unihash_cache.save(self.unitaskhashes) 307 308 def dump_sigtask(self, fn, task, stampbase, runtime): 309 310 tid = fn + ":" + task 311 referencestamp = stampbase 312 if isinstance(runtime, str) and runtime.startswith("customfile"): 313 sigfile = stampbase 314 referencestamp = runtime[11:] 315 elif runtime and tid in self.taskhash: 316 sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid) 317 else: 318 sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid] 319 320 bb.utils.mkdirhier(os.path.dirname(sigfile)) 321 322 data = {} 323 data['task'] = task 324 data['basewhitelist'] = self.basewhitelist 325 data['taskwhitelist'] = self.taskwhitelist 326 data['taskdeps'] = self.taskdeps[fn][task] 327 data['basehash'] = self.basehash[tid] 328 data['gendeps'] = {} 329 data['varvals'] = {} 330 data['varvals'][task] = self.lookupcache[fn][task] 331 for dep in self.taskdeps[fn][task]: 332 if dep in self.basewhitelist: 333 continue 334 data['gendeps'][dep] = self.gendeps[fn][dep] 335 data['varvals'][dep] = self.lookupcache[fn][dep] 336 337 if runtime and tid in self.taskhash: 338 data['runtaskdeps'] = self.runtaskdeps[tid] 339 data['file_checksum_values'] = [(os.path.basename(f), cs) for f,cs in self.file_checksum_values[tid]] 340 data['runtaskhashes'] = {} 341 for dep in data['runtaskdeps']: 342 data['runtaskhashes'][dep] = self.get_unihash(dep) 343 data['taskhash'] = self.taskhash[tid] 344 data['unihash'] = self.get_unihash(tid) 345 346 taint = self.read_taint(fn, task, referencestamp) 347 if taint: 348 data['taint'] = taint 349 350 if runtime and tid in self.taints: 351 if 'nostamp:' in self.taints[tid]: 352 data['taint'] = self.taints[tid] 353 354 computed_basehash = calc_basehash(data) 355 if computed_basehash != self.basehash[tid]: 356 bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, 
self.basehash[tid], tid)) 357 if runtime and tid in self.taskhash: 358 computed_taskhash = calc_taskhash(data) 359 if computed_taskhash != self.taskhash[tid]: 360 bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid)) 361 sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash) 362 363 fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.") 364 try: 365 with os.fdopen(fd, "wb") as stream: 366 p = pickle.dump(data, stream, -1) 367 stream.flush() 368 os.chmod(tmpfile, 0o664) 369 os.rename(tmpfile, sigfile) 370 except (OSError, IOError) as err: 371 try: 372 os.unlink(tmpfile) 373 except OSError: 374 pass 375 raise err 376 377 def dump_sigfn(self, fn, dataCaches, options): 378 if fn in self.taskdeps: 379 for task in self.taskdeps[fn]: 380 tid = fn + ":" + task 381 mc = bb.runqueue.mc_from_tid(tid) 382 if tid not in self.taskhash: 383 continue 384 if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]: 385 bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" 
% tid) 386 bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid])) 387 self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True) 388 389class SignatureGeneratorBasicHash(SignatureGeneratorBasic): 390 name = "basichash" 391 392 def get_stampfile_hash(self, tid): 393 if tid in self.taskhash: 394 return self.taskhash[tid] 395 396 # If task is not in basehash, then error 397 return self.basehash[tid] 398 399 def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False): 400 if taskname != "do_setscene" and taskname.endswith("_setscene"): 401 tid = fn + ":" + taskname[:-9] 402 else: 403 tid = fn + ":" + taskname 404 if clean: 405 h = "*" 406 else: 407 h = self.get_stampfile_hash(tid) 408 409 return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.') 410 411 def stampcleanmask(self, stampbase, fn, taskname, extrainfo): 412 return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True) 413 414 def invalidate_task(self, task, d, fn): 415 bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task)) 416 bb.build.write_taint(task, d, fn) 417 418class SignatureGeneratorUniHashMixIn(object): 419 def __init__(self, data): 420 self.extramethod = {} 421 super().__init__(data) 422 423 def get_taskdata(self): 424 return (self.server, self.method, self.extramethod) + super().get_taskdata() 425 426 def set_taskdata(self, data): 427 self.server, self.method, self.extramethod = data[:3] 428 super().set_taskdata(data[3:]) 429 430 def client(self): 431 if getattr(self, '_client', None) is None: 432 self._client = hashserv.create_client(self.server) 433 return self._client 434 435 def get_stampfile_hash(self, tid): 436 if tid in self.taskhash: 437 # If a unique hash is reported, use it as the stampfile hash. 
This 438 # ensures that if a task won't be re-run if the taskhash changes, 439 # but it would result in the same output hash 440 unihash = self._get_unihash(tid) 441 if unihash is not None: 442 return unihash 443 444 return super().get_stampfile_hash(tid) 445 446 def set_unihash(self, tid, unihash): 447 (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid) 448 key = mc + ":" + self.tidtopn[tid] + ":" + taskname 449 self.unitaskhashes[key] = (self.taskhash[tid], unihash) 450 self.unihash[tid] = unihash 451 452 def _get_unihash(self, tid, checkkey=None): 453 if tid not in self.tidtopn: 454 return None 455 (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid) 456 key = mc + ":" + self.tidtopn[tid] + ":" + taskname 457 if key not in self.unitaskhashes: 458 return None 459 if not checkkey: 460 checkkey = self.taskhash[tid] 461 (key, unihash) = self.unitaskhashes[key] 462 if key != checkkey: 463 return None 464 return unihash 465 466 def get_unihash(self, tid): 467 taskhash = self.taskhash[tid] 468 469 # If its not a setscene task we can return 470 if self.setscenetasks and tid not in self.setscenetasks: 471 self.unihash[tid] = None 472 return taskhash 473 474 # TODO: This cache can grow unbounded. It probably only needs to keep 475 # for each task 476 unihash = self._get_unihash(tid) 477 if unihash is not None: 478 self.unihash[tid] = unihash 479 return unihash 480 481 # In the absence of being able to discover a unique hash from the 482 # server, make it be equivalent to the taskhash. The unique "hash" only 483 # really needs to be a unique string (not even necessarily a hash), but 484 # making it match the taskhash has a few advantages: 485 # 486 # 1) All of the sstate code that assumes hashes can be the same 487 # 2) It provides maximal compatibility with builders that don't use 488 # an equivalency server 489 # 3) The value is easy for multiple independent builders to derive the 490 # same unique hash from the same input. 
This means that if the 491 # independent builders find the same taskhash, but it isn't reported 492 # to the server, there is a better chance that they will agree on 493 # the unique hash. 494 unihash = taskhash 495 496 try: 497 method = self.method 498 if tid in self.extramethod: 499 method = method + self.extramethod[tid] 500 data = self.client().get_unihash(method, self.taskhash[tid]) 501 if data: 502 unihash = data 503 # A unique hash equal to the taskhash is not very interesting, 504 # so it is reported it at debug level 2. If they differ, that 505 # is much more interesting, so it is reported at debug level 1 506 hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server)) 507 else: 508 hashequiv_logger.debug(2, 'No reported unihash for %s:%s from %s' % (tid, taskhash, self.server)) 509 except hashserv.client.HashConnectionError as e: 510 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) 511 512 self.set_unihash(tid, unihash) 513 self.unihash[tid] = unihash 514 return unihash 515 516 def report_unihash(self, path, task, d): 517 import importlib 518 519 taskhash = d.getVar('BB_TASKHASH') 520 unihash = d.getVar('BB_UNIHASH') 521 report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1' 522 tempdir = d.getVar('T') 523 fn = d.getVar('BB_FILENAME') 524 tid = fn + ':do_' + task 525 key = tid + ':' + taskhash 526 527 if self.setscenetasks and tid not in self.setscenetasks: 528 return 529 530 # This can happen if locked sigs are in action. Detect and just abort 531 if taskhash != self.taskhash[tid]: 532 return 533 534 # Sanity checks 535 cache_unihash = self._get_unihash(tid, checkkey=taskhash) 536 if cache_unihash is None: 537 bb.fatal('%s not in unihash cache. 
Please report this error' % key) 538 539 if cache_unihash != unihash: 540 bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash)) 541 542 sigfile = None 543 sigfile_name = "depsig.do_%s.%d" % (task, os.getpid()) 544 sigfile_link = "depsig.do_%s" % task 545 546 try: 547 sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b') 548 549 locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d} 550 551 if "." in self.method: 552 (module, method) = self.method.rsplit('.', 1) 553 locs['method'] = getattr(importlib.import_module(module), method) 554 outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs) 555 else: 556 outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs) 557 558 try: 559 extra_data = {} 560 561 owner = d.getVar('SSTATE_HASHEQUIV_OWNER') 562 if owner: 563 extra_data['owner'] = owner 564 565 if report_taskdata: 566 sigfile.seek(0) 567 568 extra_data['PN'] = d.getVar('PN') 569 extra_data['PV'] = d.getVar('PV') 570 extra_data['PR'] = d.getVar('PR') 571 extra_data['task'] = task 572 extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8') 573 574 method = self.method 575 if tid in self.extramethod: 576 method = method + self.extramethod[tid] 577 578 data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data) 579 new_unihash = data['unihash'] 580 581 if new_unihash != unihash: 582 hashequiv_logger.debug(1, 'Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server)) 583 bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d) 584 self.set_unihash(tid, new_unihash) 585 d.setVar('BB_UNIHASH', new_unihash) 586 else: 587 hashequiv_logger.debug(1, 'Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server)) 588 except hashserv.client.HashConnectionError as e: 589 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) 590 finally: 591 if sigfile: 592 
sigfile.close() 593 594 sigfile_link_path = os.path.join(tempdir, sigfile_link) 595 bb.utils.remove(sigfile_link_path) 596 597 try: 598 os.symlink(sigfile_name, sigfile_link_path) 599 except OSError: 600 pass 601 602 def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches): 603 try: 604 extra_data = {} 605 method = self.method 606 if tid in self.extramethod: 607 method = method + self.extramethod[tid] 608 609 data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data) 610 hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data))) 611 612 if data is None: 613 bb.warn("Server unable to handle unihash report") 614 return False 615 616 finalunihash = data['unihash'] 617 618 if finalunihash == current_unihash: 619 hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash)) 620 elif finalunihash == wanted_unihash: 621 hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash)) 622 self.set_unihash(tid, finalunihash) 623 return True 624 else: 625 # TODO: What to do here? 
626 hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash)) 627 628 except hashserv.client.HashConnectionError as e: 629 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) 630 631 return False 632 633# 634# Dummy class used for bitbake-selftest 635# 636class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash): 637 name = "TestEquivHash" 638 def init_rundepcheck(self, data): 639 super().init_rundepcheck(data) 640 self.server = data.getVar('BB_HASHSERVE') 641 self.method = "sstate_output_hash" 642 643 644def dump_this_task(outfile, d): 645 import bb.parse 646 fn = d.getVar("BB_FILENAME") 647 task = "do_" + d.getVar("BB_CURRENTTASK") 648 referencestamp = bb.build.stamp_internal(task, d, None, True) 649 bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp) 650 651def init_colors(enable_color): 652 """Initialise colour dict for passing to compare_sigfiles()""" 653 # First set up the colours 654 colors = {'color_title': '\033[1m', 655 'color_default': '\033[0m', 656 'color_add': '\033[0;32m', 657 'color_remove': '\033[0;31m', 658 } 659 # Leave all keys present but clear the values 660 if not enable_color: 661 for k in colors.keys(): 662 colors[k] = '' 663 return colors 664 665def worddiff_str(oldstr, newstr, colors=None): 666 if not colors: 667 colors = init_colors(False) 668 diff = simplediff.diff(oldstr.split(' '), newstr.split(' ')) 669 ret = [] 670 for change, value in diff: 671 value = ' '.join(value) 672 if change == '=': 673 ret.append(value) 674 elif change == '+': 675 item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors) 676 ret.append(item) 677 elif change == '-': 678 item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors) 679 ret.append(item) 680 whitespace_note = '' 681 if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()): 682 whitespace_note = ' 
(whitespace changed)' 683 return '"%s"%s' % (' '.join(ret), whitespace_note) 684 685def list_inline_diff(oldlist, newlist, colors=None): 686 if not colors: 687 colors = init_colors(False) 688 diff = simplediff.diff(oldlist, newlist) 689 ret = [] 690 for change, value in diff: 691 value = ' '.join(value) 692 if change == '=': 693 ret.append("'%s'" % value) 694 elif change == '+': 695 item = '{color_add}+{value}{color_default}'.format(value=value, **colors) 696 ret.append(item) 697 elif change == '-': 698 item = '{color_remove}-{value}{color_default}'.format(value=value, **colors) 699 ret.append(item) 700 return '[%s]' % (', '.join(ret)) 701 702def clean_basepath(a): 703 mc = None 704 if a.startswith("mc:"): 705 _, mc, a = a.split(":", 2) 706 b = a.rsplit("/", 2)[1] + '/' + a.rsplit("/", 2)[2] 707 if a.startswith("virtual:"): 708 b = b + ":" + a.rsplit(":", 1)[0] 709 if mc: 710 b = b + ":mc:" + mc 711 return b 712 713def clean_basepaths(a): 714 b = {} 715 for x in a: 716 b[clean_basepath(x)] = a[x] 717 return b 718 719def clean_basepaths_list(a): 720 b = [] 721 for x in a: 722 b.append(clean_basepath(x)) 723 return b 724 725def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False): 726 output = [] 727 728 colors = init_colors(color) 729 def color_format(formatstr, **values): 730 """ 731 Return colour formatted string. 
732 NOTE: call with the format string, not an already formatted string 733 containing values (otherwise you could have trouble with { and } 734 characters) 735 """ 736 if not formatstr.endswith('{color_default}'): 737 formatstr += '{color_default}' 738 # In newer python 3 versions you can pass both of these directly, 739 # but we only require 3.4 at the moment 740 formatparams = {} 741 formatparams.update(colors) 742 formatparams.update(values) 743 return formatstr.format(**formatparams) 744 745 with open(a, 'rb') as f: 746 p1 = pickle.Unpickler(f) 747 a_data = p1.load() 748 with open(b, 'rb') as f: 749 p2 = pickle.Unpickler(f) 750 b_data = p2.load() 751 752 def dict_diff(a, b, whitelist=set()): 753 sa = set(a.keys()) 754 sb = set(b.keys()) 755 common = sa & sb 756 changed = set() 757 for i in common: 758 if a[i] != b[i] and i not in whitelist: 759 changed.add(i) 760 added = sb - sa 761 removed = sa - sb 762 return changed, added, removed 763 764 def file_checksums_diff(a, b): 765 from collections import Counter 766 # Handle old siginfo format 767 if isinstance(a, dict): 768 a = [(os.path.basename(f), cs) for f, cs in a.items()] 769 if isinstance(b, dict): 770 b = [(os.path.basename(f), cs) for f, cs in b.items()] 771 # Compare lists, ensuring we can handle duplicate filenames if they exist 772 removedcount = Counter(a) 773 removedcount.subtract(b) 774 addedcount = Counter(b) 775 addedcount.subtract(a) 776 added = [] 777 for x in b: 778 if addedcount[x] > 0: 779 addedcount[x] -= 1 780 added.append(x) 781 removed = [] 782 changed = [] 783 for x in a: 784 if removedcount[x] > 0: 785 removedcount[x] -= 1 786 for y in added: 787 if y[0] == x[0]: 788 changed.append((x[0], x[1], y[1])) 789 added.remove(y) 790 break 791 else: 792 removed.append(x) 793 added = [x[0] for x in added] 794 removed = [x[0] for x in removed] 795 return changed, added, removed 796 797 if 'basewhitelist' in a_data and a_data['basewhitelist'] != b_data['basewhitelist']: 798 
output.append(color_format("{color_title}basewhitelist changed{color_default} from '%s' to '%s'") % (a_data['basewhitelist'], b_data['basewhitelist'])) 799 if a_data['basewhitelist'] and b_data['basewhitelist']: 800 output.append("changed items: %s" % a_data['basewhitelist'].symmetric_difference(b_data['basewhitelist'])) 801 802 if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']: 803 output.append(color_format("{color_title}taskwhitelist changed{color_default} from '%s' to '%s'") % (a_data['taskwhitelist'], b_data['taskwhitelist'])) 804 if a_data['taskwhitelist'] and b_data['taskwhitelist']: 805 output.append("changed items: %s" % a_data['taskwhitelist'].symmetric_difference(b_data['taskwhitelist'])) 806 807 if a_data['taskdeps'] != b_data['taskdeps']: 808 output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps']))) 809 810 if a_data['basehash'] != b_data['basehash'] and not collapsed: 811 output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash'])) 812 813 changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basewhitelist'] & b_data['basewhitelist']) 814 if changed: 815 for dep in changed: 816 output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep])) 817 if a_data['gendeps'][dep] and b_data['gendeps'][dep]: 818 output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep])) 819 if added: 820 for dep in added: 821 output.append(color_format("{color_title}Dependency on variable %s was added") % (dep)) 822 if removed: 823 for dep in removed: 824 output.append(color_format("{color_title}Dependency on Variable %s was removed") % (dep)) 825 
826 827 changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals']) 828 if changed: 829 for dep in changed: 830 oldval = a_data['varvals'][dep] 831 newval = b_data['varvals'][dep] 832 if newval and oldval and ('\n' in oldval or '\n' in newval): 833 diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='') 834 # Cut off the first two lines, since we aren't interested in 835 # the old/new filename (they are blank anyway in this case) 836 difflines = list(diff)[2:] 837 if color: 838 # Add colour to diff output 839 for i, line in enumerate(difflines): 840 if line.startswith('+'): 841 line = color_format('{color_add}{line}', line=line) 842 difflines[i] = line 843 elif line.startswith('-'): 844 line = color_format('{color_remove}{line}', line=line) 845 difflines[i] = line 846 output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines))) 847 elif newval and oldval and (' ' in oldval or ' ' in newval): 848 output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors))) 849 else: 850 output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval)) 851 852 if not 'file_checksum_values' in a_data: 853 a_data['file_checksum_values'] = {} 854 if not 'file_checksum_values' in b_data: 855 b_data['file_checksum_values'] = {} 856 857 changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values']) 858 if changed: 859 for f, old, new in changed: 860 output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new)) 861 if added: 862 for f in added: 863 output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f)) 864 if 
removed: 865 for f in removed: 866 output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f)) 867 868 if not 'runtaskdeps' in a_data: 869 a_data['runtaskdeps'] = {} 870 if not 'runtaskdeps' in b_data: 871 b_data['runtaskdeps'] = {} 872 873 if not collapsed: 874 if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']): 875 changed = ["Number of task dependencies changed"] 876 else: 877 changed = [] 878 for idx, task in enumerate(a_data['runtaskdeps']): 879 a = a_data['runtaskdeps'][idx] 880 b = b_data['runtaskdeps'][idx] 881 if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed: 882 changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b])) 883 884 if changed: 885 clean_a = clean_basepaths_list(a_data['runtaskdeps']) 886 clean_b = clean_basepaths_list(b_data['runtaskdeps']) 887 if clean_a != clean_b: 888 output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors)) 889 else: 890 output.append(color_format("{color_title}runtaskdeps changed:")) 891 output.append("\n".join(changed)) 892 893 894 if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data: 895 a = a_data['runtaskhashes'] 896 b = b_data['runtaskhashes'] 897 changed, added, removed = dict_diff(a, b) 898 if added: 899 for dep in added: 900 bdep_found = False 901 if removed: 902 for bdep in removed: 903 if b[dep] == a[bdep]: 904 #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep)) 905 bdep_found = True 906 if not bdep_found: 907 output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep])) 908 if removed: 909 for dep in removed: 910 adep_found = False 911 if added: 912 for adep in added: 913 if b[adep] == a[dep]: 914 #output.append("Dependency on task %s was replaced by %s 
def calc_basehash(sigdata):
    """
    Recompute a task's base hash from unpickled signature data.

    The hashed string is the value of the task's own variable (treated as
    empty when it is None) followed, for each entry of sigdata['taskdeps']
    in iteration order, by the dependency name and - when it is not None -
    the stringified value found in sigdata['varvals'].

    Returns the hexadecimal SHA-256 digest of that string (UTF-8 encoded).
    Raises KeyError if 'task', 'taskdeps' or a required 'varvals' entry is
    missing from sigdata.
    """
    task = sigdata['task']
    varvals = sigdata['varvals']

    basedata = varvals[task]
    # Collect the pieces and join once instead of growing a string in a loop.
    parts = ['' if basedata is None else basedata]

    for dep in sigdata['taskdeps']:
        parts.append(dep)
        val = varvals[dep]
        if val is not None:
            parts.append(str(val))

    return hashlib.sha256(''.join(parts).encode("utf-8")).hexdigest()


def calc_taskhash(sigdata):
    """
    Recompute a task hash from unpickled signature data.

    The hashed string is sigdata['basehash'], then the hash of every task in
    sigdata['runtaskdeps'] (looked up in sigdata['runtaskhashes']), then
    every non-empty checksum (second element) of the entries in
    sigdata['file_checksum_values'], then the taint if one is present.

    Returns the hexadecimal SHA-256 digest of that string (UTF-8 encoded).
    Raises KeyError if 'basehash', 'runtaskdeps', 'file_checksum_values' or a
    needed 'runtaskhashes' entry is missing.
    """
    nostamp_prefix = 'nostamp:'

    parts = [sigdata['basehash']]

    runtaskdeps = sigdata['runtaskdeps']
    parts.extend(sigdata['runtaskhashes'][dep] for dep in runtaskdeps)

    # Entries without a checksum (falsy second element) do not contribute.
    parts.extend(c[1] for c in sigdata['file_checksum_values'] if c[1])

    if 'taint' in sigdata:
        taint = sigdata['taint']
        # Only strip the marker when it really is a prefix; a plain
        # substring match would chop 8 unrelated characters off the taint.
        if taint.startswith(nostamp_prefix):
            parts.append(taint[len(nostamp_prefix):])
        else:
            parts.append(taint)

    return hashlib.sha256(''.join(parts).encode("utf-8")).hexdigest()


def dump_sigfile(a):
    """
    Describe the contents of a pickled signature data file.

    a is the path of the file to read. The return value is a list of
    human-readable strings: the whitelists, task dependencies, stored base
    hash, per-variable dependencies and values, the optional runtime
    dependency/checksum/taint information, and finally the base and task
    hashes recomputed from the loaded data.

    Raises KeyError if one of the always-printed keys ('basewhitelist',
    'taskwhitelist', 'taskdeps', 'basehash', 'gendeps', 'varvals') is absent.
    """
    output = []

    # NOTE(review): pickle can execute arbitrary code while loading. Only
    # point this at signature files from a trusted build directory.
    with open(a, 'rb') as f:
        a_data = pickle.Unpickler(f).load()

    output.append("basewhitelist: %s" % (a_data['basewhitelist']))

    output.append("taskwhitelist: %s" % (a_data['taskwhitelist']))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep, deplist in a_data['gendeps'].items():
        output.append("List of dependencies for variable %s is %s" % (dep, deplist))

    for dep, value in a_data['varvals'].items():
        output.append("Variable %s value is %s" % (dep, value))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (a_data['runtaskdeps']))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (a_data['file_checksum_values']))

    if 'runtaskhashes' in a_data:
        for dep, dephash in a_data['runtaskhashes'].items():
            output.append("Hash for dependent task %s is %s" % (dep, dephash))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    # The runtime keys are treated as optional above, so their absence must
    # not abort the dump: report it the same way as a missing base hash.
    try:
        computed_taskhash = calc_taskhash(a_data)
    except KeyError:
        output.append("Unable to compute task hash")
    else:
        output.append("Computed task hash is %s" % computed_taskhash)

    return output