1# 2# SPDX-License-Identifier: GPL-2.0-only 3# 4 5import hashlib 6import logging 7import os 8import re 9import tempfile 10import pickle 11import bb.data 12import difflib 13import simplediff 14from bb.checksum import FileChecksumCache 15from bb import runqueue 16import hashserv 17import hashserv.client 18 19logger = logging.getLogger('BitBake.SigGen') 20hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv') 21 22def init(d): 23 siggens = [obj for obj in globals().values() 24 if type(obj) is type and issubclass(obj, SignatureGenerator)] 25 26 desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop" 27 for sg in siggens: 28 if desired == sg.name: 29 return sg(d) 30 break 31 else: 32 logger.error("Invalid signature generator '%s', using default 'noop'\n" 33 "Available generators: %s", desired, 34 ', '.join(obj.name for obj in siggens)) 35 return SignatureGenerator(d) 36 37class SignatureGenerator(object): 38 """ 39 """ 40 name = "noop" 41 42 # If the derived class supports multiconfig datacaches, set this to True 43 # The default is False for backward compatibility with derived signature 44 # generators that do not understand multiconfig caches 45 supports_multiconfig_datacaches = False 46 47 def __init__(self, data): 48 self.basehash = {} 49 self.taskhash = {} 50 self.unihash = {} 51 self.runtaskdeps = {} 52 self.file_checksum_values = {} 53 self.taints = {} 54 self.unitaskhashes = {} 55 self.tidtopn = {} 56 self.setscenetasks = set() 57 58 def finalise(self, fn, d, varient): 59 return 60 61 def postparsing_clean_cache(self): 62 return 63 64 def get_unihash(self, tid): 65 return self.taskhash[tid] 66 67 def prep_taskhash(self, tid, deps, dataCaches): 68 return 69 70 def get_taskhash(self, tid, deps, dataCaches): 71 self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest() 72 return self.taskhash[tid] 73 74 def writeout_file_checksum_cache(self): 75 """Write/update the file checksum cache onto disk""" 76 return 77 78 def stampfile(self, stampbase, file_name, taskname, extrainfo): 79 return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.') 80 81 def stampcleanmask(self, stampbase, file_name, taskname, extrainfo): 82 return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.') 83 84 def dump_sigtask(self, fn, task, stampbase, runtime): 85 return 86 87 def invalidate_task(self, task, d, fn): 88 bb.build.del_stamp(task, d, fn) 89 90 def dump_sigs(self, dataCache, options): 91 return 92 93 def get_taskdata(self): 94 return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks) 95 96 def set_taskdata(self, data): 97 self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data 98 99 def reset(self, data): 100 self.__init__(data) 101 102 def get_taskhashes(self): 103 return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn 104 105 def set_taskhashes(self, hashes): 106 self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes 107 108 def save_unitaskhashes(self): 109 return 110 111 def set_setscene_tasks(self, setscene_tasks): 112 return 113 114 @classmethod 115 def get_data_caches(cls, dataCaches, mc): 116 """ 117 This function returns the datacaches that should be passed to signature 118 generator functions. If the signature generator supports multiconfig 119 caches, the entire dictionary of data caches is sent, otherwise a 120 special proxy is sent that support both index access to all 121 multiconfigs, and also direct access for the default multiconfig. 122 123 The proxy class allows code in this class itself to always use 124 multiconfig aware code (to ease maintenance), but derived classes that 125 are unaware of multiconfig data caches can still access the default 126 multiconfig as expected. 127 128 Do not override this function in derived classes; it will be removed in 129 the future when support for multiconfig data caches is mandatory 130 """ 131 class DataCacheProxy(object): 132 def __init__(self): 133 pass 134 135 def __getitem__(self, key): 136 return dataCaches[key] 137 138 def __getattr__(self, name): 139 return getattr(dataCaches[mc], name) 140 141 if cls.supports_multiconfig_datacaches: 142 return dataCaches 143 144 return DataCacheProxy() 145 146class SignatureGeneratorBasic(SignatureGenerator): 147 """ 148 """ 149 name = "basic" 150 151 def __init__(self, data): 152 self.basehash = {} 153 self.taskhash = {} 154 self.unihash = {} 155 self.taskdeps = {} 156 self.runtaskdeps = {} 157 self.file_checksum_values = {} 158 self.taints = {} 159 self.gendeps = {} 160 self.lookupcache = {} 161 self.setscenetasks = set() 162 self.basewhitelist = set((data.getVar("BB_HASHBASE_WHITELIST") or "").split()) 163 self.taskwhitelist = None 164 self.init_rundepcheck(data) 165 checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE") 166 if checksum_cache_file: 167 self.checksum_cache = FileChecksumCache() 168 self.checksum_cache.init_cache(data, checksum_cache_file) 169 else: 170 self.checksum_cache = None 171 172 self.unihash_cache = bb.cache.SimpleCache("3") 173 self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {}) 174 self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split() 175 self.tidtopn = {} 176 177 def init_rundepcheck(self, data): 178 self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None 179 if self.taskwhitelist: 180 self.twl = re.compile(self.taskwhitelist) 181 else: 182 self.twl = None 183 184 def _build_data(self, fn, d): 185 186 ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1') 187 tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basewhitelist) 188 189 taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basewhitelist, fn) 190 191 for task in tasklist: 192 tid = fn + ":" + task 193 if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]: 194 bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid])) 195 bb.error("The following commands may help:") 196 cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task) 197 # Make sure sigdata is dumped before run printdiff 198 bb.error("%s -Snone" % cmd) 199 bb.error("Then:") 200 bb.error("%s -Sprintdiff\n" % cmd) 201 self.basehash[tid] = basehash[tid] 202 203 self.taskdeps[fn] = taskdeps 204 self.gendeps[fn] = gendeps 205 self.lookupcache[fn] = lookupcache 206 207 return taskdeps 208 209 def set_setscene_tasks(self, setscene_tasks): 210 self.setscenetasks = set(setscene_tasks) 211 212 def finalise(self, fn, d, variant): 213 214 mc = d.getVar("__BBMULTICONFIG", False) or "" 215 if variant or mc: 216 fn = bb.cache.realfn2virtual(fn, variant, mc) 217 218 try: 219 taskdeps = self._build_data(fn, d) 220 except bb.parse.SkipRecipe: 221 raise 222 except: 223 bb.warn("Error during finalise of %s" % fn) 224 raise 225 226 #Slow but can be useful for debugging mismatched basehashes 227 #for task in self.taskdeps[fn]: 228 # self.dump_sigtask(fn, task, d.getVar("STAMP"), False) 229 230 for task in taskdeps: 231 d.setVar("BB_BASEHASH:task-%s" % task, self.basehash[fn + ":" + task]) 232 233 def postparsing_clean_cache(self): 234 # 235 # After parsing we can remove some things from memory to reduce our memory footprint 236 # 237 self.gendeps = {} 238 self.lookupcache = {} 239 self.taskdeps = {} 240 241 def rundep_check(self, fn, recipename, task, dep, depname, dataCaches): 242 # Return True if we should keep the dependency, False to drop it 243 # We only manipulate the dependencies for packages not in the whitelist 244 if self.twl and not self.twl.search(recipename): 245 # then process the actual dependencies 246 if self.twl.search(depname): 247 return False 248 return True 249 250 def read_taint(self, fn, task, stampbase): 251 taint = None 252 try: 253 with open(stampbase + '.' + task + '.taint', 'r') as taintf: 254 taint = taintf.read() 255 except IOError: 256 pass 257 return taint 258 259 def prep_taskhash(self, tid, deps, dataCaches): 260 261 (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid) 262 263 self.basehash[tid] = dataCaches[mc].basetaskhash[tid] 264 self.runtaskdeps[tid] = [] 265 self.file_checksum_values[tid] = [] 266 recipename = dataCaches[mc].pkg_fn[fn] 267 268 self.tidtopn[tid] = recipename 269 270 for dep in sorted(deps, key=clean_basepath): 271 (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep) 272 depname = dataCaches[depmc].pkg_fn[depmcfn] 273 if not self.supports_multiconfig_datacaches and mc != depmc: 274 # If the signature generator doesn't understand multiconfig 275 # data caches, any dependency not in the same multiconfig must 276 # be skipped for backward compatibility 277 continue 278 if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches): 279 continue 280 if dep not in self.taskhash: 281 bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep) 282 self.runtaskdeps[tid].append(dep) 283 284 if task in dataCaches[mc].file_checksums[fn]: 285 if self.checksum_cache: 286 checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude) 287 else: 288 checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude) 289 for (f,cs) in checksums: 290 self.file_checksum_values[tid].append((f,cs)) 291 292 taskdep = dataCaches[mc].task_deps[fn] 293 if 'nostamp' in taskdep and task in taskdep['nostamp']: 294 # Nostamp tasks need an implicit taint so that they force any dependent tasks to run 295 if tid in self.taints and self.taints[tid].startswith("nostamp:"): 296 # Don't reset taint value upon every call 297 pass 298 else: 299 import uuid 300 taint = str(uuid.uuid4()) 301 self.taints[tid] = "nostamp:" + taint 302 303 taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn]) 304 if taint: 305 self.taints[tid] = taint 306 logger.warning("%s is tainted from a forced run" % tid) 307 308 return 309 310 def get_taskhash(self, tid, deps, dataCaches): 311 312 data = self.basehash[tid] 313 for dep in self.runtaskdeps[tid]: 314 data = data + self.get_unihash(dep) 315 316 for (f, cs) in self.file_checksum_values[tid]: 317 if cs: 318 data = data + cs 319 320 if tid in self.taints: 321 if self.taints[tid].startswith("nostamp:"): 322 data = data + self.taints[tid][8:] 323 else: 324 data = data + self.taints[tid] 325 326 h = hashlib.sha256(data.encode("utf-8")).hexdigest() 327 self.taskhash[tid] = h 328 #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task]) 329 return h 330 331 def writeout_file_checksum_cache(self): 332 """Write/update the file checksum cache onto disk""" 333 if self.checksum_cache: 334 self.checksum_cache.save_extras() 335 self.checksum_cache.save_merge() 336 else: 337 bb.fetch2.fetcher_parse_save() 338 bb.fetch2.fetcher_parse_done() 339 340 def save_unitaskhashes(self): 341 self.unihash_cache.save(self.unitaskhashes) 342 343 def dump_sigtask(self, fn, task, stampbase, runtime): 344 345 tid = fn + ":" + task 346 referencestamp = stampbase 347 if isinstance(runtime, str) and runtime.startswith("customfile"): 348 sigfile = stampbase 349 referencestamp = runtime[11:] 350 elif runtime and tid in self.taskhash: 351 sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid) 352 else: 353 sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid] 354 355 with bb.utils.umask(0o002): 356 bb.utils.mkdirhier(os.path.dirname(sigfile)) 357 358 data = {} 359 data['task'] = task 360 data['basewhitelist'] = self.basewhitelist 361 data['taskwhitelist'] = self.taskwhitelist 362 data['taskdeps'] = self.taskdeps[fn][task] 363 data['basehash'] = self.basehash[tid] 364 data['gendeps'] = {} 365 data['varvals'] = {} 366 data['varvals'][task] = self.lookupcache[fn][task] 367 for dep in self.taskdeps[fn][task]: 368 if dep in self.basewhitelist: 369 continue 370 data['gendeps'][dep] = self.gendeps[fn][dep] 371 data['varvals'][dep] = self.lookupcache[fn][dep] 372 373 if runtime and tid in self.taskhash: 374 data['runtaskdeps'] = self.runtaskdeps[tid] 375 data['file_checksum_values'] = [(os.path.basename(f), cs) for f,cs in self.file_checksum_values[tid]] 376 data['runtaskhashes'] = {} 377 for dep in data['runtaskdeps']: 378 data['runtaskhashes'][dep] = self.get_unihash(dep) 379 data['taskhash'] = self.taskhash[tid] 380 data['unihash'] = self.get_unihash(tid) 381 382 taint = self.read_taint(fn, task, referencestamp) 383 if taint: 384 data['taint'] = taint 385 386 if runtime and tid in self.taints: 387 if 'nostamp:' in self.taints[tid]: 388 data['taint'] = self.taints[tid] 389 390 computed_basehash = calc_basehash(data) 391 if computed_basehash != self.basehash[tid]: 392 bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid)) 393 if runtime and tid in self.taskhash: 394 computed_taskhash = calc_taskhash(data) 395 if computed_taskhash != self.taskhash[tid]: 396 bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid)) 397 sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash) 398 399 fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.") 400 try: 401 with os.fdopen(fd, "wb") as stream: 402 p = pickle.dump(data, stream, -1) 403 stream.flush() 404 os.chmod(tmpfile, 0o664) 405 bb.utils.rename(tmpfile, sigfile) 406 except (OSError, IOError) as err: 407 try: 408 os.unlink(tmpfile) 409 except OSError: 410 pass 411 raise err 412 413 def dump_sigfn(self, fn, dataCaches, options): 414 if fn in self.taskdeps: 415 for task in self.taskdeps[fn]: 416 tid = fn + ":" + task 417 mc = bb.runqueue.mc_from_tid(tid) 418 if tid not in self.taskhash: 419 continue 420 if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]: 421 bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid) 422 bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid])) 423 self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True) 424 425class SignatureGeneratorBasicHash(SignatureGeneratorBasic): 426 name = "basichash" 427 428 def get_stampfile_hash(self, tid): 429 if tid in self.taskhash: 430 return self.taskhash[tid] 431 432 # If task is not in basehash, then error 433 return self.basehash[tid] 434 435 def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False): 436 if taskname != "do_setscene" and taskname.endswith("_setscene"): 437 tid = fn + ":" + taskname[:-9] 438 else: 439 tid = fn + ":" + taskname 440 if clean: 441 h = "*" 442 else: 443 h = self.get_stampfile_hash(tid) 444 445 return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.') 446 447 def stampcleanmask(self, stampbase, fn, taskname, extrainfo): 448 return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True) 449 450 def invalidate_task(self, task, d, fn): 451 bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task)) 452 bb.build.write_taint(task, d, fn) 453 454class SignatureGeneratorUniHashMixIn(object): 455 def __init__(self, data): 456 self.extramethod = {} 457 super().__init__(data) 458 459 def get_taskdata(self): 460 return (self.server, self.method, self.extramethod) + super().get_taskdata() 461 462 def set_taskdata(self, data): 463 self.server, self.method, self.extramethod = data[:3] 464 super().set_taskdata(data[3:]) 465 466 def client(self): 467 if getattr(self, '_client', None) is None: 468 self._client = hashserv.create_client(self.server) 469 return self._client 470 471 def get_stampfile_hash(self, tid): 472 if tid in self.taskhash: 473 # If a unique hash is reported, use it as the stampfile hash. This 474 # ensures that if a task won't be re-run if the taskhash changes, 475 # but it would result in the same output hash 476 unihash = self._get_unihash(tid) 477 if unihash is not None: 478 return unihash 479 480 return super().get_stampfile_hash(tid) 481 482 def set_unihash(self, tid, unihash): 483 (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid) 484 key = mc + ":" + self.tidtopn[tid] + ":" + taskname 485 self.unitaskhashes[key] = (self.taskhash[tid], unihash) 486 self.unihash[tid] = unihash 487 488 def _get_unihash(self, tid, checkkey=None): 489 if tid not in self.tidtopn: 490 return None 491 (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid) 492 key = mc + ":" + self.tidtopn[tid] + ":" + taskname 493 if key not in self.unitaskhashes: 494 return None 495 if not checkkey: 496 checkkey = self.taskhash[tid] 497 (key, unihash) = self.unitaskhashes[key] 498 if key != checkkey: 499 return None 500 return unihash 501 502 def get_unihash(self, tid): 503 taskhash = self.taskhash[tid] 504 505 # If its not a setscene task we can return 506 if self.setscenetasks and tid not in self.setscenetasks: 507 self.unihash[tid] = None 508 return taskhash 509 510 # TODO: This cache can grow unbounded. It probably only needs to keep 511 # for each task 512 unihash = self._get_unihash(tid) 513 if unihash is not None: 514 self.unihash[tid] = unihash 515 return unihash 516 517 # In the absence of being able to discover a unique hash from the 518 # server, make it be equivalent to the taskhash. The unique "hash" only 519 # really needs to be a unique string (not even necessarily a hash), but 520 # making it match the taskhash has a few advantages: 521 # 522 # 1) All of the sstate code that assumes hashes can be the same 523 # 2) It provides maximal compatibility with builders that don't use 524 # an equivalency server 525 # 3) The value is easy for multiple independent builders to derive the 526 # same unique hash from the same input. This means that if the 527 # independent builders find the same taskhash, but it isn't reported 528 # to the server, there is a better chance that they will agree on 529 # the unique hash. 530 unihash = taskhash 531 532 try: 533 method = self.method 534 if tid in self.extramethod: 535 method = method + self.extramethod[tid] 536 data = self.client().get_unihash(method, self.taskhash[tid]) 537 if data: 538 unihash = data 539 # A unique hash equal to the taskhash is not very interesting, 540 # so it is reported it at debug level 2. If they differ, that 541 # is much more interesting, so it is reported at debug level 1 542 hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server)) 543 else: 544 hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server)) 545 except ConnectionError as e: 546 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) 547 548 self.set_unihash(tid, unihash) 549 self.unihash[tid] = unihash 550 return unihash 551 552 def report_unihash(self, path, task, d): 553 import importlib 554 555 taskhash = d.getVar('BB_TASKHASH') 556 unihash = d.getVar('BB_UNIHASH') 557 report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1' 558 tempdir = d.getVar('T') 559 fn = d.getVar('BB_FILENAME') 560 tid = fn + ':do_' + task 561 key = tid + ':' + taskhash 562 563 if self.setscenetasks and tid not in self.setscenetasks: 564 return 565 566 # This can happen if locked sigs are in action. Detect and just abort 567 if taskhash != self.taskhash[tid]: 568 return 569 570 # Sanity checks 571 cache_unihash = self._get_unihash(tid, checkkey=taskhash) 572 if cache_unihash is None: 573 bb.fatal('%s not in unihash cache. Please report this error' % key) 574 575 if cache_unihash != unihash: 576 bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash)) 577 578 sigfile = None 579 sigfile_name = "depsig.do_%s.%d" % (task, os.getpid()) 580 sigfile_link = "depsig.do_%s" % task 581 582 try: 583 sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b') 584 585 locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d} 586 587 if "." in self.method: 588 (module, method) = self.method.rsplit('.', 1) 589 locs['method'] = getattr(importlib.import_module(module), method) 590 outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs) 591 else: 592 outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs) 593 594 try: 595 extra_data = {} 596 597 owner = d.getVar('SSTATE_HASHEQUIV_OWNER') 598 if owner: 599 extra_data['owner'] = owner 600 601 if report_taskdata: 602 sigfile.seek(0) 603 604 extra_data['PN'] = d.getVar('PN') 605 extra_data['PV'] = d.getVar('PV') 606 extra_data['PR'] = d.getVar('PR') 607 extra_data['task'] = task 608 extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8') 609 610 method = self.method 611 if tid in self.extramethod: 612 method = method + self.extramethod[tid] 613 614 data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data) 615 new_unihash = data['unihash'] 616 617 if new_unihash != unihash: 618 hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server)) 619 bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d) 620 self.set_unihash(tid, new_unihash) 621 d.setVar('BB_UNIHASH', new_unihash) 622 else: 623 hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server)) 624 except ConnectionError as e: 625 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) 626 finally: 627 if sigfile: 628 sigfile.close() 629 630 sigfile_link_path = os.path.join(tempdir, sigfile_link) 631 bb.utils.remove(sigfile_link_path) 632 633 try: 634 os.symlink(sigfile_name, sigfile_link_path) 635 except OSError: 636 pass 637 638 def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches): 639 try: 640 extra_data = {} 641 method = self.method 642 if tid in self.extramethod: 643 method = method + self.extramethod[tid] 644 645 data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data) 646 hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data))) 647 648 if data is None: 649 bb.warn("Server unable to handle unihash report") 650 return False 651 652 finalunihash = data['unihash'] 653 654 if finalunihash == current_unihash: 655 hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash)) 656 elif finalunihash == wanted_unihash: 657 hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash)) 658 self.set_unihash(tid, finalunihash) 659 return True 660 else: 661 # TODO: What to do here? 662 hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash)) 663 664 except ConnectionError as e: 665 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) 666 667 return False 668 669# 670# Dummy class used for bitbake-selftest 671# 672class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash): 673 name = "TestEquivHash" 674 def init_rundepcheck(self, data): 675 super().init_rundepcheck(data) 676 self.server = data.getVar('BB_HASHSERVE') 677 self.method = "sstate_output_hash" 678 679# 680# Dummy class used for bitbake-selftest 681# 682class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash): 683 name = "TestMulticonfigDepends" 684 supports_multiconfig_datacaches = True 685 686def dump_this_task(outfile, d): 687 import bb.parse 688 fn = d.getVar("BB_FILENAME") 689 task = "do_" + d.getVar("BB_CURRENTTASK") 690 referencestamp = bb.build.stamp_internal(task, d, None, True) 691 bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp) 692 693def init_colors(enable_color): 694 """Initialise colour dict for passing to compare_sigfiles()""" 695 # First set up the colours 696 colors = {'color_title': '\033[1m', 697 'color_default': '\033[0m', 698 'color_add': '\033[0;32m', 699 'color_remove': '\033[0;31m', 700 } 701 # Leave all keys present but clear the values 702 if not enable_color: 703 for k in colors.keys(): 704 colors[k] = '' 705 return colors 706 707def worddiff_str(oldstr, newstr, colors=None): 708 if not colors: 709 colors = init_colors(False) 710 diff = simplediff.diff(oldstr.split(' '), newstr.split(' ')) 711 ret = [] 712 for change, value in diff: 713 value = ' '.join(value) 714 if change == '=': 715 ret.append(value) 716 elif change == '+': 717 item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors) 718 ret.append(item) 719 elif change == '-': 720 item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors) 721 ret.append(item) 722 whitespace_note = '' 723 if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()): 724 whitespace_note = ' (whitespace changed)' 725 return '"%s"%s' % (' '.join(ret), whitespace_note) 726 727def list_inline_diff(oldlist, newlist, colors=None): 728 if not colors: 729 colors = init_colors(False) 730 diff = simplediff.diff(oldlist, newlist) 731 ret = [] 732 for change, value in diff: 733 value = ' '.join(value) 734 if change == '=': 735 ret.append("'%s'" % value) 736 elif change == '+': 737 item = '{color_add}+{value}{color_default}'.format(value=value, **colors) 738 ret.append(item) 739 elif change == '-': 740 item = '{color_remove}-{value}{color_default}'.format(value=value, **colors) 741 ret.append(item) 742 return '[%s]' % (', '.join(ret)) 743 744def clean_basepath(basepath): 745 basepath, dir, recipe_task = basepath.rsplit("/", 2) 746 cleaned = dir + '/' + recipe_task 747 748 if basepath[0] == '/': 749 return cleaned 750 751 if basepath.startswith("mc:") and basepath.count(':') >= 2: 752 mc, mc_name, basepath = basepath.split(":", 2) 753 mc_suffix = ':mc:' + mc_name 754 else: 755 mc_suffix = '' 756 757 # mc stuff now removed from basepath. Whatever was next, if present will be the first 758 # suffix. ':/', recipe path start, marks the end of this. Something like 759 # 'virtual:a[:b[:c]]:/path...' (b and c being optional) 760 if basepath[0] != '/': 761 cleaned += ':' + basepath.split(':/', 1)[0] 762 763 return cleaned + mc_suffix 764 765def clean_basepaths(a): 766 b = {} 767 for x in a: 768 b[clean_basepath(x)] = a[x] 769 return b 770 771def clean_basepaths_list(a): 772 b = [] 773 for x in a: 774 b.append(clean_basepath(x)) 775 return b 776 777def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False): 778 output = [] 779 780 colors = init_colors(color) 781 def color_format(formatstr, **values): 782 """ 783 Return colour formatted string. 784 NOTE: call with the format string, not an already formatted string 785 containing values (otherwise you could have trouble with { and } 786 characters) 787 """ 788 if not formatstr.endswith('{color_default}'): 789 formatstr += '{color_default}' 790 # In newer python 3 versions you can pass both of these directly, 791 # but we only require 3.4 at the moment 792 formatparams = {} 793 formatparams.update(colors) 794 formatparams.update(values) 795 return formatstr.format(**formatparams) 796 797 with open(a, 'rb') as f: 798 p1 = pickle.Unpickler(f) 799 a_data = p1.load() 800 with open(b, 'rb') as f: 801 p2 = pickle.Unpickler(f) 802 b_data = p2.load() 803 804 def dict_diff(a, b, whitelist=set()): 805 sa = set(a.keys()) 806 sb = set(b.keys()) 807 common = sa & sb 808 changed = set() 809 for i in common: 810 if a[i] != b[i] and i not in whitelist: 811 changed.add(i) 812 added = sb - sa 813 removed = sa - sb 814 return changed, added, removed 815 816 def file_checksums_diff(a, b): 817 from collections import Counter 818 # Handle old siginfo format 819 if isinstance(a, dict): 820 a = [(os.path.basename(f), cs) for f, cs in a.items()] 821 if isinstance(b, dict): 822 b = [(os.path.basename(f), cs) for f, cs in b.items()] 823 # Compare lists, ensuring we can handle duplicate filenames if they exist 824 removedcount = Counter(a) 825 removedcount.subtract(b) 826 addedcount = Counter(b) 827 addedcount.subtract(a) 828 added = [] 829 for x in b: 830 if addedcount[x] > 0: 831 addedcount[x] -= 1 832 added.append(x) 833 removed = [] 834 changed = [] 835 for x in a: 836 if removedcount[x] > 0: 837 removedcount[x] -= 1 838 for y in added: 839 if y[0] == x[0]: 840 changed.append((x[0], x[1], y[1])) 841 added.remove(y) 842 break 843 else: 844 removed.append(x) 845 added = [x[0] for x in added] 846 removed = [x[0] for x in removed] 847 return changed, added, removed 848 849 if 'basewhitelist' in a_data and a_data['basewhitelist'] != b_data['basewhitelist']: 850 output.append(color_format("{color_title}basewhitelist changed{color_default} from '%s' to '%s'") % (a_data['basewhitelist'], b_data['basewhitelist'])) 851 if a_data['basewhitelist'] and b_data['basewhitelist']: 852 output.append("changed items: %s" % a_data['basewhitelist'].symmetric_difference(b_data['basewhitelist'])) 853 854 if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']: 855 output.append(color_format("{color_title}taskwhitelist changed{color_default} from '%s' to '%s'") % (a_data['taskwhitelist'], b_data['taskwhitelist'])) 856 if a_data['taskwhitelist'] and b_data['taskwhitelist']: 857 output.append("changed items: %s" % a_data['taskwhitelist'].symmetric_difference(b_data['taskwhitelist'])) 858 859 if a_data['taskdeps'] != b_data['taskdeps']: 860 output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps']))) 861 862 if a_data['basehash'] != b_data['basehash'] and not collapsed: 863 output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash'])) 864 865 changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basewhitelist'] & b_data['basewhitelist']) 866 if changed: 867 for dep in sorted(changed): 868 output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep])) 869 if a_data['gendeps'][dep] and b_data['gendeps'][dep]: 870 output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep])) 871 if added: 872 for dep in sorted(added): 873 output.append(color_format("{color_title}Dependency on variable %s was added") % (dep)) 874 if removed: 875 for dep in sorted(removed): 876 output.append(color_format("{color_title}Dependency on Variable %s was removed") % (dep)) 877 878 879 changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals']) 880 if changed: 881 for dep in sorted(changed): 882 oldval = a_data['varvals'][dep] 883 newval = b_data['varvals'][dep] 884 if newval and oldval and ('\n' in oldval or '\n' in newval): 885 diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='') 886 # Cut off the first two lines, since we aren't interested in 887 # the old/new filename (they are blank anyway in this case) 888 difflines = list(diff)[2:] 889 if color: 890 # Add colour to diff output 891 for i, line in enumerate(difflines): 892 if line.startswith('+'): 893 line = color_format('{color_add}{line}', line=line) 894 difflines[i] = line 895 elif line.startswith('-'): 896 line = color_format('{color_remove}{line}', line=line) 897 difflines[i] = line 898 output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines))) 899 elif newval and oldval and (' ' in oldval or ' ' in newval): 900 output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors))) 901 else: 902 output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval)) 903 904 if not 'file_checksum_values' in a_data: 905 a_data['file_checksum_values'] = {} 906 if not 'file_checksum_values' in b_data: 907 b_data['file_checksum_values'] = {} 908 909 changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values']) 910 if changed: 911 for f, old, new in changed: 912 output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new)) 913 if added: 914 for f in added: 915 output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f)) 916 if removed: 917 for f in removed: 918 output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f)) 919 920 if not 'runtaskdeps' in a_data: 921 a_data['runtaskdeps'] = {} 922 if not 'runtaskdeps' in b_data: 923 b_data['runtaskdeps'] = {} 924 925 if not collapsed: 926 if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']): 927 changed = ["Number of task dependencies changed"] 928 else: 929 changed = [] 930 for idx, task in enumerate(a_data['runtaskdeps']): 931 a = a_data['runtaskdeps'][idx] 932 b = b_data['runtaskdeps'][idx] 933 if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed: 934 changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b])) 935 936 if changed: 937 clean_a = clean_basepaths_list(a_data['runtaskdeps']) 938 clean_b = clean_basepaths_list(b_data['runtaskdeps']) 939 if clean_a != clean_b: 940 output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors)) 941 else: 942 output.append(color_format("{color_title}runtaskdeps changed:")) 943 output.append("\n".join(changed)) 944 945 946 if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data: 947 a = a_data['runtaskhashes'] 948 b = b_data['runtaskhashes'] 949 changed, added, removed = dict_diff(a, b) 950 if added: 951 for dep in sorted(added): 952 bdep_found = False 953 if removed: 954 for bdep in removed: 955 if b[dep] == a[bdep]: 956 #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep)) 957 bdep_found = True 958 if not bdep_found: 959 output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep])) 960 if removed: 961 for dep in sorted(removed): 962 adep_found = False 963 if added: 964 for adep in added: 965 if b[adep] == a[dep]: 966 #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep)) 967 adep_found = True 968 if not adep_found: 969 output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep])) 970 if changed: 971 for dep in sorted(changed): 972 if not collapsed: 973 output.append(color_format("{color_title}Hash for dependent task %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep])) 974 if callable(recursecb): 975 recout = recursecb(dep, a[dep], b[dep]) 976 if recout: 977 if collapsed: 978 output.extend(recout) 979 else: 980 # If a dependent hash changed, might as well print the line above and then defer to the changes in 981 # that hash since in all likelyhood, they're the same changes this task also saw. 982 output = [output[-1]] + recout 983 984 a_taint = a_data.get('taint', None) 985 b_taint = b_data.get('taint', None) 986 if a_taint != b_taint: 987 if a_taint and a_taint.startswith('nostamp:'): 988 a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):') 989 if b_taint and b_taint.startswith('nostamp:'): 990 b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):') 991 output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint)) 992 993 return output 994 995 996def calc_basehash(sigdata): 997 task = sigdata['task'] 998 basedata = sigdata['varvals'][task] 999 1000 if basedata is None: 1001 basedata = '' 1002 1003 alldeps = sigdata['taskdeps'] 1004 for dep in alldeps: 1005 basedata = basedata + dep 1006 val = sigdata['varvals'][dep] 1007 if val is not None: 1008 basedata = basedata + str(val) 1009 1010 return hashlib.sha256(basedata.encode("utf-8")).hexdigest() 1011 1012def calc_taskhash(sigdata): 1013 data = sigdata['basehash'] 1014 1015 for dep in sigdata['runtaskdeps']: 1016 data = data + sigdata['runtaskhashes'][dep] 1017 1018 for c in sigdata['file_checksum_values']: 1019 if c[1]: 1020 data = data + c[1] 1021 1022 if 'taint' in sigdata: 1023 if 'nostamp:' in sigdata['taint']: 1024 data = data + sigdata['taint'][8:] 1025 else: 1026 data = data + sigdata['taint'] 1027 1028 return hashlib.sha256(data.encode("utf-8")).hexdigest() 1029 1030 1031def dump_sigfile(a): 1032 output = [] 1033 1034 with open(a, 'rb') as f: 1035 p1 = pickle.Unpickler(f) 1036 a_data = p1.load() 1037 1038 output.append("basewhitelist: %s" % (a_data['basewhitelist'])) 1039 1040 output.append("taskwhitelist: %s" % (a_data['taskwhitelist'])) 1041 1042 output.append("Task dependencies: %s" % (sorted(a_data['taskdeps']))) 1043 1044 output.append("basehash: %s" % (a_data['basehash'])) 1045 1046 for dep in a_data['gendeps']: 1047 output.append("List of dependencies for variable %s is %s" % (dep, a_data['gendeps'][dep])) 1048 1049 for dep in a_data['varvals']: 1050 output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep])) 1051 1052 if 'runtaskdeps' in a_data: 1053 output.append("Tasks this task depends on: %s" % (a_data['runtaskdeps'])) 1054 1055 if 'file_checksum_values' in a_data: 1056 output.append("This task depends on the checksums of files: %s" % (a_data['file_checksum_values'])) 1057 1058 if 'runtaskhashes' in a_data: 1059 for dep in a_data['runtaskhashes']: 1060 output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep])) 1061 1062 if 'taint' in a_data: 1063 if a_data['taint'].startswith('nostamp:'): 1064 msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):') 1065 else: 1066 msg = a_data['taint'] 1067 output.append("Tainted (by forced/invalidated task): %s" % msg) 1068 1069 if 'task' in a_data: 1070 computed_basehash = calc_basehash(a_data) 1071 output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash'])) 1072 else: 1073 output.append("Unable to compute base hash") 1074 1075 computed_taskhash = calc_taskhash(a_data) 1076 output.append("Computed task hash is %s" % computed_taskhash) 1077 1078 return output 1079