#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

def init(d):
    siggens = [obj for obj in globals().values()
               if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)

    logger.error("Invalid signature generator '%s', using default 'noop'\n"
                 "Available generators: %s", desired,
                 ', '.join(obj.name for obj in siggens))
    return SignatureGenerator(d)
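
# Example (illustrative): with BB_SIGNATURE_HANDLER = "basichash" set in the
# configuration, init() returns a SignatureGeneratorBasicHash instance; an
# unrecognised handler name is logged and the "noop" SignatureGenerator is
# used as the fallback.
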
class SignatureGenerator(object):
    """
    A dummy "noop" signature generator which performs no dependency analysis
    and hashes each task from its identifier alone. Real generators derive
    from this class.
    """
    name = "noop"

    # If the derived class supports multiconfig datacaches, set this to True
    # The default is False for backward compatibility with derived signature
    # generators that do not understand multiconfig caches
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, variant):
        return

    def postparsing_clean_cache(self):
        return

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, d, fn):
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to signature
        generator functions. If the signature generator supports multiconfig
        caches, the entire dictionary of data caches is sent, otherwise a
        special proxy is sent that supports both index access to all
        multiconfigs, and also direct access for the default multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed in
        the future when support for multiconfig data caches is mandatory.
        """
        class DataCacheProxy(object):
            def __init__(self):
                pass

            def __getitem__(self, key):
                return dataCaches[key]

            def __getattr__(self, name):
                return getattr(dataCaches[mc], name)

        if cls.supports_multiconfig_datacaches:
            return dataCaches

        return DataCacheProxy()
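
# Example (illustrative): for a generator with
# supports_multiconfig_datacaches = False, the proxy returned by
# get_data_caches(dataCaches, mc) forwards plain attribute access to the
# default multiconfig, so both of these resolve to the same object:
#   SignatureGenerator.get_data_caches(dataCaches, mc).pkg_fn
#   dataCaches[mc].pkg_fn
# while index access to another multiconfig still works:
#   SignatureGenerator.get_data_caches(dataCaches, mc)['mc2'].pkg_fn
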
class SignatureGeneratorBasic(SignatureGenerator):
    """
    A signature generator which computes task hashes from each task's
    variable and file dependencies.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.setscenetasks = set()
        self.basewhitelist = set((data.getVar("BB_HASHBASE_WHITELIST") or "").split())
        self.taskwhitelist = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None
        if self.taskwhitelist:
            self.twl = re.compile(self.taskwhitelist)
        else:
            self.twl = None

    def _build_data(self, fn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basewhitelist)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basewhitelist, fn)

        for task in tasklist:
            tid = fn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % fn)
            raise

        # Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH_task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the whitelist
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True
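
    # Example (illustrative, hypothetical pattern): with
    # BB_HASHTASK_WHITELIST = "gcc-cross-.*", a recipe whose name does not
    # match the pattern drops any run-time dependency whose recipe name does
    # match it, so its task hashes are insulated from gcc-cross-* changes.
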
    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):

        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[fn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.supports_multiconfig_datacaches and mc != depmc:
                # If the signature generator doesn't understand multiconfig
                # data caches, any dependency not in the same multiconfig must
                # be skipped for backward compatibility
                continue
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            for (f, cs) in checksums:
                self.file_checksum_values[tid].append((f, cs))

        taskdep = dataCaches[mc].task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):

        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            if dep in self.unihash:
                if self.unihash[dep] is None:
                    data = data + self.taskhash[dep]
                else:
                    data = data + self.unihash[dep]
            else:
                data = data + self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                data = data + cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data = data + self.taints[tid][8:]
            else:
                data = data + self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task])
        return h
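
    # A sketch of the composition above (pseudocode, not executed):
    #   taskhash = sha256(basehash
    #                     + unihash of each run-time dependency
    #                     + checksum of each watched file
    #                     + taint, if the task is nostamp or was forced)
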
    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def dump_sigtask(self, fn, task, stampbase, runtime):

        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basewhitelist'] = self.basewhitelist
        data['taskwhitelist'] = self.taskwhitelist
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basewhitelist:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = [(os.path.basename(f), cs) for f, cs in self.file_checksum_values[tid]]
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with os.fdopen(fd, "wb") as stream:
                pickle.dump(data, stream, -1)
                stream.flush()
            os.chmod(tmpfile, 0o664)
            os.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err
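
    # Example (illustrative): for a task with run-time information available,
    # dump_sigtask() writes a file such as
    #   <stampbase>.do_compile.sigdata.<unihash>
    # and otherwise
    #   <stampbase>.do_compile.sigbasedata.<basehash>
    # written atomically via a temporary file and os.rename().
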
    def dump_sigfn(self, fn, dataCaches, options):
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If the task is not in taskhash, fall back to the basehash; a missing
        # entry there is a genuine error
        return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        if taskname != "do_setscene" and taskname.endswith("_setscene"):
            tid = fn + ":" + taskname[:-9]
        else:
            tid = fn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)
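
# Example (illustrative): stampfile() produces names of the form
#   <stampbase>.do_compile.<taskhash>.<extrainfo>
# and stampcleanmask() the matching wildcard form
#   <stampbase>.do_compile.*.<extrainfo>
# so stale stamps for any previous hash can be matched with one pattern.
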
class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures that a task won't be re-run if the taskhash changes but
            # it would result in the same output hash
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash
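
    # Example (illustrative): the persistent unihash cache is keyed by
    # "<mc>:<recipe name>:<task name>", e.g. ":zlib:do_compile" for the
    # default (empty) multiconfig, and stores a (taskhash, unihash) pair so
    # an entry recorded for an older taskhash is ignored.
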
    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task, we can return
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # the most recent entry for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same still
        #    works
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) The value is easy for multiple independent builders to derive the
        #    same unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug(2, 'No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except hashserv.client.HashConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash

    def report_unihash(self, path, task, d):
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just return early
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug(1, 'Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug(1, 'Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except hashserv.client.HashConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass
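
    # Example (illustrative, hypothetical names): with
    #   self.method = "my_layer.sig.output_hash"
    # report_unihash() imports my_layer.sig and calls
    #   output_hash(path, sigfile, task, d)
    # to obtain the output hash reported to the equivalence server.
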
    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except hashserv.client.HashConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
    name = "TestMulticonfigDepends"
    supports_multiconfig_datacaches = True

def dump_this_task(outfile, d):
    import bb.parse
    fn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.build.stamp_internal(task, d, None, True)
    bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title':   '\033[1m',
              'color_default': '\033[0m',
              'color_add':     '\033[0;32m',
              'color_remove':  '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)
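
# Example (illustrative): with colours disabled,
#   worddiff_str('a b c', 'a d c')
# returns
#   '"a [-b-] {+d+} c"'
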
def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

def clean_basepath(a):
    mc = None
    if a.startswith("mc:"):
        _, mc, a = a.split(":", 2)
    b = a.rsplit("/", 2)[1] + '/' + a.rsplit("/", 2)[2]
    if a.startswith("virtual:"):
        b = b + ":" + a.rsplit(":", 1)[0]
    if mc:
        b = b + ":mc:" + mc
    return b

def clean_basepaths(a):
    b = {}
    for x in a:
        b[clean_basepath(x)] = a[x]
    return b

def clean_basepaths_list(a):
    b = []
    for x in a:
        b.append(clean_basepath(x))
    return b

def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with open(a, 'rb') as f:
        p1 = pickle.Unpickler(f)
        a_data = p1.load()
    with open(b, 'rb') as f:
        p2 = pickle.Unpickler(f)
        b_data = p2.load()

    def dict_diff(a, b, whitelist=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in whitelist:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed
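
    # Example (illustrative):
    #   dict_diff({'A': 1, 'B': 2}, {'A': 1, 'B': 3, 'C': 4})
    # returns changed={'B'}, added={'C'}, removed=set()
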
    def file_checksums_diff(a, b):
        from collections import Counter
        # Handle old siginfo format
        if isinstance(a, dict):
            a = [(os.path.basename(f), cs) for f, cs in a.items()]
        if isinstance(b, dict):
            b = [(os.path.basename(f), cs) for f, cs in b.items()]
        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    if 'basewhitelist' in a_data and a_data['basewhitelist'] != b_data['basewhitelist']:
        output.append(color_format("{color_title}basewhitelist changed{color_default} from '%s' to '%s'") % (a_data['basewhitelist'], b_data['basewhitelist']))
        if a_data['basewhitelist'] and b_data['basewhitelist']:
            output.append("changed items: %s" % a_data['basewhitelist'].symmetric_difference(b_data['basewhitelist']))

    if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']:
        output.append(color_format("{color_title}taskwhitelist changed{color_default} from '%s' to '%s'") % (a_data['taskwhitelist'], b_data['taskwhitelist']))
        if a_data['taskwhitelist'] and b_data['taskwhitelist']:
            output.append("changed items: %s" % a_data['taskwhitelist'].symmetric_difference(b_data['taskwhitelist']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basewhitelist'] & b_data['basewhitelist'])
    if changed:
        for dep in changed:
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in added:
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in removed:
            output.append(color_format("{color_title}Dependency on variable %s was removed") % (dep))


    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in changed:
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if not 'file_checksum_values' in a_data:
        a_data['file_checksum_values'] = {}
    if not 'file_checksum_values' in b_data:
        b_data['file_checksum_values'] = {}

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if not 'runtaskdeps' in a_data:
        a_data['runtaskdeps'] = {}
    if not 'runtaskdeps' in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = a_data['runtaskhashes']
        b = b_data['runtaskhashes']
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in added:
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep]))
        if removed:
            for dep in removed:
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep]))
        if changed:
            for dep in changed:
                if not collapsed:
                    output.append(color_format("{color_title}Hash for dependent task %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
                            output = [output[-1]] + recout

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output
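
# Example (illustrative): compare two dumped signature files and print the
# human-readable report:
#   for line in compare_sigfiles('old.sigdata', 'new.sigdata', color=True):
#       print(line)
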
def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in alldeps:
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

def calc_taskhash(sigdata):
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()
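
# Example (illustrative): these helpers recompute the hashes from a dumped
# sigdata dictionary, mirroring the consistency checks in dump_sigtask(), so
# for data written by dump_sigtask():
#   calc_basehash(data) == data['basehash']
#   calc_taskhash(data) == data['taskhash']   # when run-time info was dumped
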
def dump_sigfile(a):
    output = []

    with open(a, 'rb') as f:
        p1 = pickle.Unpickler(f)
        a_data = p1.load()

    output.append("basewhitelist: %s" % (a_data['basewhitelist']))

    output.append("taskwhitelist: %s" % (a_data['taskwhitelist']))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in a_data['gendeps']:
        output.append("List of dependencies for variable %s is %s" % (dep, a_data['gendeps'][dep]))

    for dep in a_data['varvals']:
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (a_data['runtaskdeps']))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (a_data['file_checksum_values']))

    if 'runtaskhashes' in a_data:
        for dep in a_data['runtaskhashes']:
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output
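
# Example (illustrative): print the contents of a single dumped signature
# file (this is what the bitbake-dumpsig helper builds on):
#   for line in dump_sigfile('stamp.do_compile.sigdata.<hash>'):
#       print(line)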