# resulttool - regression analysis
#
# Copyright (c) 2019, Intel Corporation.
# Copyright (c) 2019, Linux Foundation
#
# SPDX-License-Identifier: GPL-2.0-only
#

import resulttool.resultutils as resultutils

from oeqa.utils.git import GitRepo
import oeqa.utils.gitarchive as gitarchive

METADATA_MATCH_TABLE = {
    "oeselftest": "OESELFTEST_METADATA"
}

OESELFTEST_METADATA_GUESS_TABLE = {
    "trigger-build-posttrigger": {
        "run_all_tests": False,
        "run_tests": ["buildoptions.SourceMirroring.test_yocto_source_mirror"],
        "skips": None,
        "machine": None,
        "select_tags": None,
        "exclude_tags": None
    },
    "reproducible": {
        "run_all_tests": False,
        "run_tests": ["reproducible"],
        "skips": None,
        "machine": None,
        "select_tags": None,
        "exclude_tags": None
    },
    "arch-qemu-quick": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": None,
        "machine": None,
        "select_tags": ["machine"],
        "exclude_tags": None
    },
    "arch-qemu-full-x86-or-x86_64": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": None,
        "machine": None,
        "select_tags": ["machine", "toolchain-system"],
        "exclude_tags": None
    },
    "arch-qemu-full-others": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": None,
        "machine": None,
        "select_tags": ["machine", "toolchain-user"],
        "exclude_tags": None
    },
    "selftest": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"],
        "machine": None,
        "select_tags": None,
        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
    },
    "bringup": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"],
        "machine": None,
        "select_tags": None,
        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
    }
}
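
# Illustrative sketch only: each entry above mirrors the OESELFTEST_METADATA
# dictionary that newer oe-selftest runs record in their result configuration.
# A hypothetical configuration carrying that metadata could look like:
#
#   "configuration": {
#       "TEST_TYPE": "oeselftest",
#       "MACHINE": "qemux86-64",
#       "OESELFTEST_METADATA": {
#           "run_all_tests": True,
#           "run_tests": None,
#           "skips": None,
#           "machine": None,
#           "select_tags": ["machine"],
#           "exclude_tags": None
#       }
#   }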

STATUS_STRINGS = {
    "None": "No matching test result"
}

REGRESSIONS_DISPLAY_LIMIT = 50

MISSING_TESTS_BANNER = "-------------------------- Missing tests --------------------------"
ADDITIONAL_DATA_BANNER = "--------------------- Matches and improvements --------------------"

def test_has_at_least_one_matching_tag(test, tag_list):
    return "oetags" in test and any(oetag in tag_list for oetag in test["oetags"])

def all_tests_have_at_least_one_matching_tag(results, tag_list):
    return all(test_has_at_least_one_matching_tag(test_result, tag_list) or test_name.startswith("ptestresult") for (test_name, test_result) in results.items())

def any_test_have_any_matching_tag(results, tag_list):
    return any(test_has_at_least_one_matching_tag(test, tag_list) for test in results.values())

def have_skipped_test(result, test_prefix):
    return all(result[test]['status'] == "SKIPPED" for test in result if test.startswith(test_prefix))

def have_all_tests_skipped(result, test_prefixes_list):
    return all(have_skipped_test(result, test_prefix) for test_prefix in test_prefixes_list)

def guess_oeselftest_metadata(results):
    """
    When an oeselftest test result lacks OESELFTEST_METADATA, we can try to guess it based on the results content.
    Check the results for specific values (absence/presence of oetags, number and name of executed tests...),
    and if they match one of the known configurations from the autobuilder configuration, apply the guessed
    OESELFTEST_METADATA to allow proper test filtering.
    This guessing process is tightly coupled to config.json in the autobuilder. It should trigger less and less,
    as new tests will have OESELFTEST_METADATA properly appended at test reporting time.
    """

    if len(results) == 1 and "buildoptions.SourceMirroring.test_yocto_source_mirror" in results:
        return OESELFTEST_METADATA_GUESS_TABLE['trigger-build-posttrigger']
    elif all(result.startswith("reproducible") for result in results):
        return OESELFTEST_METADATA_GUESS_TABLE['reproducible']
    elif all_tests_have_at_least_one_matching_tag(results, ["machine"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-quick']
    elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-system"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-x86-or-x86_64']
    elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-user"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-others']
    elif not any_test_have_any_matching_tag(results, ["machine", "toolchain-user", "toolchain-system"]):
        if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"]):
            return OESELFTEST_METADATA_GUESS_TABLE['selftest']
        elif have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"]):
            return OESELFTEST_METADATA_GUESS_TABLE['bringup']

    return None
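
# Illustrative sketch (values are hypothetical, not from a real autobuilder run):
# a results dict containing only the source mirror selftest would be classified
# as the "trigger-build-posttrigger" configuration:
#
#   guess_oeselftest_metadata({
#       "buildoptions.SourceMirroring.test_yocto_source_mirror": {"status": "PASSED"}
#   })
#   # -> OESELFTEST_METADATA_GUESS_TABLE['trigger-build-posttrigger']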

def metadata_matches(base_configuration, target_configuration):
    """
    For the passed base and target, check the test type. If the test type matches
    one of the properties described in METADATA_MATCH_TABLE, compare the
    corresponding metadata if it is present in base. Return True if the metadata
    matches, or if base lacks some data (either TEST_TYPE or the corresponding metadata).
    """
    test_type = base_configuration.get('TEST_TYPE')
    if test_type not in METADATA_MATCH_TABLE:
        return True

    metadata_key = METADATA_MATCH_TABLE.get(test_type)
    if target_configuration.get(metadata_key) != base_configuration.get(metadata_key):
        return False

    return True


def machine_matches(base_configuration, target_configuration):
    return base_configuration.get('MACHINE') == target_configuration.get('MACHINE')


def can_be_compared(logger, base, target):
    """
    Some tests are not relevant to compare, for example oeselftest runs with
    different test sets or parameters. Return True if the tests can be compared.
    """
    ret = True
    base_configuration = base['configuration']
    target_configuration = target['configuration']

    # Older test results lack proper OESELFTEST_METADATA: if not present, try to guess it based on the test results.
    if base_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in base_configuration:
        guess = guess_oeselftest_metadata(base['result'])
        if guess is None:
            logger.error(f"ERROR: did not manage to guess oeselftest metadata for {base_configuration['STARTTIME']}")
        else:
            logger.debug(f"Enriching {base_configuration['STARTTIME']} with {guess}")
            base_configuration['OESELFTEST_METADATA'] = guess
    if target_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in target_configuration:
        guess = guess_oeselftest_metadata(target['result'])
        if guess is None:
            logger.error(f"ERROR: did not manage to guess oeselftest metadata for {target_configuration['STARTTIME']}")
        else:
            logger.debug(f"Enriching {target_configuration['STARTTIME']} with {guess}")
            target_configuration['OESELFTEST_METADATA'] = guess

    # Test runs with LTP results in them should only be compared with other runs with LTP tests in them
    if base_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in base['result']):
        ret = target_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in target['result'])

    return ret and metadata_matches(base_configuration, target_configuration) \
        and machine_matches(base_configuration, target_configuration)

def get_status_str(raw_status):
    raw_status_lower = raw_status.lower() if raw_status else "None"
    return STATUS_STRINGS.get(raw_status_lower, raw_status)

def get_additional_info_line(new_pass_count, new_tests):
    result = []
    if new_tests:
        result.append(f'+{new_tests} test(s) present')
    if new_pass_count:
        result.append(f'+{new_pass_count} test(s) now passing')

    if not result:
        return ""

    return ' -> ' + ', '.join(result) + '\n'
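
# Worked example (purely illustrative): with 3 new tests and 2 newly passing
# tests, the line appended to the report would be:
#
#   get_additional_info_line(new_pass_count=2, new_tests=3)
#   # -> " -> +3 test(s) present, +2 test(s) now passing\n"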

def compare_result(logger, base_name, target_name, base_result, target_result, display_limit=None):
    base_result = base_result.get('result')
    target_result = target_result.get('result')
    result = {}
    new_tests = 0
    regressions = {}
    resultstring = ""
    new_tests = 0
    new_pass_count = 0

    display_limit = int(display_limit) if display_limit else REGRESSIONS_DISPLAY_LIMIT

    if base_result and target_result:
        for k in base_result:
            if k in ['ptestresult.rawlogs', 'ptestresult.sections']:
                continue
            base_testcase = base_result[k]
            base_status = base_testcase.get('status')
            if base_status:
                target_testcase = target_result.get(k, {})
                target_status = target_testcase.get('status')
                if base_status != target_status:
                    result[k] = {'base': base_status, 'target': target_status}
            else:
                logger.error('Failed to retrieve base test case status: %s' % k)

        # Also count new tests that were not present in base results: it
        # could be newly added tests, but it could also highlight some test
        # renames or fixed faulty ptests
        for k in target_result:
            if k not in base_result:
                new_tests += 1
        if result:
            new_pass_count = sum(test['target'] is not None and test['target'].startswith("PASS") for test in result.values())
            # Print a regression report only if at least one test has a regression status (FAIL, SKIPPED, absent...)
            if new_pass_count < len(result):
                resultstring = "Regression: %s\n %s\n" % (base_name, target_name)
                for k in sorted(result):
                    if not result[k]['target'] or not result[k]['target'].startswith("PASS"):
                        # Differentiate each ptest kind when listing regressions
                        key_parts = k.split('.')
                        key = '.'.join(key_parts[:2]) if k.startswith('ptest') else key_parts[0]
                        # Append new regression to corresponding test family
                        regressions[key] = regressions.setdefault(key, []) + [' %s: %s -> %s\n' % (k, get_status_str(result[k]['base']), get_status_str(result[k]['target']))]
                resultstring += f" Total: {sum([len(regressions[r]) for r in regressions])} new regression(s):\n"
                for k in regressions:
                    resultstring += f" {len(regressions[k])} regression(s) for {k}\n"
                    count_to_print = min([display_limit, len(regressions[k])]) if display_limit > 0 else len(regressions[k])
                    resultstring += ''.join(regressions[k][:count_to_print])
                    if count_to_print < len(regressions[k]):
                        resultstring += ' [...]\n'
                if new_pass_count > 0:
                    resultstring += f' Additionally, {new_pass_count} previously failing test(s) is/are now passing\n'
                if new_tests > 0:
                    resultstring += f' Additionally, {new_tests} new test(s) is/are present\n'
            else:
                resultstring = "%s\n%s\n" % (base_name, target_name)
                result = None
        else:
            resultstring = "%s\n%s\n" % (base_name, target_name)

    if not result:
        additional_info = get_additional_info_line(new_pass_count, new_tests)
        if additional_info:
            resultstring += additional_info

    return result, resultstring
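
# Sketch of the return value (the test name below is hypothetical): compare_result()
# returns a (result, resultstring) pair. result maps each differing test to its
# two statuses, e.g.
#   {"ptestresult.glibc.tst-foo": {"base": "PASSED", "target": "FAILED"}}
# and resultstring is the human-readable report assembled above. result is empty
# or None when there is nothing to flag as a regression, in which case only the
# summary line from get_additional_info_line() may be appended.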

def get_results(logger, source):
    return resultutils.load_resultsdata(source, configmap=resultutils.regression_map)

def regression(args, logger):
    base_results = get_results(logger, args.base_result)
    target_results = get_results(logger, args.target_result)

    regression_common(args, logger, base_results, target_results)

# Some test case naming is poor and contains random strings, particularly lttng/babeltrace.
# Truncating the test names works since they contain file and line number identifiers
# which allows us to match them without the random components.
def fixup_ptest_names(results, logger):
    for r in results:
        for i in results[r]:
            tests = list(results[r][i]['result'].keys())
            for test in tests:
                new = None
                if test.startswith(("ptestresult.lttng-tools.", "ptestresult.babeltrace.", "ptestresult.babeltrace2")) and "_-_" in test:
                    new = test.split("_-_")[0]
                elif test.startswith(("ptestresult.curl.")) and "__" in test:
                    new = test.split("__")[0]
                elif test.startswith(("ptestresult.dbus.")) and "__" in test:
                    new = test.split("__")[0]
                elif test.startswith("ptestresult.binutils") and "build-st-" in test:
                    new = test.split(" ")[0]
                elif test.startswith("ptestresult.gcc") and "/tmp/runtest." in test:
                    new = ".".join(test.split(".")[:2])
                if new:
                    results[r][i]['result'][new] = results[r][i]['result'][test]
                    del results[r][i]['result'][test]

def regression_common(args, logger, base_results, target_results):
    if args.base_result_id:
        base_results = resultutils.filter_resultsdata(base_results, args.base_result_id)
    if args.target_result_id:
        target_results = resultutils.filter_resultsdata(target_results, args.target_result_id)

    fixup_ptest_names(base_results, logger)
    fixup_ptest_names(target_results, logger)

    matches = []
    regressions = []
    notfound = []

    for a in base_results:
        if a in target_results:
            base = list(base_results[a].keys())
            target = list(target_results[a].keys())
            # We may have multiple base/targets which are for different configurations. Start by
            # removing any pairs which match
            for c in base.copy():
                for b in target.copy():
                    if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
                        continue
                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b], args.limit)
                    if not res:
                        matches.append(resstr)
                        base.remove(c)
                        target.remove(b)
                        break
            # Should only now see regressions, we may not be able to match multiple pairs directly
            for c in base:
                for b in target:
                    if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
                        continue
                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b], args.limit)
                    if res:
                        regressions.append(resstr)
        else:
            notfound.append("%s not found in target" % a)
    print("\n".join(sorted(regressions)))
    print("\n" + MISSING_TESTS_BANNER + "\n")
    print("\n".join(sorted(notfound)))
    print("\n" + ADDITIONAL_DATA_BANNER + "\n")
    print("\n".join(sorted(matches)))
    return 0
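
# Note: regression_git() below looks up results stored as git tags of the form
# "{branch}/{commit_number}-g{commit}/{tag_number}"; a hypothetical example would
# be "master/1234-gdeadbeef/0" (values are illustrative only).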

def regression_git(args, logger):
    base_results = {}
    target_results = {}

    tag_name = "{branch}/{commit_number}-g{commit}/{tag_number}"
    repo = GitRepo(args.repo)

    revs = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch)

    if args.branch2:
        revs2 = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch2)
        if not len(revs2):
            logger.error("No revisions found to compare against")
            return 1
        if not len(revs):
            logger.error("No revision to report on found")
            return 1
    else:
        if len(revs) < 2:
            logger.error("Only %d tester revisions found, unable to generate report" % len(revs))
            return 1

    # Pick revisions
    if args.commit:
        if args.commit_number:
            logger.warning("Ignoring --commit-number as --commit was specified")
        index1 = gitarchive.rev_find(revs, 'commit', args.commit)
    elif args.commit_number:
        index1 = gitarchive.rev_find(revs, 'commit_number', args.commit_number)
    else:
        index1 = len(revs) - 1

    if args.branch2:
        revs2.append(revs[index1])
        index1 = len(revs2) - 1
        revs = revs2

    if args.commit2:
        if args.commit_number2:
            logger.warning("Ignoring --commit-number2 as --commit2 was specified")
        index2 = gitarchive.rev_find(revs, 'commit', args.commit2)
    elif args.commit_number2:
        index2 = gitarchive.rev_find(revs, 'commit_number', args.commit_number2)
    else:
        if index1 > 0:
            index2 = index1 - 1
            # Find the closest matching commit number for comparison
            # In future we could check the commit is a common ancestor and
            # continue back if not, but this is good enough for now
            while index2 > 0 and revs[index2].commit_number > revs[index1].commit_number:
                index2 = index2 - 1
        else:
            logger.error("Unable to determine the other commit, use "
                         "--commit2 or --commit-number2 to specify it")
            return 1

    logger.info("Comparing:\n%s\nto\n%s\n" % (revs[index1], revs[index2]))

    base_results = resultutils.git_get_result(repo, revs[index1][2])
    target_results = resultutils.git_get_result(repo, revs[index2][2])

    regression_common(args, logger, base_results, target_results)

    return 0

def register_commands(subparsers):
    """Register subcommands from this plugin"""

    parser_build = subparsers.add_parser('regression', help='regression file/directory analysis',
                                         description='regression analysis comparing the base set of results to the target results',
                                         group='analysis')
    parser_build.set_defaults(func=regression)
    parser_build.add_argument('base_result',
                              help='base result file/directory/URL for the comparison')
    parser_build.add_argument('target_result',
                              help='target result file/directory/URL to compare with')
    parser_build.add_argument('-b', '--base-result-id', default='',
                              help='(optional) filter the base results to this result ID')
    parser_build.add_argument('-t', '--target-result-id', default='',
                              help='(optional) filter the target results to this result ID')
    parser_build.add_argument('-l', '--limit', default=REGRESSIONS_DISPLAY_LIMIT,
                              help="Maximum number of changes to display per test. Can be set to 0 to print all changes")

    parser_build = subparsers.add_parser('regression-git', help='regression git analysis',
                                         description='regression analysis comparing base result set to target '
                                                     'result set',
                                         group='analysis')
    parser_build.set_defaults(func=regression_git)
    parser_build.add_argument('repo',
                              help='the git repository containing the data')
    parser_build.add_argument('-b', '--base-result-id', default='',
                              help='(optional) select regressions based on configurations by default, unless a base result '
                                   'id is provided')
    parser_build.add_argument('-t', '--target-result-id', default='',
                              help='(optional) select regressions based on configurations by default, unless a target result '
                                   'id is provided')

    parser_build.add_argument('--branch', '-B', default='master', help="Branch to find commit in")
    parser_build.add_argument('--branch2', help="Branch to find comparison revisions in")
    parser_build.add_argument('--commit', help="Revision to search for")
    parser_build.add_argument('--commit-number', help="Revision number to search for, redundant if --commit is specified")
    parser_build.add_argument('--commit2', help="Revision to compare with")
    parser_build.add_argument('--commit-number2', help="Revision number to compare with, redundant if --commit2 is specified")
    parser_build.add_argument('-l', '--limit', default=REGRESSIONS_DISPLAY_LIMIT,
                              help="Maximum number of changes to display per test. Can be set to 0 to print all changes")
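
# Example invocations (a sketch; paths and revisions are hypothetical, assuming
# the resulttool command-line tool that loads this plugin):
#
#   resulttool regression base-results/ target-results/ -l 0
#   resulttool regression-git /path/to/testresults-repo -B master --commit <sha> --commit2 <sha>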