# resulttool - regression analysis
#
# Copyright (c) 2019, Intel Corporation.
# Copyright (c) 2019, Linux Foundation
#
# SPDX-License-Identifier: GPL-2.0-only
#

import resulttool.resultutils as resultutils

from oeqa.utils.git import GitRepo
import oeqa.utils.gitarchive as gitarchive

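# Map a TEST_TYPE to the configuration key whose metadata must match between
# base and target before their results are compared (see metadata_matches()).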
METADATA_MATCH_TABLE = {
    "oeselftest": "OESELFTEST_METADATA"
}

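# Known oe-selftest configurations, keyed by autobuilder trigger name. These
# mirror the autobuilder's config.json and are used by guess_oeselftest_metadata()
# to reconstruct missing OESELFTEST_METADATA in older test results.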
OESELFTEST_METADATA_GUESS_TABLE = {
    "trigger-build-posttrigger": {
        "run_all_tests": False,
        "run_tests": ["buildoptions.SourceMirroring.test_yocto_source_mirror"],
        "skips": None,
        "machine": None,
        "select_tags": None,
        "exclude_tags": None
    },
    "reproducible": {
        "run_all_tests": False,
        "run_tests": ["reproducible"],
        "skips": None,
        "machine": None,
        "select_tags": None,
        "exclude_tags": None
    },
    "arch-qemu-quick": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": None,
        "machine": None,
        "select_tags": ["machine"],
        "exclude_tags": None
    },
    "arch-qemu-full-x86-or-x86_64": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": None,
        "machine": None,
        "select_tags": ["machine", "toolchain-system"],
        "exclude_tags": None
    },
    "arch-qemu-full-others": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": None,
        "machine": None,
        "select_tags": ["machine", "toolchain-user"],
        "exclude_tags": None
    },
    "selftest": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"],
        "machine": None,
        "select_tags": None,
        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
    },
    "bringup": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"],
        "machine": None,
        "select_tags": None,
        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
    }
}

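# Human-readable strings substituted for raw status values in the reports.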
STATUS_STRINGS = {
    "None": "No matching test result"
}

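# Default maximum number of regressions listed per test family; can be
# overridden with the --limit option (0 means print all changes).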
REGRESSIONS_DISPLAY_LIMIT = 50

MISSING_TESTS_BANNER =   "-------------------------- Missing tests --------------------------"
ADDITIONAL_DATA_BANNER = "--------------------- Matches and improvements --------------------"

def test_has_at_least_one_matching_tag(test, tag_list):
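    """Return True if the test result carries at least one oetag from tag_list."""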
    return "oetags" in test and any(oetag in tag_list for oetag in test["oetags"])

def all_tests_have_at_least_one_matching_tag(results, tag_list):
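    """Return True if every non-ptest test result matches at least one tag from tag_list."""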
    return all(test_has_at_least_one_matching_tag(test_result, tag_list) or test_name.startswith("ptestresult") for (test_name, test_result) in results.items())

def any_test_have_any_matching_tag(results, tag_list):
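    """Return True if at least one test result matches a tag from tag_list."""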
    return any(test_has_at_least_one_matching_tag(test, tag_list) for test in results.values())

def have_skipped_test(result, test_prefix):
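    """Return True if every test whose name starts with test_prefix was skipped."""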
    return all(result[test]['status'] == "SKIPPED" for test in result if test.startswith(test_prefix))

def have_all_tests_skipped(result, test_prefixes_list):
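    """Return True if, for each prefix in test_prefixes_list, all matching tests were skipped."""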
    return all(have_skipped_test(result, test_prefix) for test_prefix in test_prefixes_list)

def guess_oeselftest_metadata(results):
    """
    When an oeselftest test result lacks OESELFTEST_METADATA, we can try to guess it from the results content.
    Check the results for specific characteristics (absence/presence of oetags, number and names of executed tests...)
    and, if they match one of the known autobuilder configurations, apply the guessed OESELFTEST_METADATA
    to allow proper test filtering.
    This guessing process is tightly coupled to config.json in the autobuilder. It should trigger less and less,
    as new test results have OESELFTEST_METADATA properly appended at test reporting time.
    """

    if len(results) == 1 and "buildoptions.SourceMirroring.test_yocto_source_mirror" in results:
        return OESELFTEST_METADATA_GUESS_TABLE['trigger-build-posttrigger']
    elif all(result.startswith("reproducible") for result in results):
        return OESELFTEST_METADATA_GUESS_TABLE['reproducible']
    elif all_tests_have_at_least_one_matching_tag(results, ["machine"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-quick']
    elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-system"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-x86-or-x86_64']
    elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-user"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-others']
    elif not any_test_have_any_matching_tag(results, ["machine", "toolchain-user", "toolchain-system"]):
        if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"]):
            return OESELFTEST_METADATA_GUESS_TABLE['selftest']
        elif have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"]):
            return OESELFTEST_METADATA_GUESS_TABLE['bringup']

    return None


def metadata_matches(base_configuration, target_configuration):
    """
    For the passed base and target, check the test type. If the test type matches one of the
    properties described in METADATA_MATCH_TABLE, compare the corresponding metadata if it is
    present in the base. Return True if the metadata matches, or if the base lacks some of the
    data (either TEST_TYPE or the corresponding metadata).
    """
    test_type = base_configuration.get('TEST_TYPE')
    if test_type not in METADATA_MATCH_TABLE:
        return True

    metadata_key = METADATA_MATCH_TABLE.get(test_type)
    if target_configuration.get(metadata_key) != base_configuration.get(metadata_key):
        return False

    return True


def machine_matches(base_configuration, target_configuration):
    return base_configuration.get('MACHINE') == target_configuration.get('MACHINE')


def can_be_compared(logger, base, target):
    """
    Some test runs are not relevant to compare, for example oeselftest runs
    with different test sets or parameters. Return True if the tests can be
    compared.
    """
    ret = True
    base_configuration = base['configuration']
    target_configuration = target['configuration']

    # Older test results lack proper OESELFTEST_METADATA: if not present, try to guess it based on the test results.
    if base_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in base_configuration:
        guess = guess_oeselftest_metadata(base['result'])
        if guess is None:
            logger.error(f"ERROR: did not manage to guess oeselftest metadata for {base_configuration['STARTTIME']}")
        else:
            logger.debug(f"Enriching {base_configuration['STARTTIME']} with {guess}")
            base_configuration['OESELFTEST_METADATA'] = guess
    if target_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in target_configuration:
        guess = guess_oeselftest_metadata(target['result'])
        if guess is None:
            logger.error(f"ERROR: did not manage to guess oeselftest metadata for {target_configuration['STARTTIME']}")
        else:
            logger.debug(f"Enriching {target_configuration['STARTTIME']} with {guess}")
            target_configuration['OESELFTEST_METADATA'] = guess

    # Test runs with LTP results should only be compared with other runs that also contain LTP tests
    if base_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in base['result']):
        ret = target_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in target['result'])

    return ret and metadata_matches(base_configuration, target_configuration) \
        and machine_matches(base_configuration, target_configuration)

def get_status_str(raw_status):
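    """Map a raw test status to a human-readable string (a missing status becomes "No matching test result")."""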
    raw_status_lower = raw_status.lower() if raw_status else "None"
    return STATUS_STRINGS.get(raw_status_lower, raw_status)

def get_additional_info_line(new_pass_count, new_tests):
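    """Format a summary line for new and newly passing tests, or return an empty string if there are none."""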
    result = []
    if new_tests:
        result.append(f'+{new_tests} test(s) present')
    if new_pass_count:
        result.append(f'+{new_pass_count} test(s) now passing')

    if not result:
        return ""

    return '    -> ' + ', '.join(result) + '\n'

def compare_result(logger, base_name, target_name, base_result, target_result, display_limit=None):
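    """
    Compare the base and target test results. Return a tuple (result, resultstring)
    where result maps each test whose status changed to its base and target status
    (None or empty if there are no regressions) and resultstring is a printable report.
    """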
    base_result = base_result.get('result')
    target_result = target_result.get('result')
    result = {}
    regressions = {}
    resultstring = ""
    new_tests = 0
    new_pass_count = 0

    display_limit = int(display_limit) if display_limit else REGRESSIONS_DISPLAY_LIMIT

    if base_result and target_result:
        for k in base_result:
            base_testcase = base_result[k]
            base_status = base_testcase.get('status')
            if base_status:
                target_testcase = target_result.get(k, {})
                target_status = target_testcase.get('status')
                if base_status != target_status:
                    result[k] = {'base': base_status, 'target': target_status}
            else:
                logger.error('Failed to retrieve base test case status: %s' % k)

        # Also count new tests that were not present in the base results: these
        # could be newly added tests, but could also highlight test renames or
        # fixed faulty ptests
        for k in target_result:
            if k not in base_result:
                new_tests += 1
    if result:
        new_pass_count = sum(test['target'] is not None and test['target'].startswith("PASS") for test in result.values())
        # Print a regression report only if at least one test has a regression status (FAIL, SKIPPED, absent...)
        if new_pass_count < len(result):
            resultstring = "Regression:  %s\n             %s\n" % (base_name, target_name)
            for k in sorted(result):
                if not result[k]['target'] or not result[k]['target'].startswith("PASS"):
                    # Differentiate each ptest kind when listing regressions
                    key_parts = k.split('.')
                    key = '.'.join(key_parts[:2]) if k.startswith('ptest') else key_parts[0]
                    # Append new regression to corresponding test family
                    regressions[key] = regressions.setdefault(key, []) + ['        %s: %s -> %s\n' % (k, get_status_str(result[k]['base']), get_status_str(result[k]['target']))]
            resultstring += f"    Total: {sum(len(v) for v in regressions.values())} new regression(s):\n"
            for k in regressions:
                resultstring += f"    {len(regressions[k])} regression(s) for {k}\n"
                count_to_print = min(display_limit, len(regressions[k])) if display_limit > 0 else len(regressions[k])
                resultstring += ''.join(regressions[k][:count_to_print])
                if count_to_print < len(regressions[k]):
                    resultstring += '        [...]\n'
            if new_pass_count > 0:
                resultstring += f'    Additionally, {new_pass_count} previously failing test(s) is/are now passing\n'
            if new_tests > 0:
                resultstring += f'    Additionally, {new_tests} new test(s) is/are present\n'
        else:
            resultstring = "%s\n%s\n" % (base_name, target_name)
            result = None
    else:
        resultstring = "%s\n%s\n" % (base_name, target_name)

    if not result:
        additional_info = get_additional_info_line(new_pass_count, new_tests)
        if additional_info:
            resultstring += additional_info

    return result, resultstring

def get_results(logger, source):
    return resultutils.load_resultsdata(source, configmap=resultutils.regression_map)

def regression(args, logger):
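    """Entry point for the 'regression' subcommand: compare two result files/directories/URLs."""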
    base_results = get_results(logger, args.base_result)
    target_results = get_results(logger, args.target_result)

    regression_common(args, logger, base_results, target_results)

# Some test case naming is poor and contains random strings, particularly lttng/babeltrace.
# Truncating the test names works since they contain file and line number identifiers
# which allow us to match them without the random components.
def fixup_ptest_names(results, logger):
    for r in results:
        for i in results[r]:
            tests = list(results[r][i]['result'].keys())
            for test in tests:
                new = None
                if test.startswith(("ptestresult.lttng-tools.", "ptestresult.babeltrace.", "ptestresult.babeltrace2")) and "_-_" in test:
                    new = test.split("_-_")[0]
                elif test.startswith(("ptestresult.curl.", "ptestresult.dbus.")) and "__" in test:
                    new = test.split("__")[0]
                elif test.startswith("ptestresult.binutils") and "build-st-" in test:
                    new = test.split(" ")[0]
                elif test.startswith("ptestresult.gcc") and "/tmp/runtest." in test:
                    new = ".".join(test.split(".")[:2])
                if new:
                    results[r][i]['result'][new] = results[r][i]['result'][test]
                    del results[r][i]['result'][test]

def regression_common(args, logger, base_results, target_results):
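    """Filter and fix up the base and target result sets, compare them, then print regressions, missing tests and matches/improvements."""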
    if args.base_result_id:
        base_results = resultutils.filter_resultsdata(base_results, args.base_result_id)
    if args.target_result_id:
        target_results = resultutils.filter_resultsdata(target_results, args.target_result_id)

    fixup_ptest_names(base_results, logger)
    fixup_ptest_names(target_results, logger)

    matches = []
    regressions = []
    notfound = []

    for a in base_results:
        if a in target_results:
            base = list(base_results[a].keys())
            target = list(target_results[a].keys())
            # We may have multiple base/targets which are for different configurations. Start by
            # removing any pairs which match
            for c in base.copy():
                for b in target.copy():
                    if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
                        continue
                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b], args.limit)
                    if not res:
                        matches.append(resstr)
                        base.remove(c)
                        target.remove(b)
                        break
            # We should only see regressions now; we may not be able to match multiple pairs directly
            for c in base:
                for b in target:
                    if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
                        continue
                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b], args.limit)
                    if res:
                        regressions.append(resstr)
        else:
            notfound.append("%s not found in target" % a)
    print("\n".join(sorted(regressions)))
    print("\n" + MISSING_TESTS_BANNER + "\n")
    print("\n".join(sorted(notfound)))
    print("\n" + ADDITIONAL_DATA_BANNER + "\n")
    print("\n".join(sorted(matches)))
    return 0

def regression_git(args, logger):
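    """Entry point for the 'regression-git' subcommand: pick two revisions from a test results git repository and compare their result sets."""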
    base_results = {}
    target_results = {}

    tag_name = "{branch}/{commit_number}-g{commit}/{tag_number}"
    repo = GitRepo(args.repo)

    revs = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch)

    if args.branch2:
        revs2 = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch2)
        if not len(revs2):
            logger.error("No revisions found to compare against")
            return 1
        if not len(revs):
            logger.error("No revision to report on found")
            return 1
    else:
        if len(revs) < 2:
            logger.error("Only %d tester revisions found, unable to generate report" % len(revs))
            return 1

    # Pick revisions
    if args.commit:
        if args.commit_number:
            logger.warning("Ignoring --commit-number as --commit was specified")
        index1 = gitarchive.rev_find(revs, 'commit', args.commit)
    elif args.commit_number:
        index1 = gitarchive.rev_find(revs, 'commit_number', args.commit_number)
    else:
        index1 = len(revs) - 1

    if args.branch2:
        revs2.append(revs[index1])
        index1 = len(revs2) - 1
        revs = revs2

    if args.commit2:
        if args.commit_number2:
            logger.warning("Ignoring --commit-number2 as --commit2 was specified")
        index2 = gitarchive.rev_find(revs, 'commit', args.commit2)
    elif args.commit_number2:
        index2 = gitarchive.rev_find(revs, 'commit_number', args.commit_number2)
    else:
        if index1 > 0:
            index2 = index1 - 1
            # Find the closest matching commit number for comparison.
            # In future we could check that the commit is a common ancestor and
            # continue back if not, but this is good enough for now
            while index2 > 0 and revs[index2].commit_number > revs[index1].commit_number:
                index2 = index2 - 1
        else:
            logger.error("Unable to determine the other commit, use "
                         "--commit2 or --commit-number2 to specify it")
            return 1

    logger.info("Comparing:\n%s\nto\n%s\n" % (revs[index1], revs[index2]))

    base_results = resultutils.git_get_result(repo, revs[index1][2])
    target_results = resultutils.git_get_result(repo, revs[index2][2])

    regression_common(args, logger, base_results, target_results)

    return 0

def register_commands(subparsers):
    """Register subcommands from this plugin"""

    parser_build = subparsers.add_parser('regression', help='regression file/directory analysis',
                                         description='regression analysis comparing the base set of results to the target results',
                                         group='analysis')
    parser_build.set_defaults(func=regression)
    parser_build.add_argument('base_result',
                              help='base result file/directory/URL for the comparison')
    parser_build.add_argument('target_result',
                              help='target result file/directory/URL to compare with')
    parser_build.add_argument('-b', '--base-result-id', default='',
                              help='(optional) filter the base results to this result ID')
    parser_build.add_argument('-t', '--target-result-id', default='',
                              help='(optional) filter the target results to this result ID')

    parser_build = subparsers.add_parser('regression-git', help='regression git analysis',
                                         description='regression analysis comparing base result set to target '
                                                     'result set',
                                         group='analysis')
    parser_build.set_defaults(func=regression_git)
    parser_build.add_argument('repo',
                              help='the git repository containing the data')
    parser_build.add_argument('-b', '--base-result-id', default='',
                              help='(optional) filter the base results to this result ID; by default, '
                                   'results are matched based on their configurations')
    parser_build.add_argument('-t', '--target-result-id', default='',
                              help='(optional) filter the target results to this result ID; by default, '
                                   'results are matched based on their configurations')

    parser_build.add_argument('--branch', '-B', default='master', help="Branch to find commit in")
    parser_build.add_argument('--branch2', help="Branch to find comparison revisions in")
    parser_build.add_argument('--commit', help="Revision to search for")
    parser_build.add_argument('--commit-number', help="Revision number to search for, redundant if --commit is specified")
    parser_build.add_argument('--commit2', help="Revision to compare with")
    parser_build.add_argument('--commit-number2', help="Revision number to compare with, redundant if --commit2 is specified")
    parser_build.add_argument('-l', '--limit', default=REGRESSIONS_DISPLAY_LIMIT, help="Maximum number of changes to display per test. Can be set to 0 to print all changes")