#!/usr/bin/env python3
#
# Examine build performance test results
#
# Copyright (c) 2017, Intel Corporation.
#
# SPDX-License-Identifier: GPL-2.0-only
#

import argparse
import json
import logging
import os
import re
import sys
from collections import namedtuple, OrderedDict
from operator import attrgetter
from xml.etree import ElementTree as ET

# Import oe libs
scripts_path = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(scripts_path, 'lib'))
import scriptpath
from build_perf import print_table
from build_perf.report import (metadata_xml_to_json, results_xml_to_json,
                               aggregate_data, aggregate_metadata, measurement_stats,
                               AggregateTestData)
from build_perf import html
from buildstats import BuildStats, diff_buildstats, BSVerDiff

scriptpath.add_oe_lib_path()

from oeqa.utils.git import GitRepo, GitError
import oeqa.utils.gitarchive as gitarchive


# Setup logging
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
log = logging.getLogger('oe-build-perf-report')

def list_test_revs(repo, tag_name, verbosity, **kwargs):
    """Get list of all tested revisions"""
    valid_kwargs = dict([(k, v) for k, v in kwargs.items() if v is not None])

    fields, revs = gitarchive.get_test_runs(log, repo, tag_name, **valid_kwargs)
    ignore_fields = ['tag_number']
    if verbosity < 2:
        extra_fields = ['COMMITS', 'TEST RUNS']
        ignore_fields.extend(['commit_number', 'commit'])
    else:
        extra_fields = ['TEST RUNS']

    print_fields = [i for i, f in enumerate(fields) if f not in ignore_fields]

    # Sort revs
    rows = [[fields[i].upper() for i in print_fields] + extra_fields]

    prev = [''] * len(print_fields)
    prev_commit = None
    commit_cnt = 0
    commit_field = fields.index('commit')
    for rev in revs:
        # Only use fields that we want to print
        cols = [rev[i] for i in print_fields]

        if cols != prev:
            commit_cnt = 1
            test_run_cnt = 1
            new_row = [''] * (len(print_fields) + len(extra_fields))

            for i in print_fields:
                if cols[i] != prev[i]:
                    break
            new_row[i:-len(extra_fields)] = cols[i:]
            rows.append(new_row)
        else:
            if rev[commit_field] != prev_commit:
                commit_cnt += 1
            test_run_cnt += 1

        if verbosity < 2:
            new_row[-2] = commit_cnt
        new_row[-1] = test_run_cnt
        prev = cols
        prev_commit = rev[commit_field]

    print_table(rows)

def is_xml_format(repo, commit):
    """Check if the commit contains xml (or json) data"""
    if repo.rev_parse(commit + ':results.xml'):
        log.debug("Detected report in xml format in %s", commit)
        return True
    else:
        log.debug("No xml report in %s, assuming json formatted results", commit)
        return False

def read_results(repo, tags, xml=True):
    """Read result files from repo"""

    def parse_xml_stream(data):
        """Parse multiple concatenated XML objects"""
        objs = []
        xml_d = ""
        for line in data.splitlines():
            if xml_d and line.startswith('<?xml version='):
                objs.append(ET.fromstring(xml_d))
                xml_d = line
            else:
                xml_d += line
        objs.append(ET.fromstring(xml_d))
        return objs

    def parse_json_stream(data):
        """Parse multiple concatenated JSON objects"""
        objs = []
        json_d = ""
        for line in data.splitlines():
            if line == '}{':
                json_d += '}'
                objs.append(json.loads(json_d, object_pairs_hook=OrderedDict))
                json_d = '{'
            else:
                json_d += line
        objs.append(json.loads(json_d,
                               object_pairs_hook=OrderedDict))
        return objs

    num_revs = len(tags)

    # Optimize by reading all data with one git command
    log.debug("Loading raw result data from %d tags, %s...", num_revs, tags[0])
    if xml:
        git_objs = [tag + ':metadata.xml' for tag in tags] + [tag + ':results.xml' for tag in tags]
        data = parse_xml_stream(repo.run_cmd(['show'] + git_objs + ['--']))
        return ([metadata_xml_to_json(e) for e in data[0:num_revs]],
                [results_xml_to_json(e) for e in data[num_revs:]])
    else:
        git_objs = [tag + ':metadata.json' for tag in tags] + [tag + ':results.json' for tag in tags]
        data = parse_json_stream(repo.run_cmd(['show'] + git_objs + ['--']))
        return data[0:num_revs], data[num_revs:]


def get_data_item(data, key):
    """Nested getitem lookup"""
    for k in key.split('.'):
        data = data[k]
    return data


def metadata_diff(metadata_l, metadata_r):
    """Prepare a metadata diff for printing"""
    keys = [('Hostname', 'hostname', 'hostname'),
            ('Branch', 'branch', 'layers.meta.branch'),
            ('Commit number', 'commit_num', 'layers.meta.commit_count'),
            ('Commit', 'commit', 'layers.meta.commit'),
            ('Number of test runs', 'testrun_count', 'testrun_count')
           ]

    def _metadata_diff(key):
        """Diff metadata from two test reports"""
        try:
            val1 = get_data_item(metadata_l, key)
        except KeyError:
            val1 = '(N/A)'
        try:
            val2 = get_data_item(metadata_r, key)
        except KeyError:
            val2 = '(N/A)'
        return val1, val2

    metadata = OrderedDict()
    for title, key, key_json in keys:
        value_l, value_r = _metadata_diff(key_json)
        metadata[key] = {'title': title,
                         'value_old': value_l,
                         'value': value_r}
    return metadata


def print_diff_report(metadata_l, data_l, metadata_r, data_r):
    """Print differences between two data sets"""

    # First, print general metadata
    print("\nTEST METADATA:\n==============")
    meta_diff = metadata_diff(metadata_l, metadata_r)
    rows = []
    row_fmt = ['{:{wid}} ', '{:<{wid}} ', '{:<{wid}}']
    rows = [['', 'CURRENT COMMIT', 'COMPARING WITH']]
    for key, val in meta_diff.items():
        # Shorten commit hashes
        if key == 'commit':
            rows.append([val['title'] + ':', val['value'][:20], val['value_old'][:20]])
        else:
            rows.append([val['title'] + ':', val['value'], val['value_old']])
    print_table(rows, row_fmt)


    # Print test results
    print("\nTEST RESULTS:\n=============")

    tests = list(data_l['tests'].keys())
    # Append tests that are only present in 'right' set
    tests += [t for t in list(data_r['tests'].keys()) if t not in tests]

    # Prepare data to be printed
    rows = []
    row_fmt = ['{:8}', '{:{wid}}', '{:{wid}}', ' {:>{wid}}', ' {:{wid}} ', '{:{wid}}',
               ' {:>{wid}}', ' {:>{wid}}']
    num_cols = len(row_fmt)
    for test in tests:
        test_l = data_l['tests'][test] if test in data_l['tests'] else None
        test_r = data_r['tests'][test] if test in data_r['tests'] else None
        pref = ' '
        if test_l is None:
            pref = '+'
        elif test_r is None:
            pref = '-'
        descr = test_l['description'] if test_l else test_r['description']
        heading = "{} {}: {}".format(pref, test, descr)

        rows.append([heading])

        # Generate the list of measurements
        meas_l = test_l['measurements'] if test_l else {}
        meas_r = test_r['measurements'] if test_r else {}
        measurements = list(meas_l.keys())
        measurements += [m for m in list(meas_r.keys()) if m not in measurements]
        for meas in measurements:
            m_pref = ' '
            if meas in meas_l:
                stats_l = measurement_stats(meas_l[meas], 'l.')
            else:
                stats_l = measurement_stats(None, 'l.')
                m_pref = '+'
            if meas in meas_r:
                stats_r = measurement_stats(meas_r[meas], 'r.')
            else:
                stats_r = measurement_stats(None, 'r.')
                m_pref = '-'
            stats = stats_l.copy()
            stats.update(stats_r)

            absdiff = stats['val_cls'](stats['r.mean'] - stats['l.mean'])
            reldiff = "{:+.1f} %".format(absdiff * 100 / stats['l.mean'])
            if stats['r.mean'] > stats['l.mean']:
                absdiff = '+' + str(absdiff)
            else:
                absdiff = str(absdiff)
            rows.append(['', m_pref, stats['name'] + ' ' + stats['quantity'],
                         str(stats['l.mean']), '->', str(stats['r.mean']),
                         absdiff, reldiff])
        rows.append([''] * num_cols)

    print_table(rows, row_fmt)

    print()


class BSSummary(object):
    def __init__(self, bs1, bs2):
        self.tasks = {'count': bs2.num_tasks,
                      'change': '{:+d}'.format(bs2.num_tasks - bs1.num_tasks)}
        self.top_consumer = None
        self.top_decrease = None
        self.top_increase = None
        self.ver_diff = OrderedDict()

        tasks_diff = diff_buildstats(bs1, bs2, 'cputime')

        # Get top consumers of resources
        tasks_diff = sorted(tasks_diff, key=attrgetter('value2'))
        self.top_consumer = tasks_diff[-5:]

        # Get biggest increase and decrease in resource usage
        tasks_diff = sorted(tasks_diff, key=attrgetter('absdiff'))
        self.top_decrease = tasks_diff[0:5]
        self.top_increase = tasks_diff[-5:]

        # Compare recipe versions and prepare data for display
        ver_diff = BSVerDiff(bs1, bs2)
        if ver_diff:
            if ver_diff.new:
                self.ver_diff['New recipes'] = [(n, r.evr) for n, r in ver_diff.new.items()]
            if ver_diff.dropped:
                self.ver_diff['Dropped recipes'] = [(n, r.evr) for n, r in ver_diff.dropped.items()]
            if ver_diff.echanged:
                self.ver_diff['Epoch changed'] = [(n, "{} → {}".format(r.left.evr, r.right.evr)) for n, r in ver_diff.echanged.items()]
            if ver_diff.vchanged:
                self.ver_diff['Version changed'] = [(n, "{} → {}".format(r.left.version, r.right.version)) for n, r in ver_diff.vchanged.items()]
            if ver_diff.rchanged:
                self.ver_diff['Revision changed'] = [(n, "{} → {}".format(r.left.evr, r.right.evr)) for n, r in ver_diff.rchanged.items()]


def print_html_report(data, id_comp, buildstats):
    """Print report in html format"""
    # Handle metadata
    metadata = metadata_diff(data[id_comp].metadata, data[-1].metadata)

    # Generate list of tests
    tests = []
    for test in data[-1].results['tests'].keys():
        test_r = data[-1].results['tests'][test]
        new_test = {'name': test_r['name'],
                    'description': test_r['description'],
                    'status': test_r['status'],
                    'measurements': [],
                    'err_type': test_r.get('err_type'),
                    }
        # Limit length of err output shown
        if 'message' in test_r:
            lines = test_r['message'].splitlines()
            if len(lines) > 20:
                new_test['message'] = '...\n' + '\n'.join(lines[-20:])
            else:
                new_test['message'] = test_r['message']


        # Generate the list of measurements
        for meas in test_r['measurements'].keys():
            meas_r = test_r['measurements'][meas]
            meas_type = 'time' if meas_r['type'] == 'sysres' else 'size'
            new_meas = {'name': meas_r['name'],
                        'legend': meas_r['legend'],
                        'description': meas_r['name'] + ' ' + meas_type,
                        }
            samples = []

            # Run through all revisions in our data
            for meta, test_data in data:
                if (not test in test_data['tests'] or
                        not meas in test_data['tests'][test]['measurements']):
                    samples.append(measurement_stats(None))
                    continue
                test_i = test_data['tests'][test]
                meas_i = test_i['measurements'][meas]
                commit_num = get_data_item(meta, 'layers.meta.commit_count')
                samples.append(measurement_stats(meas_i))
                samples[-1]['commit_num'] = commit_num

            absdiff = samples[-1]['val_cls'](samples[-1]['mean'] - samples[id_comp]['mean'])
            reldiff = absdiff * 100 / samples[id_comp]['mean']
            new_meas['absdiff'] = absdiff
            new_meas['absdiff_str'] = str(absdiff) if absdiff < 0 else '+' + str(absdiff)
            new_meas['reldiff'] = reldiff
            new_meas['reldiff_str'] = "{:+.1f} %".format(reldiff)
            new_meas['samples'] = samples
            new_meas['value'] = samples[-1]
            new_meas['value_type'] = samples[-1]['val_cls']

            # Compare buildstats
            bs_key = test + '.' + meas
            rev = str(metadata['commit_num']['value'])
            comp_rev = str(metadata['commit_num']['value_old'])
            if (buildstats and rev in buildstats and bs_key in buildstats[rev] and
                    comp_rev in buildstats and bs_key in buildstats[comp_rev]):
                new_meas['buildstats'] = BSSummary(buildstats[comp_rev][bs_key],
                                                   buildstats[rev][bs_key])


            new_test['measurements'].append(new_meas)
        tests.append(new_test)

    # Chart options
    chart_opts = {'haxis': {'min': get_data_item(data[0][0], 'layers.meta.commit_count'),
                            'max': get_data_item(data[-1][0], 'layers.meta.commit_count')}
                 }

    print(html.template.render(title="Build Perf Test Report",
                               metadata=metadata, test_data=tests,
                               chart_opts=chart_opts))


def get_buildstats(repo, notes_ref, notes_ref2, revs, outdir=None):
    """Get the buildstats from git notes"""
    full_ref = 'refs/notes/' + notes_ref
    if not repo.rev_parse(full_ref):
        log.error("No buildstats found, please try running "
                  "'git fetch origin %s:%s' to fetch them from the remote",
                  full_ref, full_ref)
        return

    missing = False
    buildstats = {}
    log.info("Parsing buildstats from 'refs/notes/%s'", notes_ref)
    for rev in revs:
        buildstats[rev.commit_number] = {}
        log.debug('Dumping buildstats for %s (%s)', rev.commit_number,
                  rev.commit)
        for tag in rev.tags:
            log.debug(' %s', tag)
            try:
                try:
                    bs_all = json.loads(repo.run_cmd(['notes', '--ref', notes_ref, 'show', tag + '^0']))
                except GitError:
                    if notes_ref2:
                        bs_all = json.loads(repo.run_cmd(['notes', '--ref', notes_ref2, 'show', tag + '^0']))
                    else:
                        raise
            except GitError:
                log.warning("Buildstats not found for %s", tag)
                bs_all = {}
                missing = True

            for measurement, bs in bs_all.items():
                # Write out onto disk
                if outdir:
                    tag_base, run_id = tag.rsplit('/', 1)
                    tag_base = tag_base.replace('/', '_')
                    bs_dir = os.path.join(outdir, measurement, tag_base)
                    if not os.path.exists(bs_dir):
                        os.makedirs(bs_dir)
                    with open(os.path.join(bs_dir, run_id + '.json'), 'w') as f:
                        json.dump(bs, f, indent=2)

                # Read buildstats into a dict
                _bs = BuildStats.from_json(bs)
                if measurement not in buildstats[rev.commit_number]:
                    buildstats[rev.commit_number][measurement] = _bs
                else:
                    buildstats[rev.commit_number][measurement].aggregate(_bs)

    if missing:
        log.info("Buildstats were missing for some test runs, please "
                 "run 'git fetch origin %s:%s' and try again",
                 full_ref, full_ref)

    return buildstats
def auto_args(repo, args):
    """Guess arguments, if not defined by the user"""
    # Get the latest commit in the repo
    log.debug("Guessing arguments from the latest commit")
    msg = repo.run_cmd(['log', '-1', '--branches', '--remotes', '--format=%b'])
    for line in msg.splitlines():
        split = line.split(':', 1)
        if len(split) != 2:
            continue

        key = split[0]
        val = split[1].strip()
        if key == 'hostname' and not args.hostname:
            log.debug("Using hostname %s", val)
            args.hostname = val
        elif key == 'branch' and not args.branch:
            log.debug("Using branch %s", val)
            args.branch = val


def parse_args(argv):
    """Parse command line arguments"""
    description = """
Examine build performance test results from a Git repository"""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=description)

    parser.add_argument('--debug', '-d', action='store_true',
                        help="Verbose logging")
    parser.add_argument('--repo', '-r', required=True,
                        help="Results repository (local git clone)")
    parser.add_argument('--list', '-l', action='count',
                        help="List available test runs")
    parser.add_argument('--html', action='store_true',
                        help="Generate report in html format")
    group = parser.add_argument_group('Tag and revision')
    group.add_argument('--tag-name', '-t',
                       default='{hostname}/{branch}/{machine}/{commit_number}-g{commit}/{tag_number}',
                       help="Tag name (pattern) for finding results")
    group.add_argument('--hostname', '-H')
    group.add_argument('--branch', '-B', default='master', help="Branch to find commit in")
    group.add_argument('--branch2', help="Branch to find comparison revisions in")
    group.add_argument('--machine', default='qemux86')
    group.add_argument('--history-length', default=25, type=int,
                       help="Number of tested revisions to plot in html report")
    group.add_argument('--commit',
                       help="Revision to search for")
    group.add_argument('--commit-number',
                       help="Revision number to search for, redundant if "
                            "--commit is specified")
    group.add_argument('--commit2',
                       help="Revision to compare with")
    group.add_argument('--commit-number2',
                       help="Revision number to compare with, redundant if "
                            "--commit2 is specified")
    parser.add_argument('--dump-buildstats', nargs='?', const='.',
                        help="Dump buildstats of the tests")

    return parser.parse_args(argv)


def main(argv=None):
    """Script entry point"""
    args = parse_args(argv)
    if args.debug:
        log.setLevel(logging.DEBUG)

    repo = GitRepo(args.repo)

    if args.list:
        list_test_revs(repo, args.tag_name, args.list, hostname=args.hostname)
        return 0

    # Determine which hostname to use
    if not args.hostname:
        auto_args(repo, args)

    revs = gitarchive.get_test_revs(log, repo, args.tag_name, hostname=args.hostname,
                                    branch=args.branch, machine=args.machine)
    if args.branch2 and args.branch2 != args.branch:
        revs2 = gitarchive.get_test_revs(log, repo, args.tag_name, hostname=args.hostname,
                                         branch=args.branch2, machine=args.machine)
        if not len(revs2):
            log.error("No revisions found to compare against")
            return 1
        if not len(revs):
            log.error("No revision to report on found")
            return 1
    else:
        if len(revs) < 2:
            log.error("Only %d tested revisions found, unable to generate report" % len(revs))
            return 1

    # Pick revisions
    if args.commit:
        if args.commit_number:
            log.warning("Ignoring --commit-number as --commit was specified")
        index1 = gitarchive.rev_find(revs, 'commit', args.commit)
    elif args.commit_number:
        index1 = gitarchive.rev_find(revs, 'commit_number', args.commit_number)
    else:
        index1 = len(revs) - 1

    if args.branch2 and args.branch2 != args.branch:
        revs2.append(revs[index1])
        index1 = len(revs2) - 1
        revs = revs2

    if args.commit2:
        if args.commit_number2:
            log.warning("Ignoring --commit-number2 as --commit2 was specified")
        index2 = gitarchive.rev_find(revs, 'commit', args.commit2)
    elif args.commit_number2:
        index2 = gitarchive.rev_find(revs, 'commit_number', args.commit_number2)
    else:
        if index1 > 0:
            index2 = index1 - 1
            # Find the closest matching commit number for comparison.
            # In the future we could check that the commit is a common ancestor
            # and continue back if not, but this is good enough for now.
            while index2 > 0 and revs[index2].commit_number > revs[index1].commit_number:
                index2 = index2 - 1
        else:
            log.error("Unable to determine the other commit, use "
                      "--commit2 or --commit-number2 to specify it")
            return 1

    index_l = min(index1, index2)
    index_r = max(index1, index2)

    rev_l = revs[index_l]
    rev_r = revs[index_r]
    log.debug("Using 'left' revision %s (%s), %s test runs:\n %s",
              rev_l.commit_number, rev_l.commit, len(rev_l.tags),
              '\n '.join(rev_l.tags))
    log.debug("Using 'right' revision %s (%s), %s test runs:\n %s",
              rev_r.commit_number, rev_r.commit, len(rev_r.tags),
              '\n '.join(rev_r.tags))

    # Check the report format used in the repo (assume all reports are in the same format)
    xml = is_xml_format(repo, revs[index_r].tags[-1])

    if args.html:
        index_0 = max(0, min(index_l, index_r - args.history_length))
        rev_range = range(index_0, index_r + 1)
    else:
        # We do not need a range of commits for the text report (no graphs)
        index_0 = index_l
        rev_range = (index_l, index_r)

    # Read raw data
    log.debug("Reading %d revisions, starting from %s (%s)",
              len(rev_range), revs[index_0].commit_number, revs[index_0].commit)
    raw_data = [read_results(repo, revs[i].tags, xml) for i in rev_range]

    data = []
    for raw_m, raw_d in raw_data:
        data.append(AggregateTestData(aggregate_metadata(raw_m),
                                      aggregate_data(raw_d)))

    # Read buildstats only when needed
    buildstats = None
    if args.dump_buildstats or args.html:
        outdir = 'oe-build-perf-buildstats' if args.dump_buildstats else None
        notes_ref = 'buildstats/{}/{}/{}'.format(args.hostname, args.branch, args.machine)
        notes_ref2 = None
        if args.branch2:
            notes_ref = 'buildstats/{}/{}/{}'.format(args.hostname, args.branch2, args.machine)
            notes_ref2 = 'buildstats/{}/{}/{}'.format(args.hostname, args.branch, args.machine)
        buildstats = get_buildstats(repo, notes_ref, notes_ref2, [rev_l, rev_r], outdir)

    # Print report
    if not args.html:
        print_diff_report(data[0].metadata, data[0].results,
                          data[1].metadata, data[1].results)
    else:
        # Re-map the 'left' list index to the data table where index_0 maps to 0
        print_html_report(data, index_l - index_0, buildstats)

    return 0

if __name__ == "__main__":
    sys.exit(main())
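# Example invocation (illustrative only; the repository path and hostname below
# are assumptions, not values shipped with the script). With --html an HTML
# report is written to stdout; without it, a plain-text diff of the two
# selected revisions is printed instead:
#
#   ./oe-build-perf-report -r ~/buildperf-results --hostname perf-host \
#       --branch master --machine qemux86 --html > report.html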