#!/usr/bin/env python3
#
# Examine build performance test results
#
# Copyright (c) 2017, Intel Corporation.
#
# SPDX-License-Identifier: GPL-2.0-only
#

import argparse
import json
import logging
import os
import re
import sys
from collections import namedtuple, OrderedDict
from operator import attrgetter
from xml.etree import ElementTree as ET

# Import oe libs
scripts_path = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(scripts_path, 'lib'))
import scriptpath
from build_perf import print_table
from build_perf.report import (metadata_xml_to_json, results_xml_to_json,
                               aggregate_data, aggregate_metadata, measurement_stats,
                               AggregateTestData)
from build_perf import html
from buildstats import BuildStats, diff_buildstats, BSVerDiff

scriptpath.add_oe_lib_path()

from oeqa.utils.git import GitRepo, GitError
import oeqa.utils.gitarchive as gitarchive


# Setup logging
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
log = logging.getLogger('oe-build-perf-report')

def list_test_revs(repo, tag_name, verbosity, **kwargs):
    """Get list of all tested revisions"""
    valid_kwargs = dict([(k, v) for k, v in kwargs.items() if v is not None])

    fields, revs = gitarchive.get_test_runs(log, repo, tag_name, **valid_kwargs)
    ignore_fields = ['tag_number']
    if verbosity < 2:
        extra_fields = ['COMMITS', 'TEST RUNS']
        ignore_fields.extend(['commit_number', 'commit'])
    else:
        extra_fields = ['TEST RUNS']

    print_fields = [i for i, f in enumerate(fields) if f not in ignore_fields]

    # Sort revs
    rows = [[fields[i].upper() for i in print_fields] + extra_fields]

    prev = [''] * len(print_fields)
    prev_commit = None
    commit_cnt = 0
    commit_field = fields.index('commit')
    for rev in revs:
        # Only use fields that we want to print
        cols = [rev[i] for i in print_fields]


        if cols != prev:
            commit_cnt = 1
            test_run_cnt = 1
            new_row = [''] * (len(print_fields) + len(extra_fields))

            for i in print_fields:
                if cols[i] != prev[i]:
                    break
            new_row[i:-len(extra_fields)] = cols[i:]
            rows.append(new_row)
        else:
            if rev[commit_field] != prev_commit:
                commit_cnt += 1
            test_run_cnt += 1

        if verbosity < 2:
            new_row[-2] = commit_cnt
        new_row[-1] = test_run_cnt
        prev = cols
        prev_commit = rev[commit_field]

    print_table(rows)

def is_xml_format(repo, commit):
    """Check if the commit contains xml (or json) data"""
    if repo.rev_parse(commit + ':results.xml'):
        log.debug("Detected report in xml format in %s", commit)
        return True
    else:
        log.debug("No xml report in %s, assuming json formatted results", commit)
        return False

def read_results(repo, tags, xml=True):
    """Read result files from repo"""

    def parse_xml_stream(data):
        """Parse multiple concatenated XML objects"""
        objs = []
        xml_d = ""
        for line in data.splitlines():
            if xml_d and line.startswith('<?xml version='):
                objs.append(ET.fromstring(xml_d))
                xml_d = line
            else:
                xml_d += line
        objs.append(ET.fromstring(xml_d))
        return objs

    def parse_json_stream(data):
        """Parse multiple concatenated JSON objects"""
        objs = []
        json_d = ""
        for line in data.splitlines():
            if line == '}{':
                json_d += '}'
                objs.append(json.loads(json_d, object_pairs_hook=OrderedDict))
                json_d = '{'
            else:
                json_d += line
        objs.append(json.loads(json_d, object_pairs_hook=OrderedDict))
        return objs

    num_revs = len(tags)

    # Optimize by reading all data with one git command
    log.debug("Loading raw result data from %d tags, %s...", num_revs, tags[0])
    if xml:
        git_objs = [tag + ':metadata.xml' for tag in tags] + [tag + ':results.xml' for tag in tags]
        data = parse_xml_stream(repo.run_cmd(['show'] + git_objs + ['--']))
        return ([metadata_xml_to_json(e) for e in data[0:num_revs]],
                [results_xml_to_json(e) for e in data[num_revs:]])
    else:
        git_objs = [tag + ':metadata.json' for tag in tags] + [tag + ':results.json' for tag in tags]
        data = parse_json_stream(repo.run_cmd(['show'] + git_objs + ['--']))
        return data[0:num_revs], data[num_revs:]


def get_data_item(data, key):
    """Nested getitem lookup"""
    for k in key.split('.'):
        data = data[k]
    return data


def metadata_diff(metadata_l, metadata_r):
    """Prepare a metadata diff for printing"""
    keys = [('Hostname', 'hostname', 'hostname'),
            ('Branch', 'branch', 'layers.meta.branch'),
            ('Commit number', 'commit_num', 'layers.meta.commit_count'),
            ('Commit', 'commit', 'layers.meta.commit'),
            ('Number of test runs', 'testrun_count', 'testrun_count')
           ]

    def _metadata_diff(key):
        """Diff metadata from two test reports"""
        try:
            val1 = get_data_item(metadata_l, key)
        except KeyError:
            val1 = '(N/A)'
        try:
            val2 = get_data_item(metadata_r, key)
        except KeyError:
            val2 = '(N/A)'
        return val1, val2

    metadata = OrderedDict()
    for title, key, key_json in keys:
        value_l, value_r = _metadata_diff(key_json)
        metadata[key] = {'title': title,
                         'value_old': value_l,
                         'value': value_r}
    return metadata


def print_diff_report(metadata_l, data_l, metadata_r, data_r):
    """Print differences between two data sets"""

    # First, print general metadata
    print("\nTEST METADATA:\n==============")
    meta_diff = metadata_diff(metadata_l, metadata_r)
    rows = []
    row_fmt = ['{:{wid}} ', '{:<{wid}} ', '{:<{wid}}']
    rows = [['', 'CURRENT COMMIT', 'COMPARING WITH']]
    for key, val in meta_diff.items():
        # Shorten commit hashes
        if key == 'commit':
            rows.append([val['title'] + ':', val['value'][:20], val['value_old'][:20]])
        else:
            rows.append([val['title'] + ':', val['value'], val['value_old']])
    print_table(rows, row_fmt)


    # Print test results
    print("\nTEST RESULTS:\n=============")

    tests = list(data_l['tests'].keys())
    # Append tests that are only present in 'right' set
    tests += [t for t in list(data_r['tests'].keys()) if t not in tests]

    # Prepare data to be printed
    rows = []
    row_fmt = ['{:8}', '{:{wid}}', '{:{wid}}', ' {:>{wid}}', ' {:{wid}} ', '{:{wid}}',
               ' {:>{wid}}', ' {:>{wid}}']
    num_cols = len(row_fmt)
    for test in tests:
        test_l = data_l['tests'][test] if test in data_l['tests'] else None
        test_r = data_r['tests'][test] if test in data_r['tests'] else None
        pref = ' '
        if test_l is None:
            pref = '+'
        elif test_r is None:
            pref = '-'
        descr = test_l['description'] if test_l else test_r['description']
        heading = "{} {}: {}".format(pref, test, descr)

        rows.append([heading])

        # Generate the list of measurements
        meas_l = test_l['measurements'] if test_l else {}
        meas_r = test_r['measurements'] if test_r else {}
        measurements = list(meas_l.keys())
        measurements += [m for m in list(meas_r.keys()) if m not in measurements]

        for meas in measurements:
            m_pref = ' '
            if meas in meas_l:
                stats_l = measurement_stats(meas_l[meas], 'l.')
            else:
                stats_l = measurement_stats(None, 'l.')
                m_pref = '+'
            if meas in meas_r:
                stats_r = measurement_stats(meas_r[meas], 'r.')
            else:
                stats_r = measurement_stats(None, 'r.')
                m_pref = '-'
            stats = stats_l.copy()
            stats.update(stats_r)

            absdiff = stats['val_cls'](stats['r.mean'] - stats['l.mean'])
            reldiff = "{:+.1f} %".format(absdiff * 100 / stats['l.mean'])
            if stats['r.mean'] > stats['l.mean']:
                absdiff = '+' + str(absdiff)
            else:
                absdiff = str(absdiff)
            rows.append(['', m_pref, stats['name'] + ' ' + stats['quantity'],
                         str(stats['l.mean']), '->', str(stats['r.mean']),
                         absdiff, reldiff])
        rows.append([''] * num_cols)

    print_table(rows, row_fmt)

    print()


class BSSummary(object):
    def __init__(self, bs1, bs2):
        self.tasks = {'count': bs2.num_tasks,
                      'change': '{:+d}'.format(bs2.num_tasks - bs1.num_tasks)}
        self.top_consumer = None
        self.top_decrease = None
        self.top_increase = None
        self.ver_diff = OrderedDict()

        tasks_diff = diff_buildstats(bs1, bs2, 'cputime')

        # Get top consumers of resources
        tasks_diff = sorted(tasks_diff, key=attrgetter('value2'))
        self.top_consumer = tasks_diff[-5:]

        # Get biggest increase and decrease in resource usage
        tasks_diff = sorted(tasks_diff, key=attrgetter('absdiff'))
        self.top_decrease = tasks_diff[0:5]
        self.top_increase = tasks_diff[-5:]

        # Compare recipe versions and prepare data for display
        ver_diff = BSVerDiff(bs1, bs2)
        if ver_diff:
            if ver_diff.new:
                self.ver_diff['New recipes'] = [(n, r.evr) for n, r in ver_diff.new.items()]
            if ver_diff.dropped:
                self.ver_diff['Dropped recipes'] = [(n, r.evr) for n, r in ver_diff.dropped.items()]
            if ver_diff.echanged:
                self.ver_diff['Epoch changed'] = [(n, "{} → {}".format(r.left.evr, r.right.evr)) for n, r in ver_diff.echanged.items()]
            if ver_diff.vchanged:
                self.ver_diff['Version changed'] = [(n, "{} → {}".format(r.left.version, r.right.version)) for n, r in ver_diff.vchanged.items()]
            if ver_diff.rchanged:
                self.ver_diff['Revision changed'] = [(n, "{} → {}".format(r.left.evr, r.right.evr)) for n, r in ver_diff.rchanged.items()]


def print_html_report(data, id_comp, buildstats):
    """Print report in html format"""
    # Handle metadata
    metadata = metadata_diff(data[id_comp].metadata, data[-1].metadata)

    # Generate list of tests
    tests = []
    for test in data[-1].results['tests'].keys():
        test_r = data[-1].results['tests'][test]
        new_test = {'name': test_r['name'],
                    'description': test_r['description'],
                    'status': test_r['status'],
                    'measurements': [],
                    'err_type': test_r.get('err_type'),
                   }
        # Limit length of err output shown
        if 'message' in test_r:
            lines = test_r['message'].splitlines()
            if len(lines) > 20:
                new_test['message'] = '...\n' + '\n'.join(lines[-20:])
            else:
                new_test['message'] = test_r['message']


        # Generate the list of measurements
        for meas in test_r['measurements'].keys():
            meas_r = test_r['measurements'][meas]
            meas_type = 'time' if meas_r['type'] == 'sysres' else 'size'
            new_meas = {'name': meas_r['name'],
                        'legend': meas_r['legend'],
                        'description': meas_r['name'] + ' ' + meas_type,
                       }
            samples = []

            # Run through all revisions in our data
            for meta, test_data in data:
                if (not test in test_data['tests'] or
                        not meas in test_data['tests'][test]['measurements']):
                    samples.append(measurement_stats(None))
                    continue
                test_i = test_data['tests'][test]
                meas_i = test_i['measurements'][meas]
                commit_num = get_data_item(meta, 'layers.meta.commit_count')
                commit = get_data_item(meta, 'layers.meta.commit')
                # Add start_time for both test measurement types of sysres and disk usage
                start_time = test_i['start_time'][0]
                samples.append(measurement_stats(meas_i, '', start_time))
                samples[-1]['commit_num'] = commit_num
                samples[-1]['commit'] = commit

            absdiff = samples[-1]['val_cls'](samples[-1]['mean'] - samples[id_comp]['mean'])
            reldiff = absdiff * 100 / samples[id_comp]['mean']
            new_meas['absdiff'] = absdiff
            new_meas['absdiff_str'] = str(absdiff) if absdiff < 0 else '+' + str(absdiff)
            new_meas['reldiff'] = reldiff
            new_meas['reldiff_str'] = "{:+.1f} %".format(reldiff)
            new_meas['samples'] = samples
            new_meas['value'] = samples[-1]
            new_meas['value_type'] = samples[-1]['val_cls']

            # Compare buildstats
            bs_key = test + '.' + meas
            rev = str(metadata['commit_num']['value'])
            comp_rev = str(metadata['commit_num']['value_old'])
            if (buildstats and rev in buildstats and bs_key in buildstats[rev] and
                    comp_rev in buildstats and bs_key in buildstats[comp_rev]):
                new_meas['buildstats'] = BSSummary(buildstats[comp_rev][bs_key],
                                                   buildstats[rev][bs_key])


            new_test['measurements'].append(new_meas)
        tests.append(new_test)

    # Chart options
    chart_opts = {'haxis': {'min': get_data_item(data[0][0], 'layers.meta.commit_count'),
                            'max': get_data_item(data[-1][0], 'layers.meta.commit_count')}
                 }

    print(html.template.render(title="Build Perf Test Report",
                               metadata=metadata, test_data=tests,
                               chart_opts=chart_opts))


def get_buildstats(repo, notes_ref, notes_ref2, revs, outdir=None):
    """Get the buildstats from git notes"""
    full_ref = 'refs/notes/' + notes_ref
    if not repo.rev_parse(full_ref):
        log.error("No buildstats found, please try running "
                  "'git fetch origin %s:%s' to fetch them from the remote",
                  full_ref, full_ref)
        return

    missing = False
    buildstats = {}
    log.info("Parsing buildstats from 'refs/notes/%s'", notes_ref)
    for rev in revs:
        buildstats[rev.commit_number] = {}
        log.debug('Dumping buildstats for %s (%s)', rev.commit_number,
                  rev.commit)
        for tag in rev.tags:
            log.debug(' %s', tag)
            try:
                try:
                    bs_all = json.loads(repo.run_cmd(['notes', '--ref', notes_ref, 'show', tag + '^0']))
                except GitError:
                    if notes_ref2:
                        bs_all = json.loads(repo.run_cmd(['notes', '--ref', notes_ref2, 'show', tag + '^0']))
                    else:
                        raise
            except GitError:
                log.warning("Buildstats not found for %s", tag)
                bs_all = {}
                missing = True

            for measurement, bs in bs_all.items():
                # Write out onto disk
                if outdir:
                    tag_base, run_id = tag.rsplit('/', 1)
                    tag_base = tag_base.replace('/', '_')
                    bs_dir = os.path.join(outdir, measurement, tag_base)
                    if not os.path.exists(bs_dir):
                        os.makedirs(bs_dir)
                    with open(os.path.join(bs_dir, run_id + '.json'), 'w') as f:
                        json.dump(bs, f, indent=2)

                # Read buildstats into a dict
                _bs = BuildStats.from_json(bs)
                if measurement not in buildstats[rev.commit_number]:
                    buildstats[rev.commit_number][measurement] = _bs
                else:
                    buildstats[rev.commit_number][measurement].aggregate(_bs)

    if missing:
        log.info("Buildstats were missing for some test runs, please "
                 "run 'git fetch origin %s:%s' and try again",
                 full_ref, full_ref)

    return buildstats


def auto_args(repo, args):
    """Guess arguments, if not defined by the user"""
    # Get the latest commit in the repo
    log.debug("Guessing arguments from the latest commit")
    msg = repo.run_cmd(['log', '-1', '--branches', '--remotes', '--format=%b'])
    for line in msg.splitlines():
        split = line.split(':', 1)
        if len(split) != 2:
            continue

        key = split[0]
        val = split[1].strip()
        if key == 'hostname' and not args.hostname:
            log.debug("Using hostname %s", val)
            args.hostname = val
        elif key == 'branch' and not args.branch:
            log.debug("Using branch %s", val)
            args.branch = val


def parse_args(argv):
    """Parse command line arguments"""
    description = """
Examine build performance test results from a Git repository"""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=description)

    parser.add_argument('--debug', '-d', action='store_true',
                        help="Verbose logging")
    parser.add_argument('--repo', '-r', required=True,
                        help="Results repository (local git clone)")
    parser.add_argument('--list', '-l', action='count',
                        help="List available test runs")
    parser.add_argument('--html', action='store_true',
                        help="Generate report in html format")
    group = parser.add_argument_group('Tag and revision')
    group.add_argument('--tag-name', '-t',
                       default='{hostname}/{branch}/{machine}/{commit_number}-g{commit}/{tag_number}',
                       help="Tag name (pattern) for finding results")
    group.add_argument('--hostname', '-H')
    group.add_argument('--branch', '-B', default='master', help="Branch to find commit in")
    group.add_argument('--branch2', help="Branch to find comparison revisions in")
    group.add_argument('--machine', default='qemux86')
    group.add_argument('--history-length', default=300, type=int,
                       help="Number of tested revisions to plot in html report")
    group.add_argument('--commit',
                       help="Revision to search for")
    group.add_argument('--commit-number',
                       help="Revision number to search for, redundant if "
                            "--commit is specified")
    group.add_argument('--commit2',
                       help="Revision to compare with")
    group.add_argument('--commit-number2',
                       help="Revision number to compare with, redundant if "
                            "--commit2 is specified")
    parser.add_argument('--dump-buildstats', nargs='?', const='.',
                        help="Dump buildstats of the tests")

    return parser.parse_args(argv)


def main(argv=None):
    """Script entry point"""
    args = parse_args(argv)
    if args.debug:
        log.setLevel(logging.DEBUG)

    repo = GitRepo(args.repo)

    if args.list:
        list_test_revs(repo, args.tag_name, args.list, hostname=args.hostname)
        return 0

    # Determine which hostname to use
    if not args.hostname:
        auto_args(repo, args)

    revs = gitarchive.get_test_revs(log, repo, args.tag_name, hostname=args.hostname,
                                    branch=args.branch, machine=args.machine)
    if args.branch2 and args.branch2 != args.branch:
        revs2 = gitarchive.get_test_revs(log, repo, args.tag_name, hostname=args.hostname,
                                         branch=args.branch2, machine=args.machine)
        if not len(revs2):
            log.error("No revisions found to compare against")
            return 1
        if not len(revs):
            log.error("No revisions found to report on")
            return 1
    else:
        if len(revs) < 2:
            log.error("Only %d tested revisions found, unable to generate report" % len(revs))
            return 1

    # Pick revisions
    if args.commit:
        if args.commit_number:
            log.warning("Ignoring --commit-number as --commit was specified")
        index1 = gitarchive.rev_find(revs, 'commit', args.commit)
    elif args.commit_number:
        index1 = gitarchive.rev_find(revs, 'commit_number', args.commit_number)
    else:
        index1 = len(revs) - 1

    if args.branch2 and args.branch2 != args.branch:
        revs2.append(revs[index1])
        index1 = len(revs2) - 1
        revs = revs2

    if args.commit2:
        if args.commit_number2:
            log.warning("Ignoring --commit-number2 as --commit2 was specified")
        index2 = gitarchive.rev_find(revs, 'commit', args.commit2)
    elif args.commit_number2:
        index2 = gitarchive.rev_find(revs, 'commit_number', args.commit_number2)
    else:
        if index1 > 0:
            index2 = index1 - 1
            # Find the closest matching commit number for comparison.
            # In the future we could check that the commit is a common ancestor
            # and continue back if not, but this is good enough for now.
            while index2 > 0 and revs[index2].commit_number > revs[index1].commit_number:
                index2 = index2 - 1
        else:
            log.error("Unable to determine the other commit, use "
                      "--commit2 or --commit-number2 to specify it")
            return 1

    index_l = min(index1, index2)
    index_r = max(index1, index2)

    rev_l = revs[index_l]
    rev_r = revs[index_r]
    log.debug("Using 'left' revision %s (%s), %s test runs:\n %s",
              rev_l.commit_number, rev_l.commit, len(rev_l.tags),
              '\n '.join(rev_l.tags))
    log.debug("Using 'right' revision %s (%s), %s test runs:\n %s",
              rev_r.commit_number, rev_r.commit, len(rev_r.tags),
              '\n '.join(rev_r.tags))

    # Check report format used in the repo (assume all reports in the same fmt)
    xml = is_xml_format(repo, revs[index_r].tags[-1])

    if args.html:
        index_0 = max(0, min(index_l, index_r - args.history_length))
        rev_range = range(index_0, index_r + 1)
    else:
        # We do not need a range of commits for the text report (no graphs)
        index_0 = index_l
        rev_range = (index_l, index_r)

    # Read raw data
    log.debug("Reading %d revisions, starting from %s (%s)",
              len(rev_range), revs[index_0].commit_number, revs[index_0].commit)
    raw_data = [read_results(repo, revs[i].tags, xml) for i in rev_range]

    data = []
    for raw_m, raw_d in raw_data:
        data.append(AggregateTestData(aggregate_metadata(raw_m),
                                      aggregate_data(raw_d)))

    # Read buildstats only when needed
    buildstats = None
    if args.dump_buildstats or args.html:
        outdir = 'oe-build-perf-buildstats' if args.dump_buildstats else None
        notes_ref = 'buildstats/{}/{}/{}'.format(args.hostname, args.branch, args.machine)
        notes_ref2 = None
        if args.branch2:
            notes_ref = 'buildstats/{}/{}/{}'.format(args.hostname, args.branch2, args.machine)
            notes_ref2 = 'buildstats/{}/{}/{}'.format(args.hostname, args.branch, args.machine)
        buildstats = get_buildstats(repo, notes_ref, notes_ref2, [rev_l, rev_r], outdir)

    # Print report
    if not args.html:
        print_diff_report(data[0].metadata, data[0].results,
                          data[1].metadata, data[1].results)
    else:
        # Re-map 'left' list index to the data table where index_0 maps to 0
        print_html_report(data, index_l - index_0, buildstats)

    return 0

if __name__ == "__main__":
    sys.exit(main())