#
# Copyright (c) 2017, Intel Corporation.
#
# SPDX-License-Identifier: GPL-2.0-only
#
"""Handling of build perf test reports"""
from collections import OrderedDict, namedtuple
from collections.abc import Mapping
from datetime import datetime, timezone
from numbers import Number
from statistics import mean, stdev, variance


AggregateTestData = namedtuple('AggregateTestData', ['metadata', 'results'])


def isofmt_to_timestamp(string):
    """Convert timestamp string in ISO 8601 format into unix timestamp

    Accepts 'YYYY-MM-DDTHH:MM:SS' with an optional '.ffffff' fractional
    part. The parsed time is interpreted as UTC.
    """
    if '.' in string:
        fmt = '%Y-%m-%dT%H:%M:%S.%f'
    else:
        fmt = '%Y-%m-%dT%H:%M:%S'
    dt = datetime.strptime(string, fmt)
    return dt.replace(tzinfo=timezone.utc).timestamp()


def metadata_xml_to_json(elem):
    """Convert metadata xml into JSON format

    Returns an OrderedDict keyed by each child's 'name' attribute (falling
    back to its tag). Children that have sub-elements are converted
    recursively, leaf children map to their text content.
    """
    assert elem.tag == 'metadata', "Invalid metadata file format"

    def _xml_to_json(elem):
        """Convert xml element to JSON object"""
        out = OrderedDict()
        # Iterate the element directly: Element.getchildren() was removed
        # in Python 3.9
        for child in elem:
            key = child.attrib.get('name', child.tag)
            if len(child):
                out[key] = _xml_to_json(child)
            else:
                out[key] = child.text
        return out
    return _xml_to_json(elem)


def results_xml_to_json(elem):
    """Convert results xml into JSON format

    Expects a 'testsuites' element containing exactly one 'testsuite' and
    returns that suite as nested OrderedDicts.
    """
    rusage_fields = ('ru_utime', 'ru_stime', 'ru_maxrss', 'ru_minflt',
                     'ru_majflt', 'ru_inblock', 'ru_oublock', 'ru_nvcsw',
                     'ru_nivcsw')
    iostat_fields = ('rchar', 'wchar', 'syscr', 'syscw', 'read_bytes',
                     'write_bytes', 'cancelled_write_bytes')

    def _read_measurement(elem):
        """Convert measurement to JSON"""
        data = OrderedDict()
        data['type'] = elem.tag
        data['name'] = elem.attrib['name']
        data['legend'] = elem.attrib['legend']
        values = OrderedDict()

        # SYSRES measurement
        if elem.tag == 'sysres':
            for subel in elem:
                if subel.tag == 'time':
                    values['start_time'] = isofmt_to_timestamp(subel.attrib['timestamp'])
                    values['elapsed_time'] = float(subel.text)
                elif subel.tag == 'rusage':
                    rusage = OrderedDict()
                    for field in rusage_fields:
                        # The *time fields are fractional, the rest are
                        # integer counters
                        if 'time' in field:
                            rusage[field] = float(subel.attrib[field])
                        else:
                            rusage[field] = int(subel.attrib[field])
                    values['rusage'] = rusage
                elif subel.tag == 'iostat':
                    values['iostat'] = OrderedDict([(f, int(subel.attrib[f]))
                                                    for f in iostat_fields])
                elif subel.tag == 'buildstats_file':
                    values['buildstats_file'] = subel.text
                else:
                    raise TypeError("Unknown sysres value element '{}'".format(subel.tag))
        # DISKUSAGE measurement
        elif elem.tag == 'diskusage':
            values['size'] = int(elem.find('size').text)
        else:
            raise Exception("Unknown measurement tag '{}'".format(elem.tag))
        data['values'] = values
        return data

    def _read_testcase(elem):
        """Convert testcase into JSON"""
        assert elem.tag == 'testcase', "Expecting 'testcase' element instead of {}".format(elem.tag)

        data = OrderedDict()
        data['name'] = elem.attrib['name']
        data['description'] = elem.attrib['description']
        data['status'] = 'SUCCESS'
        data['start_time'] = isofmt_to_timestamp(elem.attrib['timestamp'])
        data['elapsed_time'] = float(elem.attrib['time'])
        measurements = OrderedDict()

        for subel in elem:
            if subel.tag in ('error', 'failure'):
                data['status'] = subel.tag.upper()
                data['message'] = subel.attrib['message']
                data['err_type'] = subel.attrib['type']
                data['err_output'] = subel.text
            elif subel.tag == 'skipped':
                data['status'] = 'SKIPPED'
                data['message'] = subel.text
            else:
                # Any other child is treated as a measurement
                measurements[subel.attrib['name']] = _read_measurement(subel)
        data['measurements'] = measurements
        return data

    def _read_testsuite(elem):
        """Convert suite to JSON"""
        assert elem.tag == 'testsuite', \
            "Expecting 'testsuite' element instead of {}".format(elem.tag)

        data = OrderedDict()
        if 'hostname' in elem.attrib:
            data['tester_host'] = elem.attrib['hostname']
        data['start_time'] = isofmt_to_timestamp(elem.attrib['timestamp'])
        data['elapsed_time'] = float(elem.attrib['time'])
        tests = OrderedDict()

        for case in elem:
            tests[case.attrib['name']] = _read_testcase(case)
        data['tests'] = tests
        return data

    # Main function
    assert elem.tag == 'testsuites', "Invalid test report format"
    assert len(elem) == 1, "Too many testsuites"

    return _read_testsuite(elem[0])


def aggregate_metadata(metadata):
    """Aggregate metadata into one, basically a sanity check

    Takes a sequence of metadata dicts and verifies that they all have the
    same structure and values. String values stored under one of the
    'mutable_keys' are allowed to differ between test runs. Returns a
    shallow copy of the first item with 'testrun_count' added, or an empty
    dict if there is no metadata.
    """
    mutable_keys = ('pretty_name', 'version_id')

    def aggregate_obj(aggregate, obj, assert_str=True):
        """Aggregate objects together"""
        assert type(aggregate) is type(obj), \
            "Type mismatch: {} != {}".format(type(aggregate), type(obj))
        if isinstance(obj, Mapping):
            assert set(aggregate.keys()) == set(obj.keys())
            for key, val in obj.items():
                aggregate_obj(aggregate[key], val, key not in mutable_keys)
        elif isinstance(obj, list):
            assert len(aggregate) == len(obj)
            for i, val in enumerate(obj):
                aggregate_obj(aggregate[i], val)
        elif not isinstance(obj, str) or assert_str:
            # Strings are only compared when they are not "mutable"
            assert aggregate == obj, "Data mismatch {} != {}".format(aggregate, obj)

    if not metadata:
        return {}

    # Do the aggregation
    aggregate = metadata[0].copy()
    for testrun in metadata[1:]:
        aggregate_obj(aggregate, testrun)
    aggregate['testrun_count'] = len(metadata)
    return aggregate


def aggregate_data(data):
    """Aggregate multiple test results JSON structures into one

    The result has the same shape as the individual items in 'data', except
    that every numeric value is replaced with a list of the samples
    collected from all test runs. Strings are kept as is and must match
    between the runs, unless they are stored under one of 'mutable_keys'.
    Returns an empty dict if there is no data.
    """

    mutable_keys = ('status', 'message', 'err_type', 'err_output')

    class SampleList(list):
        """Container for numerical samples"""

    def new_aggregate_obj(obj):
        """Create new object for aggregate"""
        if isinstance(obj, Number):
            new_obj = SampleList()
            new_obj.append(obj)
        elif isinstance(obj, str):
            new_obj = obj
        else:
            # Lists and dicts are kept as is
            new_obj = obj.__class__()
            aggregate_obj(new_obj, obj)
        return new_obj

    def aggregate_obj(aggregate, obj, assert_str=True):
        """Recursive "aggregation" of JSON objects"""
        if isinstance(obj, Number):
            assert isinstance(aggregate, SampleList)
            aggregate.append(obj)
            return

        assert type(aggregate) is type(obj), \
            "Type mismatch: {} != {}".format(type(aggregate), type(obj))
        if isinstance(obj, Mapping):
            for key, val in obj.items():
                if key not in aggregate:
                    aggregate[key] = new_aggregate_obj(val)
                else:
                    aggregate_obj(aggregate[key], val, key not in mutable_keys)
        elif isinstance(obj, list):
            for i, val in enumerate(obj):
                if i >= len(aggregate):
                    # New trailing item: extend the aggregate list. Items
                    # are visited in order so appending keeps the indexes
                    # aligned with 'obj'.
                    aggregate.append(new_aggregate_obj(val))
                else:
                    aggregate_obj(aggregate[i], val)
        elif isinstance(obj, str):
            # Sanity check for data
            if assert_str:
                assert aggregate == obj, "Data mismatch {} != {}".format(aggregate, obj)
        else:
            raise Exception("BUG: unable to aggregate '{}' ({})".format(type(obj), str(obj)))

    if not data:
        return {}

    # Do the aggregation
    aggregate = data[0].__class__()
    for testrun in data:
        aggregate_obj(aggregate, testrun)
    return aggregate


class MeasurementVal(float):
    """Base class representing measurement values"""
    gv_data_type = 'number'

    def gv_value(self):
        """Value formatting for visualization"""
        # NaN is the only value that is not equal to itself
        if self != self:
            return "null"
        else:
            return self


class TimeVal(MeasurementVal):
    """Class representing time values"""
    quantity = 'time'
    gv_title = 'elapsed time'
    gv_data_type = 'timeofday'

    def hms(self):
        """Split time into hours, minutes and seconds

        Operates on the absolute value, i.e. the sign is dropped.
        """
        hhh = int(abs(self) / 3600)
        mmm = int((abs(self) % 3600) / 60)
        sss = abs(self) % 60
        return hhh, mmm, sss

    def __str__(self):
        if self != self:
            return "nan"
        hh, mm, ss = self.hms()
        sign = '-' if self < 0 else ''
        if hh > 0:
            return '{}{:d}:{:02d}:{:02.0f}'.format(sign, hh, mm, ss)
        elif mm > 0:
            return '{}{:d}:{:04.1f}'.format(sign, mm, ss)
        elif ss > 1:
            return '{}{:.1f} s'.format(sign, ss)
        else:
            return '{}{:.2f} s'.format(sign, ss)

    def gv_value(self):
        """Value formatting for visualization"""
        if self != self:
            return "null"
        hh, mm, ss = self.hms()
        # [hours, minutes, seconds, milliseconds]
        return [hh, mm, int(ss), int(ss*1000) % 1000]


class SizeVal(MeasurementVal):
    """Class representing size values"""
    quantity = 'size'
    gv_title = 'size in MiB'
    gv_data_type = 'number'

    def __str__(self):
        if self != self:
            return "nan"
        # The value itself is printed with the kiB unit, larger values are
        # scaled by 1024 per unit step
        if abs(self) < 1024:
            return '{:.1f} kiB'.format(self)
        elif abs(self) < 1048576:
            return '{:.2f} MiB'.format(self / 1024)
        else:
            return '{:.2f} GiB'.format(self / 1048576)

    def gv_value(self):
        """Value formatting for visualization"""
        if self != self:
            return "null"
        return self / 1024


def measurement_stats(meas, prefix=''):
    """Get statistics of a measurement

    'meas' is an aggregated measurement, i.e. a dict with 'name', 'type'
    and 'values' where the sample values are lists of numbers. The keys of
    the computed statistics are prepended with 'prefix'. For an empty
    measurement only the (NaN-valued) statistics are returned.

    Raises Exception if the measurement type is not known.
    """
    if not meas:
        return {prefix + 'sample_cnt': 0,
                prefix + 'mean': MeasurementVal('nan'),
                prefix + 'stdev': MeasurementVal('nan'),
                prefix + 'variance': MeasurementVal('nan'),
                prefix + 'min': MeasurementVal('nan'),
                prefix + 'max': MeasurementVal('nan'),
                prefix + 'minus': MeasurementVal('nan'),
                prefix + 'plus': MeasurementVal('nan')}

    stats = {'name': meas['name']}
    if meas['type'] == 'sysres':
        val_cls = TimeVal
        values = meas['values']['elapsed_time']
    elif meas['type'] == 'diskusage':
        val_cls = SizeVal
        values = meas['values']['size']
    else:
        raise Exception("Unknown measurement type '{}'".format(meas['type']))
    stats['val_cls'] = val_cls
    stats['quantity'] = val_cls.quantity
    stats[prefix + 'sample_cnt'] = len(values)

    mean_val = val_cls(mean(values))
    min_val = val_cls(min(values))
    max_val = val_cls(max(values))

    stats[prefix + 'mean'] = mean_val
    if len(values) > 1:
        stats[prefix + 'stdev'] = val_cls(stdev(values))
        stats[prefix + 'variance'] = val_cls(variance(values))
    else:
        # stdev() and variance() need at least two samples. Use the value
        # class for NaN, too, so that all stats share the same type.
        stats[prefix + 'stdev'] = val_cls('nan')
        stats[prefix + 'variance'] = val_cls('nan')
    stats[prefix + 'min'] = min_val
    stats[prefix + 'max'] = max_val
    stats[prefix + 'minus'] = val_cls(mean_val - min_val)
    stats[prefix + 'plus'] = val_cls(max_val - mean_val)

    return stats