1#
2# Copyright (c) 2017, Intel Corporation.
3#
4# SPDX-License-Identifier: GPL-2.0-only
5#
6"""Handling of build perf test reports"""
7from collections import OrderedDict, namedtuple
8from collections.abc import Mapping
9from datetime import datetime, timezone
10from numbers import Number
11from statistics import mean, stdev, variance
12
13
# Lightweight immutable record with two fields: 'metadata' and 'results'
AggregateTestData = namedtuple('AggregateTestData', ('metadata', 'results'))
15
16
def isofmt_to_timestamp(string):
    """Turn an ISO 8601 formatted timestamp string into a unix timestamp

    The string is parsed as 'YYYY-MM-DDTHH:MM:SS', with a fractional seconds
    part expected whenever the string contains a '.'. The parsed time is
    interpreted as UTC and returned as float seconds since the epoch.
    """
    fmt = '%Y-%m-%dT%H:%M:%S'
    if '.' in string:
        # Fractional seconds are present
        fmt += '.%f'
    parsed = datetime.strptime(string, fmt)
    return parsed.replace(tzinfo=timezone.utc).timestamp()
24
25
def metadata_xml_to_json(elem):
    """Convert metadata xml into JSON format

    Takes the root 'metadata' XML element and returns a (possibly nested)
    OrderedDict. Each child element is stored under its 'name' attribute, or
    under its tag when it has no such attribute. Elements that have children
    are converted recursively; leaf elements are mapped to their text.

    Raises AssertionError if the root element is not 'metadata'.
    """
    assert elem.tag == 'metadata', "Invalid metadata file format"

    def _xml_to_json(elem):
        """Convert xml element to JSON object"""
        out = OrderedDict()
        # Iterate the element directly: Element.getchildren() was removed in
        # Python 3.9
        for child in elem:
            key = child.attrib.get('name', child.tag)
            if len(child):
                out[key] = _xml_to_json(child)
            else:
                out[key] = child.text
        return out
    return _xml_to_json(elem)
41
42
def results_xml_to_json(elem):
    """Convert results xml into JSON format

    Takes the root 'testsuites' XML element, which must contain exactly one
    'testsuite' child, and returns a nested OrderedDict describing the suite,
    its test cases (keyed by name) and their measurements (keyed by name).

    Raises AssertionError on unexpected root/suite/testcase elements,
    TypeError on an unknown child of a 'sysres' measurement and Exception on
    an unknown measurement tag. Missing XML attributes surface as KeyError.
    """
    rusage_fields = ('ru_utime', 'ru_stime', 'ru_maxrss', 'ru_minflt',
                     'ru_majflt', 'ru_inblock', 'ru_oublock', 'ru_nvcsw',
                     'ru_nivcsw')
    iostat_fields = ('rchar', 'wchar', 'syscr', 'syscw', 'read_bytes',
                     'write_bytes', 'cancelled_write_bytes')

    def _read_measurement(elem):
        """Convert measurement to JSON"""
        data = OrderedDict()
        data['type'] = elem.tag
        data['name'] = elem.attrib['name']
        data['legend'] = elem.attrib['legend']
        values = OrderedDict()

        # SYSRES measurement
        if elem.tag == 'sysres':
            for subel in elem:
                if subel.tag == 'time':
                    values['start_time'] = isofmt_to_timestamp(subel.attrib['timestamp'])
                    values['elapsed_time'] = float(subel.text)
                elif subel.tag == 'rusage':
                    rusage = OrderedDict()
                    for field in rusage_fields:
                        # The *time fields (ru_utime, ru_stime) are parsed as
                        # floats, all other rusage fields as integers
                        if 'time' in field:
                            rusage[field] = float(subel.attrib[field])
                        else:
                            rusage[field] = int(subel.attrib[field])
                    values['rusage'] = rusage
                elif subel.tag == 'iostat':
                    values['iostat'] = OrderedDict(
                        (f, int(subel.attrib[f])) for f in iostat_fields)
                elif subel.tag == 'buildstats_file':
                    values['buildstats_file'] = subel.text
                else:
                    raise TypeError("Unknown sysres value element '{}'".format(subel.tag))
        # DISKUSAGE measurement
        elif elem.tag == 'diskusage':
            values['size'] = int(elem.find('size').text)
        else:
            raise Exception("Unknown measurement tag '{}'".format(elem.tag))
        data['values'] = values
        return data

    def _read_testcase(elem):
        """Convert testcase into JSON"""
        assert elem.tag == 'testcase', "Expecting 'testcase' element instead of {}".format(elem.tag)

        data = OrderedDict()
        data['name'] = elem.attrib['name']
        data['description'] = elem.attrib['description']
        # Assume success unless an error/failure/skipped child says otherwise
        data['status'] = 'SUCCESS'
        data['start_time'] = isofmt_to_timestamp(elem.attrib['timestamp'])
        data['elapsed_time'] = float(elem.attrib['time'])
        measurements = OrderedDict()

        # Iterate the element directly: Element.getchildren() was removed in
        # Python 3.9
        for subel in elem:
            if subel.tag == 'error' or subel.tag == 'failure':
                data['status'] = subel.tag.upper()
                data['message'] = subel.attrib['message']
                data['err_type'] = subel.attrib['type']
                data['err_output'] = subel.text
            elif subel.tag == 'skipped':
                data['status'] = 'SKIPPED'
                data['message'] = subel.text
            else:
                # Any other child is treated as a measurement
                measurements[subel.attrib['name']] = _read_measurement(subel)
        data['measurements'] = measurements
        return data

    def _read_testsuite(elem):
        """Convert suite to JSON"""
        assert elem.tag == 'testsuite', \
                "Expecting 'testsuite' element instead of {}".format(elem.tag)

        data = OrderedDict()
        if 'hostname' in elem.attrib:
            data['tester_host'] = elem.attrib['hostname']
        data['start_time'] = isofmt_to_timestamp(elem.attrib['timestamp'])
        data['elapsed_time'] = float(elem.attrib['time'])
        tests = OrderedDict()

        for case in elem:
            tests[case.attrib['name']] = _read_testcase(case)
        data['tests'] = tests
        return data

    # Main function
    assert elem.tag == 'testsuites', "Invalid test report format"
    assert len(elem) == 1, "Too many testsuites"

    # Index the element instead of calling the removed getchildren()
    return _read_testsuite(elem[0])
136
137
def aggregate_metadata(metadata):
    """Merge the metadata of several test runs into one, verifying consistency

    The first entry serves as the reference; every other entry is compared
    against it recursively (types, mapping keys, list lengths and values).
    String values stored directly under one of the mutable keys are allowed
    to differ. Returns a shallow copy of the first entry extended with a
    'testrun_count' key, or an empty dict if there is no metadata at all.

    Raises AssertionError if the entries are not consistent.
    """
    mutable_keys = ('pretty_name', 'version_id')

    def check_obj(reference, candidate, strict_str=True):
        """Recursively verify that candidate matches reference"""
        assert type(reference) is type(candidate), \
                "Type mismatch: {} != {}".format(type(reference), type(candidate))
        if isinstance(candidate, Mapping):
            assert set(reference.keys()) == set(candidate.keys())
            for name, item in candidate.items():
                # Strings under mutable keys are exempt from the equality check
                check_obj(reference[name], item, name not in mutable_keys)
            return
        if isinstance(candidate, list):
            assert len(reference) == len(candidate)
            for idx, item in enumerate(candidate):
                check_obj(reference[idx], item)
            return
        # Non-string leaves are always compared, strings only when required
        if strict_str or not isinstance(candidate, str):
            assert reference == candidate, "Data mismatch {} != {}".format(reference, candidate)

    if not metadata:
        return {}

    # Do the aggregation
    merged = metadata[0].copy()
    for other in metadata[1:]:
        check_obj(merged, other)
    merged['testrun_count'] = len(metadata)
    return merged
166
167
def aggregate_data(data):
    """Aggregate multiple test results JSON structures into one

    Takes a sequence of (nested) result structures and returns one structure
    of the same shape in which every numeric leaf has been replaced by a list
    of the samples collected from all test runs. String leaves are kept from
    the first run and are required to match in the other runs, except for
    strings stored directly under one of the mutable keys. Returns an empty
    dict when there is no data.

    Raises AssertionError on type or string data mismatch between runs and
    Exception on values that cannot be aggregated.
    """

    mutable_keys = ('status', 'message', 'err_type', 'err_output')

    class SampleList(list):
        """Container for numerical samples"""
        pass

    def new_aggregate_obj(obj):
        """Create new object for aggregate"""
        if isinstance(obj, Number):
            new_obj = SampleList()
            new_obj.append(obj)
        elif isinstance(obj, str):
            new_obj = obj
        else:
            # Lists and dicts are kept as is
            new_obj = obj.__class__()
            aggregate_obj(new_obj, obj)
        return new_obj

    def aggregate_obj(aggregate, obj, assert_str=True):
        """Recursive "aggregation" of JSON objects"""
        if isinstance(obj, Number):
            assert isinstance(aggregate, SampleList)
            aggregate.append(obj)
            return

        assert type(aggregate) is type(obj), \
                "Type mismatch: {} != {}".format(type(aggregate), type(obj))
        if isinstance(obj, Mapping):
            for key, val in obj.items():
                if key not in aggregate:
                    aggregate[key] = new_aggregate_obj(val)
                else:
                    aggregate_obj(aggregate[key], val, key not in mutable_keys)
        elif isinstance(obj, list):
            for i, val in enumerate(obj):
                if i >= len(aggregate):
                    # Item beyond the end of the aggregate list: extend the
                    # list. Lists have no key to assign to, and i can exceed
                    # the current length by at most zero here.
                    aggregate.append(new_aggregate_obj(val))
                else:
                    aggregate_obj(aggregate[i], val)
        elif isinstance(obj, str):
            # Sanity check for data
            if assert_str:
                assert aggregate == obj, "Data mismatch {} != {}".format(aggregate, obj)
        else:
            raise Exception("BUG: unable to aggregate '{}' ({})".format(type(obj), str(obj)))

    if not data:
        return {}

    # Do the aggregation, starting from an empty container of the same type
    # as the first test run
    aggregate = data[0].__class__()
    for testrun in data:
        aggregate_obj(aggregate, testrun)
    return aggregate
226
227
class MeasurementVal(float):
    """Float-based base class for measurement values"""
    gv_data_type = 'number'

    def gv_value(self):
        """Return the value in the form used for visualization

        NaN (the only value that is not equal to itself) is given as the
        string "null"; any other value is returned unchanged.
        """
        return "null" if self != self else self
238
239
class TimeVal(MeasurementVal):
    """Measurement value holding a time in seconds"""
    quantity = 'time'
    gv_title = 'elapsed time'
    gv_data_type = 'timeofday'

    def hms(self):
        """Split time into hours, minutes and seconds

        The split is done on the absolute value. Hours and minutes are
        returned as integers, seconds as a float.
        """
        magnitude = abs(self)
        hours = int(magnitude / 3600)
        minutes = int((magnitude % 3600) / 60)
        seconds = magnitude % 60
        return hours, minutes, seconds

    def __str__(self):
        # NaN is the only value that is not equal to itself
        if self != self:
            return "nan"
        hours, minutes, seconds = self.hms()
        prefix = '-' if self < 0 else ''
        # The longer the time the less sub-second precision is shown
        if hours > 0:
            return '{}{:d}:{:02d}:{:02.0f}'.format(prefix, hours, minutes, seconds)
        if minutes > 0:
            return '{}{:d}:{:04.1f}'.format(prefix, minutes, seconds)
        if seconds > 1:
            return '{}{:.1f} s'.format(prefix, seconds)
        return '{}{:.2f} s'.format(prefix, seconds)

    def gv_value(self):
        """Return the value in the form used for visualization

        NaN is given as the string "null"; other values as the list
        [hours, minutes, seconds, milliseconds] of the absolute value.
        """
        if self != self:
            return "null"
        hours, minutes, seconds = self.hms()
        whole_seconds = int(seconds)
        milliseconds = int(seconds*1000) % 1000
        return [hours, minutes, whole_seconds, milliseconds]
273
274
class SizeVal(MeasurementVal):
    """Measurement value holding a size

    The string formatting labels the raw value as kiB and scales it up to
    MiB and GiB in steps of 1024.
    """
    quantity = 'size'
    gv_title = 'size in MiB'
    gv_data_type = 'number'

    def __str__(self):
        # NaN is the only value that is not equal to itself
        if self != self:
            return "nan"
        magnitude = abs(self)
        if magnitude < 1024:
            return '{:.1f} kiB'.format(self)
        if magnitude < 1048576:
            return '{:.2f} MiB'.format(self / 1024)
        return '{:.2f} GiB'.format(self / 1048576)

    def gv_value(self):
        """Return the value in the form used for visualization

        NaN is given as the string "null"; other values are divided by 1024.
        """
        return "null" if self != self else self / 1024
296
def measurement_stats(meas, prefix=''):
    """Get statistics of a measurement

    meas is a measurement dict with 'name', 'type' and 'values' keys, where
    the relevant entry of 'values' ('elapsed_time' for 'sysres', 'size' for
    'diskusage') holds the collected samples -- presumably the output of
    aggregate_data(); verify against the caller. prefix is prepended to the
    key of every statistic in the returned dict.

    Returns a dict with the sample count, mean, stdev, variance, min, max and
    the distance of min/max from the mean ('minus'/'plus'), plus the 'name',
    'val_cls' and 'quantity' of the measurement. For a missing (falsy)
    measurement only the statistics are returned: a zero sample count and
    NaN for everything else.

    Raises Exception on an unknown measurement type.
    """
    if not meas:
        empty_stats = {prefix + 'sample_cnt': 0}
        for key in ('mean', 'stdev', 'variance', 'min', 'max', 'minus', 'plus'):
            empty_stats[prefix + key] = MeasurementVal('nan')
        return empty_stats

    stats = {'name': meas['name']}
    if meas['type'] == 'sysres':
        val_cls = TimeVal
        values = meas['values']['elapsed_time']
    elif meas['type'] == 'diskusage':
        val_cls = SizeVal
        values = meas['values']['size']
    else:
        raise Exception("Unknown measurement type '{}'".format(meas['type']))
    stats['val_cls'] = val_cls
    stats['quantity'] = val_cls.quantity
    stats[prefix + 'sample_cnt'] = len(values)

    mean_val = val_cls(mean(values))
    min_val = val_cls(min(values))
    max_val = val_cls(max(values))

    stats[prefix + 'mean'] = mean_val
    if len(values) > 1:
        stats[prefix + 'stdev'] = val_cls(stdev(values))
        stats[prefix + 'variance'] = val_cls(variance(values))
    else:
        # stdev() and variance() need at least two samples. Use NaN of the
        # measurement value class so that these entries have the same type
        # (and methods) as all the other statistics.
        stats[prefix + 'stdev'] = val_cls('nan')
        stats[prefix + 'variance'] = val_cls('nan')
    stats[prefix + 'min'] = min_val
    stats[prefix + 'max'] = max_val
    stats[prefix + 'minus'] = val_cls(mean_val - min_val)
    stats[prefix + 'plus'] = val_cls(max_val - mean_val)

    return stats
339
340